提交 2bddbdf2 authored 作者: Thomas Mueller's avatar Thomas Mueller

Support for the ICU4J collator.

上级 d4708573
...@@ -1027,6 +1027,12 @@ Sets the collation used for comparing strings. ...@@ -1027,6 +1027,12 @@ Sets the collation used for comparing strings.
This command can only be executed if there are no tables defined. This command can only be executed if there are no tables defined.
See ""java.text.Collator"" for details about the supported collations and the STRENGTH. See ""java.text.Collator"" for details about the supported collations and the STRENGTH.
The ICU4J collator is used if it is in the classpath.
It is also used if the collation name starts with ICU4J_
(in that case, ICU4J must be in the classpath; otherwise an exception is thrown).
The default collator is used if the collation name starts with DEFAULT_
(even if ICU4J is in the classpath).
Admin rights are required to execute this command. Admin rights are required to execute this command.
This command commits an open transaction. This command commits an open transaction.
This setting is persistent. This setting is persistent.
......
...@@ -17,7 +17,8 @@ Change Log ...@@ -17,7 +17,8 @@ Change Log
<h1>Change Log</h1> <h1>Change Log</h1>
<h2>Next Version (unreleased)</h2> <h2>Next Version (unreleased)</h2>
<ul><li>Improved Oracle compatibility: support for NVL2. Thanks to litailang for the patch! <ul><li>Support for the ICU4J collator.
</li><li>Improved Oracle compatibility: support for NVL2. Thanks to litailang for the patch!
</li><li>Improved PostgreSQL compatibility: support for RANDOM() in addition to RAND(). </li><li>Improved PostgreSQL compatibility: support for RANDOM() in addition to RAND().
</li><li>There was a classloader memory leak problem because a class contained a static </li><li>There was a classloader memory leak problem because a class contained a static
reference to an exception (including stack trace). reference to an exception (including stack trace).
......
...@@ -157,22 +157,23 @@ is well tested (if possible with automated test cases). The areas that are not w ...@@ -157,22 +157,23 @@ is well tested (if possible with automated test cases). The areas that are not w
</p> </p>
<ul> <ul>
<li>Platforms other than Windows XP, Linux, Mac OS X, or JVMs other than Sun 1.5 or 1.6 <li>Platforms other than Windows XP, Linux, Mac OS X, or JVMs other than Sun 1.5 or 1.6
</li><li>The features <code>AUTO_SERVER</code> and <code>AUTO_RECONNECT</code> </li><li>The features <code>AUTO_SERVER</code> and <code>AUTO_RECONNECT</code>.
</li><li>The file locking method 'Serialized' </li><li>The file locking method 'Serialized'.
</li><li>Cluster mode, 2-phase commit, savepoints </li><li>Cluster mode, 2-phase commit, savepoints.
</li><li>24/7 operation </li><li>24/7 operation.
</li><li>Fulltext search </li><li>Fulltext search.
</li><li>Operations on LOBs over 2 GB </li><li>Operations on LOBs over 2 GB.
</li><li>Some operations on databases larger than 500 MB may be slower than expected </li><li>Some operations on databases larger than 500 MB may be slower than expected.
</li><li>The optimizer may not always select the best plan </li><li>The optimizer may not always select the best plan.
</li><li>Using the ICU4J collator.
</li></ul> </li></ul>
<p> <p>
Areas considered experimental are: Areas considered experimental are:
</p> </p>
<ul> <ul>
<li>The PostgreSQL server <li>The PostgreSQL server
</li><li>Multi-threading within the engine using <code>SET MULTI_THREADED=1</code> </li><li>Multi-threading within the engine using <code>SET MULTI_THREADED=1</code>.
</li><li>Compatibility modes for other databases (only some features are implemented) </li><li>Compatibility modes for other databases (only some features are implemented).
</li><li>The soft reference cache (<code>CACHE_TYPE=SOFT_LRU</code>). It might not improve performance, </li><li>The soft reference cache (<code>CACHE_TYPE=SOFT_LRU</code>). It might not improve performance,
and out of memory issues have been reported. and out of memory issues have been reported.
</li></ul> </li></ul>
......
...@@ -116,7 +116,8 @@ Features ...@@ -116,7 +116,8 @@ Features
</li><li>Wide range of data types including large objects (BLOB/CLOB) and arrays </li><li>Wide range of data types including large objects (BLOB/CLOB) and arrays
</li><li>Sequence and autoincrement columns, computed columns (can be used for function based indexes) </li><li>Sequence and autoincrement columns, computed columns (can be used for function based indexes)
</li><li><code>ORDER BY, GROUP BY, HAVING, UNION, LIMIT, TOP</code> </li><li><code>ORDER BY, GROUP BY, HAVING, UNION, LIMIT, TOP</code>
</li><li>Collation support, users, roles </li><li>Collation support, including support for the ICU4J library
</li><li>Support for users and roles
</li><li>Compatibility modes for IBM DB2, Apache Derby, HSQLDB, </li><li>Compatibility modes for IBM DB2, Apache Derby, HSQLDB,
MS SQL Server, MySQL, Oracle, and PostgreSQL. MS SQL Server, MySQL, Oracle, and PostgreSQL.
</li></ul> </li></ul>
......
...@@ -4520,7 +4520,7 @@ public class Parser { ...@@ -4520,7 +4520,7 @@ public class Parser {
} }
Collator coll = CompareMode.getCollator(name); Collator coll = CompareMode.getCollator(name);
if (coll == null) { if (coll == null) {
throw getSyntaxError(); throw DbException.getInvalidValueException("collation", name);
} }
if (readIf("STRENGTH")) { if (readIf("STRENGTH")) {
if (readIf("PRIMARY")) { if (readIf("PRIMARY")) {
......
...@@ -6,48 +6,53 @@ ...@@ -6,48 +6,53 @@
*/ */
package org.h2.value; package org.h2.value;
import java.text.CollationKey;
import java.text.Collator; import java.text.Collator;
import java.util.Locale; import java.util.Locale;
import org.h2.constant.SysProperties;
import org.h2.util.SmallLRUCache;
import org.h2.util.StringUtils; import org.h2.util.StringUtils;
/** /**
* Instances of this class can compare strings. * Instances of this class can compare strings. Case sensitive and case
* Case sensitive and case insensitive comparison is supported, * insensitive comparison is supported, and comparison using a collator.
* and comparison using a collator.
*/ */
public class CompareMode { public class CompareMode {
/** /**
* This constant means there is no collator set, * This constant means there is no collator set, and the default string
* and the default string comparison is to be used. * comparison is to be used.
*/ */
public static final String OFF = "OFF"; public static final String OFF = "OFF";
/**
* This constant means the default collator should be used, even if ICU4J is
* in the classpath.
*/
public static final String DEFAULT = "DEFAULT_";
/**
* This constant means ICU4J should be used (this will fail if it is not in
* the classpath).
*/
public static final String ICU4J = "ICU4J_";
private static CompareMode lastUsed; private static CompareMode lastUsed;
private static boolean canUseICU4J;
static {
try {
Class.forName("com.ibm.icu.text.Collator");
canUseICU4J = true;
} catch (Exception e) {
// ignore
}
}
private final String name; private final String name;
private final int strength; private final int strength;
private final Collator collator;
private final SmallLRUCache<String, CollationKey> collationKeys;
private CompareMode(String name, int strength) { protected CompareMode(String name, int strength) {
this.name = name; this.name = name;
this.strength = strength; this.strength = strength;
this.collator = CompareMode.getCollator(name);
int cacheSize = 0;
if (collator != null) {
this.collator.setStrength(strength);
cacheSize = SysProperties.COLLATOR_CACHE_SIZE;
}
if (cacheSize != 0) {
collationKeys = SmallLRUCache.newInstance(cacheSize);
} else {
collationKeys = null;
}
} }
/** /**
...@@ -68,7 +73,25 @@ public class CompareMode { ...@@ -68,7 +73,25 @@ public class CompareMode {
} }
} }
} }
if (name == null || name.equals(OFF)) {
lastUsed = new CompareMode(name, strength); lastUsed = new CompareMode(name, strength);
} else {
boolean useICU4J;
if (name.startsWith(ICU4J)) {
useICU4J = true;
name = name.substring(ICU4J.length());
} else if (name.startsWith(DEFAULT)) {
useICU4J = false;
name = name.substring(DEFAULT.length());
} else {
useICU4J = canUseICU4J;
}
if (useICU4J) {
lastUsed = new CompareModeIcu4J(name, strength);
} else {
lastUsed = new CompareModeDefault(name, strength);
}
}
return lastUsed; return lastUsed;
} }
...@@ -83,9 +106,6 @@ public class CompareMode { ...@@ -83,9 +106,6 @@ public class CompareMode {
* @return true if the characters are equal * @return true if the characters are equal
*/ */
public boolean equalsChars(String a, int ai, String b, int bi, boolean ignoreCase) { public boolean equalsChars(String a, int ai, String b, int bi, boolean ignoreCase) {
if (collator != null) {
return compareString(a.substring(ai, ai + 1), b.substring(bi, bi + 1), ignoreCase) == 0;
}
char ca = a.charAt(ai); char ca = a.charAt(ai);
char cb = b.charAt(bi); char cb = b.charAt(bi);
if (ignoreCase) { if (ignoreCase) {
...@@ -105,38 +125,11 @@ public class CompareMode { ...@@ -105,38 +125,11 @@ public class CompareMode {
* smaller, and 0 if they are equal * smaller, and 0 if they are equal
*/ */
public int compareString(String a, String b, boolean ignoreCase) { public int compareString(String a, String b, boolean ignoreCase) {
if (collator == null) {
if (ignoreCase) { if (ignoreCase) {
return a.compareToIgnoreCase(b); return a.compareToIgnoreCase(b);
} }
return a.compareTo(b); return a.compareTo(b);
} }
if (ignoreCase) {
// this is locale sensitive
a = a.toUpperCase();
b = b.toUpperCase();
}
int comp;
if (collationKeys != null) {
CollationKey aKey = getKey(a);
CollationKey bKey = getKey(b);
comp = aKey.compareTo(bKey);
} else {
comp = collator.compare(a, b);
}
return comp;
}
private CollationKey getKey(String a) {
synchronized (collationKeys) {
CollationKey key = collationKeys.get(a);
if (key == null) {
key = collator.getCollationKey(a);
collationKeys.put(a, key);
}
return key;
}
}
/** /**
* Get the collation name. * Get the collation name.
...@@ -151,7 +144,15 @@ public class CompareMode { ...@@ -151,7 +144,15 @@ public class CompareMode {
return name; return name;
} }
private static boolean compareLocaleNames(Locale locale, String name) { /**
* Compare the name of the locale with the given name. The case of the name
* is ignored.
*
* @param locale the locale
* @param name the name
* @return true if they match
*/
static boolean compareLocaleNames(Locale locale, String name) {
return name.equalsIgnoreCase(locale.toString()) || name.equalsIgnoreCase(getName(locale)); return name.equalsIgnoreCase(locale.toString()) || name.equalsIgnoreCase(getName(locale));
} }
...@@ -163,10 +164,12 @@ public class CompareMode { ...@@ -163,10 +164,12 @@ public class CompareMode {
* @return the collator * @return the collator
*/ */
public static Collator getCollator(String name) { public static Collator getCollator(String name) {
if (name == null || name.equals(OFF)) {
return null;
}
Collator result = null; Collator result = null;
if (name.startsWith(ICU4J)) {
name = name.substring(ICU4J.length());
} else if (name.startsWith(DEFAULT)) {
name = name.substring(DEFAULT.length());
}
if (name.length() == 2) { if (name.length() == 2) {
Locale locale = new Locale(name.toLowerCase(), ""); Locale locale = new Locale(name.toLowerCase(), "");
if (compareLocaleNames(locale, name)) { if (compareLocaleNames(locale, name)) {
......
/*
* Copyright 2004-2011 H2 Group. Multiple-Licensed under the H2 License,
* Version 1.0, and under the Eclipse Public License, Version 1.0
* (http://h2database.com/html/license.html).
* Initial Developer: H2 Group
*/
package org.h2.value;
import java.text.CollationKey;
import java.text.Collator;
import org.h2.constant.SysProperties;
import org.h2.message.DbException;
import org.h2.util.SmallLRUCache;
/**
 * The standard CompareMode implementation. Strings are compared with a
 * java.text.Collator that is looked up by collation name.
 */
public class CompareModeDefault extends CompareMode {

    /**
     * The collator obtained from CompareMode.getCollator, with the requested
     * strength applied.
     */
    private final Collator collator;

    /**
     * Collation keys cached by string, or null if
     * SysProperties.COLLATOR_CACHE_SIZE is 0.
     */
    private final SmallLRUCache<String, CollationKey> collationKeys;

    /**
     * Create a compare mode backed by a java.text.Collator.
     *
     * @param name the collation name, passed to CompareMode.getCollator
     * @param strength the collator strength to set
     */
    protected CompareModeDefault(String name, int strength) {
        super(name, strength);
        Collator resolved = CompareMode.getCollator(name);
        if (resolved == null) {
            // no collator could be found for this name
            throw DbException.throwInternalError(name);
        }
        resolved.setStrength(strength);
        collator = resolved;
        int cacheSize = SysProperties.COLLATOR_CACHE_SIZE;
        SmallLRUCache<String, CollationKey> cache = null;
        if (cacheSize != 0) {
            cache = SmallLRUCache.newInstance(cacheSize);
        }
        collationKeys = cache;
    }

    /**
     * Compare two strings using the collator.
     *
     * @param a the first string
     * @param b the second string
     * @param ignoreCase if true, both strings are converted to upper case
     *            before they are compared
     * @return the result of the collator (or collation key) comparison
     */
    public int compareString(String a, String b, boolean ignoreCase) {
        // the upper case conversion is locale sensitive
        String left = ignoreCase ? a.toUpperCase() : a;
        String right = ignoreCase ? b.toUpperCase() : b;
        if (collationKeys == null) {
            return collator.compare(left, right);
        }
        CollationKey leftKey = getKey(left);
        CollationKey rightKey = getKey(right);
        return leftKey.compareTo(rightKey);
    }

    /**
     * Check whether the single-character substrings at the given positions
     * compare as equal.
     *
     * @param a the first string
     * @param ai the character index in the first string
     * @param b the second string
     * @param bi the character index in the second string
     * @param ignoreCase passed on to compareString
     * @return true if compareString returns 0 for the two substrings
     */
    public boolean equalsChars(String a, int ai, String b, int bi, boolean ignoreCase) {
        String charA = a.substring(ai, ai + 1);
        String charB = b.substring(bi, bi + 1);
        return compareString(charA, charB, ignoreCase) == 0;
    }

    /**
     * Get the collation key of a string from the cache, creating and caching
     * it if it is not there yet. Access to the cache is synchronized on the
     * cache object.
     *
     * @param a the string
     * @return the collation key
     */
    private CollationKey getKey(String a) {
        synchronized (collationKeys) {
            CollationKey cached = collationKeys.get(a);
            if (cached != null) {
                return cached;
            }
            CollationKey created = collator.getCollationKey(a);
            collationKeys.put(a, created);
            return created;
        }
    }

}
/*
* Copyright 2004-2011 H2 Group. Multiple-Licensed under the H2 License,
* Version 1.0, and under the Eclipse Public License, Version 1.0
* (http://h2database.com/html/license.html).
* Initial Developer: H2 Group
*/
package org.h2.value;
import java.lang.reflect.Method;
import java.util.Comparator;
import java.util.Locale;
import org.h2.message.DbException;
import org.h2.util.Utils;
/**
 * A CompareMode implementation that compares strings using the ICU4J
 * Collator. The ICU4J classes are only accessed through reflection.
 */
public class CompareModeIcu4J extends CompareMode {

    /**
     * The ICU4J collator instance, used through the Comparator interface.
     */
    private final Comparator<String> collator;

    /**
     * Create a compare mode backed by an ICU4J collator.
     *
     * @param name the collation name (a locale name)
     * @param strength the collator strength to set
     */
    protected CompareModeIcu4J(String name, int strength) {
        super(name, strength);
        collator = getIcu4jCollator(name, strength);
    }

    /**
     * Compare two strings using the ICU4J collator.
     *
     * @param a the first string
     * @param b the second string
     * @param ignoreCase if true, both strings are converted to upper case
     *            before they are compared
     * @return the result of the collator comparison
     */
    public int compareString(String a, String b, boolean ignoreCase) {
        if (!ignoreCase) {
            return collator.compare(a, b);
        }
        return collator.compare(a.toUpperCase(), b.toUpperCase());
    }

    /**
     * Check whether the single-character substrings at the given positions
     * compare as equal.
     *
     * @param a the first string
     * @param ai the character index in the first string
     * @param b the second string
     * @param bi the character index in the second string
     * @param ignoreCase passed on to compareString
     * @return true if compareString returns 0 for the two substrings
     */
    public boolean equalsChars(String a, int ai, String b, int bi, boolean ignoreCase) {
        String charA = a.substring(ai, ai + 1);
        String charB = b.substring(bi, bi + 1);
        return compareString(charA, charB, ignoreCase) == 0;
    }

    /**
     * Load the ICU4J collator class, find the locale that matches the given
     * name, create a collator for it and set its strength.
     *
     * @param name the locale name
     * @param strength the strength to set on the collator
     * @return the collator
     * @throws DbException if no locale matches the name, or if any other
     *             exception occurs (it is converted using DbException.convert)
     */
    private static Comparator<String> getIcu4jCollator(String name, int strength) {
        try {
            Class<?> icuCollator = Utils.loadUserClass("com.ibm.icu.text.Collator");
            Method factory = icuCollator.getMethod("getInstance", Locale.class);
            Comparator<String> found = null;
            // first try to build the locale directly from the name
            Locale direct = findExactLocale(name);
            if (direct != null) {
                found = newCollator(factory, direct);
            }
            if (found == null) {
                // otherwise search the locales reported by the collator class
                Locale[] available = (Locale[]) icuCollator.getMethod("getAvailableLocales").invoke(null);
                for (Locale candidate : available) {
                    if (compareLocaleNames(candidate, name)) {
                        found = newCollator(factory, candidate);
                        break;
                    }
                }
            }
            if (found == null) {
                throw DbException.getInvalidValueException("collator", name);
            }
            icuCollator.getMethod("setStrength", int.class).invoke(found, strength);
            return found;
        } catch (Exception e) {
            throw DbException.convert(e);
        }
    }

    /**
     * Try to construct a locale directly from a name of the form LL
     * (language) or LL_CC (language_country).
     *
     * @param name the locale name
     * @return the locale if its name matches according to
     *         compareLocaleNames, or null
     */
    private static Locale findExactLocale(String name) {
        Locale locale = null;
        if (name.length() == 2) {
            locale = new Locale(name.toLowerCase(), "");
        } else if (name.length() == 5) {
            // LL_CC (language_country)
            int separator = name.indexOf('_');
            if (separator >= 0) {
                String language = name.substring(0, separator).toLowerCase();
                String country = name.substring(separator + 1);
                locale = new Locale(language, country);
            }
        }
        if (locale != null && compareLocaleNames(locale, name)) {
            return locale;
        }
        return null;
    }

    /**
     * Invoke the static factory method for the given locale and cast the
     * result to a string comparator.
     *
     * @param factory the static factory method that takes a locale
     * @param locale the locale
     * @return the object returned by the factory method
     */
    @SuppressWarnings("unchecked")
    private static Comparator<String> newCollator(Method factory, Locale locale) throws Exception {
        return (Comparator<String>) factory.invoke(null, locale);
    }

}
...@@ -119,6 +119,7 @@ import org.h2.test.unit.TestAutoReconnect; ...@@ -119,6 +119,7 @@ import org.h2.test.unit.TestAutoReconnect;
import org.h2.test.unit.TestBitField; import org.h2.test.unit.TestBitField;
import org.h2.test.unit.TestCache; import org.h2.test.unit.TestCache;
import org.h2.test.unit.TestClearReferences; import org.h2.test.unit.TestClearReferences;
import org.h2.test.unit.TestCollation;
import org.h2.test.unit.TestCompress; import org.h2.test.unit.TestCompress;
import org.h2.test.unit.TestConnectionInfo; import org.h2.test.unit.TestConnectionInfo;
import org.h2.test.unit.TestDataPage; import org.h2.test.unit.TestDataPage;
...@@ -649,6 +650,7 @@ kill -9 `jps -l | grep "org.h2.test." | cut -d " " -f 1` ...@@ -649,6 +650,7 @@ kill -9 `jps -l | grep "org.h2.test." | cut -d " " -f 1`
new TestAutoReconnect().runTest(this); new TestAutoReconnect().runTest(this);
new TestCache().runTest(this); new TestCache().runTest(this);
new TestClearReferences().runTest(this); new TestClearReferences().runTest(this);
new TestCollation().runTest(this);
new TestCompress().runTest(this); new TestCompress().runTest(this);
new TestConnectionInfo().runTest(this); new TestConnectionInfo().runTest(this);
new TestDataPage().runTest(this); new TestDataPage().runTest(this);
......
/*
* Copyright 2004-2011 H2 Group. Multiple-Licensed under the H2 License,
* Version 1.0, and under the Eclipse Public License, Version 1.0
* (http://h2database.com/html/license.html).
* Initial Developer: H2 Group
*/
package org.h2.test.unit;
import java.sql.Connection;
import java.sql.SQLException;
import java.sql.Statement;
import org.h2.constant.ErrorCode;
import org.h2.test.TestBase;
/**
 * Tests the SET COLLATION statement: an unknown collation name, a plain
 * locale name, and names with the DEFAULT_ and ICU4J_ prefixes.
 */
public class TestCollation extends TestBase {

    /**
     * Run just this test.
     *
     * @param a ignored
     */
    public static void main(String... a) throws Exception {
        TestBase.createCaller().init().test();
    }

    public void test() throws Exception {
        deleteDb("collation");
        Connection conn = getConnection("collation");
        Statement stat = conn.createStatement();
        checkUnknownCollation(stat);
        stat.execute("set collation en");
        stat.execute("set collation default_en");
        checkIcu4jCollation(stat);
        conn.close();
        deleteDb("collation");
    }

    /**
     * Verify that setting an unknown collation name fails with the error
     * code INVALID_VALUE_2.
     *
     * @param stat the statement to use
     */
    private void checkUnknownCollation(Statement stat) throws Exception {
        try {
            stat.execute("set collation xyz");
            fail();
        } catch (SQLException e) {
            assertEquals(ErrorCode.INVALID_VALUE_2, e.getErrorCode());
        }
    }

    /**
     * Set a collation with the ICU4J_ prefix. The statement may succeed;
     * if it fails, the error code must be CLASS_NOT_FOUND_1.
     *
     * @param stat the statement to use
     */
    private void checkIcu4jCollation(Statement stat) throws Exception {
        try {
            stat.execute("set collation icu4j_en");
        } catch (SQLException e) {
            assertEquals(ErrorCode.CLASS_NOT_FOUND_1, e.getErrorCode());
        }
    }

}
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论