提交 2bddbdf2 authored 作者: Thomas Mueller's avatar Thomas Mueller

Support for the ICU4J collator.

上级 d4708573
......@@ -1027,6 +1027,12 @@ Sets the collation used for comparing strings.
This command can only be executed if there are no tables defined.
See ""java.text.Collator"" for details about the supported collations and the STRENGTH.
The ICU4J collator is used if it is in the classpath.
It is also used if the collation name starts with ICU4J_
(in that case, ICU4J must be in the classpath; otherwise an exception is thrown).
The default collator is used if the collation name starts with DEFAULT_
(even if ICU4J is in the classpath).
Admin rights are required to execute this command.
This command commits an open transaction.
This setting is persistent.
......
......@@ -17,7 +17,8 @@ Change Log
<h1>Change Log</h1>
<h2>Next Version (unreleased)</h2>
<ul><li>Improved Oracle compatibility: support for NVL2. Thanks to litailang for the patch!
<ul><li>Support for the ICU4J collator.
</li><li>Improved Oracle compatibility: support for NVL2. Thanks to litailang for the patch!
</li><li>Improved PostgreSQL compatibility: support for RANDOM() in addition to RAND().
</li><li>There was a classloader memory leak problem because a class contained a static
reference to an exception (including stack trace).
......
......@@ -157,22 +157,23 @@ is well tested (if possible with automated test cases). The areas that are not w
</p>
<ul>
<li>Platforms other than Windows XP, Linux, Mac OS X, or JVMs other than Sun 1.5 or 1.6
</li><li>The features <code>AUTO_SERVER</code> and <code>AUTO_RECONNECT</code>
</li><li>The file locking method 'Serialized'
</li><li>Cluster mode, 2-phase commit, savepoints
</li><li>24/7 operation
</li><li>Fulltext search
</li><li>Operations on LOBs over 2 GB
</li><li>Some operations on databases larger than 500 MB may be slower than expected
</li><li>The optimizer may not always select the best plan
</li><li>The features <code>AUTO_SERVER</code> and <code>AUTO_RECONNECT</code>.
</li><li>The file locking method 'Serialized'.
</li><li>Cluster mode, 2-phase commit, savepoints.
</li><li>24/7 operation.
</li><li>Fulltext search.
</li><li>Operations on LOBs over 2 GB.
</li><li>Some operations on databases larger than 500 MB may be slower than expected.
</li><li>The optimizer may not always select the best plan.
</li><li>Using the ICU4J collator.
</li></ul>
<p>
Areas considered experimental are:
</p>
<ul>
<li>The PostgreSQL server
</li><li>Multi-threading within the engine using <code>SET MULTI_THREADED=1</code>
</li><li>Compatibility modes for other databases (only some features are implemented)
</li><li>Multi-threading within the engine using <code>SET MULTI_THREADED=1</code>.
</li><li>Compatibility modes for other databases (only some features are implemented).
</li><li>The soft reference cache (<code>CACHE_TYPE=SOFT_LRU</code>). It might not improve performance,
and out of memory issues have been reported.
</li></ul>
......
......@@ -116,7 +116,8 @@ Features
</li><li>Wide range of data types including large objects (BLOB/CLOB) and arrays
</li><li>Sequence and autoincrement columns, computed columns (can be used for function based indexes)
</li><li><code>ORDER BY, GROUP BY, HAVING, UNION, LIMIT, TOP</code>
</li><li>Collation support, users, roles
</li><li>Collation support, including support for the ICU4J library
</li><li>Support for users and roles
</li><li>Compatibility modes for IBM DB2, Apache Derby, HSQLDB,
MS SQL Server, MySQL, Oracle, and PostgreSQL.
</li></ul>
......
......@@ -4520,7 +4520,7 @@ public class Parser {
}
Collator coll = CompareMode.getCollator(name);
if (coll == null) {
throw getSyntaxError();
throw DbException.getInvalidValueException("collation", name);
}
if (readIf("STRENGTH")) {
if (readIf("PRIMARY")) {
......
......@@ -6,48 +6,53 @@
*/
package org.h2.value;
import java.text.CollationKey;
import java.text.Collator;
import java.util.Locale;
import org.h2.constant.SysProperties;
import org.h2.util.SmallLRUCache;
import org.h2.util.StringUtils;
/**
* Instances of this class can compare strings.
* Case sensitive and case insensitive comparison is supported,
* and comparison using a collator.
* Instances of this class can compare strings. Case sensitive and case
* insensitive comparison is supported, and comparison using a collator.
*/
public class CompareMode {
/**
* This constant means there is no collator set,
* and the default string comparison is to be used.
* This constant means there is no collator set, and the default string
* comparison is to be used.
*/
public static final String OFF = "OFF";
/**
* This constant means the default collator should be used, even if ICU4J is
* in the classpath.
*/
public static final String DEFAULT = "DEFAULT_";
/**
* This constant means ICU4J should be used (this will fail if it is not in
* the classpath).
*/
public static final String ICU4J = "ICU4J_";
private static CompareMode lastUsed;
private static boolean canUseICU4J;
static {
try {
Class.forName("com.ibm.icu.text.Collator");
canUseICU4J = true;
} catch (Exception e) {
// ignore
}
}
private final String name;
private final int strength;
private final Collator collator;
private final SmallLRUCache<String, CollationKey> collationKeys;
private CompareMode(String name, int strength) {
protected CompareMode(String name, int strength) {
this.name = name;
this.strength = strength;
this.collator = CompareMode.getCollator(name);
int cacheSize = 0;
if (collator != null) {
this.collator.setStrength(strength);
cacheSize = SysProperties.COLLATOR_CACHE_SIZE;
}
if (cacheSize != 0) {
collationKeys = SmallLRUCache.newInstance(cacheSize);
} else {
collationKeys = null;
}
}
/**
......@@ -68,7 +73,25 @@ public class CompareMode {
}
}
}
if (name == null || name.equals(OFF)) {
lastUsed = new CompareMode(name, strength);
} else {
boolean useICU4J;
if (name.startsWith(ICU4J)) {
useICU4J = true;
name = name.substring(ICU4J.length());
} else if (name.startsWith(DEFAULT)) {
useICU4J = false;
name = name.substring(DEFAULT.length());
} else {
useICU4J = canUseICU4J;
}
if (useICU4J) {
lastUsed = new CompareModeIcu4J(name, strength);
} else {
lastUsed = new CompareModeDefault(name, strength);
}
}
return lastUsed;
}
......@@ -83,9 +106,6 @@ public class CompareMode {
* @return true if the characters are equal
*/
public boolean equalsChars(String a, int ai, String b, int bi, boolean ignoreCase) {
if (collator != null) {
return compareString(a.substring(ai, ai + 1), b.substring(bi, bi + 1), ignoreCase) == 0;
}
char ca = a.charAt(ai);
char cb = b.charAt(bi);
if (ignoreCase) {
......@@ -105,38 +125,11 @@ public class CompareMode {
* smaller, and 0 if they are equal
*/
public int compareString(String a, String b, boolean ignoreCase) {
if (collator == null) {
if (ignoreCase) {
return a.compareToIgnoreCase(b);
}
return a.compareTo(b);
}
if (ignoreCase) {
// this is locale sensitive
a = a.toUpperCase();
b = b.toUpperCase();
}
int comp;
if (collationKeys != null) {
CollationKey aKey = getKey(a);
CollationKey bKey = getKey(b);
comp = aKey.compareTo(bKey);
} else {
comp = collator.compare(a, b);
}
return comp;
}
private CollationKey getKey(String a) {
synchronized (collationKeys) {
CollationKey key = collationKeys.get(a);
if (key == null) {
key = collator.getCollationKey(a);
collationKeys.put(a, key);
}
return key;
}
}
/**
* Get the collation name.
......@@ -151,7 +144,15 @@ public class CompareMode {
return name;
}
private static boolean compareLocaleNames(Locale locale, String name) {
/**
* Compare the name of the locale with the given name. The case of the name
* is ignored.
*
* @param locale the locale
* @param name the name
* @return true if they match
*/
static boolean compareLocaleNames(Locale locale, String name) {
return name.equalsIgnoreCase(locale.toString()) || name.equalsIgnoreCase(getName(locale));
}
......@@ -163,10 +164,12 @@ public class CompareMode {
* @return the collator
*/
public static Collator getCollator(String name) {
if (name == null || name.equals(OFF)) {
return null;
}
Collator result = null;
if (name.startsWith(ICU4J)) {
name = name.substring(ICU4J.length());
} else if (name.startsWith(DEFAULT)) {
name = name.substring(DEFAULT.length());
}
if (name.length() == 2) {
Locale locale = new Locale(name.toLowerCase(), "");
if (compareLocaleNames(locale, name)) {
......
/*
* Copyright 2004-2011 H2 Group. Multiple-Licensed under the H2 License,
* Version 1.0, and under the Eclipse Public License, Version 1.0
* (http://h2database.com/html/license.html).
* Initial Developer: H2 Group
*/
package org.h2.value;
import java.text.CollationKey;
import java.text.Collator;
import org.h2.constant.SysProperties;
import org.h2.message.DbException;
import org.h2.util.SmallLRUCache;
/**
 * A CompareMode that delegates string comparison to a java.text.Collator.
 * Collation keys can be kept in a small LRU cache so that repeatedly compared
 * strings do not need their key computed again.
 */
public class CompareModeDefault extends CompareMode {

    private final Collator collator;

    /**
     * The collation key cache, or null if caching is disabled.
     */
    private final SmallLRUCache<String, CollationKey> collationKeys;

    /**
     * Create a compare mode for the given collation name and strength.
     * An internal error is raised if no collator is found for the name.
     *
     * @param name the collation name
     * @param strength the collator strength
     */
    protected CompareModeDefault(String name, int strength) {
        super(name, strength);
        Collator c = CompareMode.getCollator(name);
        if (c == null) {
            throw DbException.throwInternalError(name);
        }
        c.setStrength(strength);
        collator = c;
        // a cache size of 0 disables the collation key cache
        int size = SysProperties.COLLATOR_CACHE_SIZE;
        SmallLRUCache<String, CollationKey> cache = null;
        if (size != 0) {
            cache = SmallLRUCache.newInstance(size);
        }
        collationKeys = cache;
    }

    /**
     * Compare two strings using the collator.
     *
     * @param a the first string
     * @param b the second string
     * @param ignoreCase true if both strings should be converted to upper
     *            case before comparing
     * @return the result of the collator (or collation key) comparison
     */
    public int compareString(String a, String b, boolean ignoreCase) {
        // the upper case conversion is locale sensitive
        String left = ignoreCase ? a.toUpperCase() : a;
        String right = ignoreCase ? b.toUpperCase() : b;
        if (collationKeys == null) {
            return collator.compare(left, right);
        }
        CollationKey leftKey = getKey(left);
        CollationKey rightKey = getKey(right);
        return leftKey.compareTo(rightKey);
    }

    /**
     * Check whether the character at position ai of a and the character at
     * position bi of b compare as equal.
     *
     * @param a the first string
     * @param ai the character index in the first string
     * @param b the second string
     * @param bi the character index in the second string
     * @param ignoreCase true if the case should be ignored
     * @return true if the comparison result is 0
     */
    public boolean equalsChars(String a, int ai, String b, int bi, boolean ignoreCase) {
        String charA = a.substring(ai, ai + 1);
        String charB = b.substring(bi, bi + 1);
        return compareString(charA, charB, ignoreCase) == 0;
    }

    /**
     * Get the collation key for the string from the cache, computing and
     * storing it first if it is not there yet. Must only be called when the
     * cache is enabled.
     *
     * @param a the string
     * @return the collation key
     */
    private CollationKey getKey(String a) {
        synchronized (collationKeys) {
            CollationKey cached = collationKeys.get(a);
            if (cached != null) {
                return cached;
            }
            CollationKey created = collator.getCollationKey(a);
            collationKeys.put(a, created);
            return created;
        }
    }

}
/*
* Copyright 2004-2011 H2 Group. Multiple-Licensed under the H2 License,
* Version 1.0, and under the Eclipse Public License, Version 1.0
* (http://h2database.com/html/license.html).
* Initial Developer: H2 Group
*/
package org.h2.value;
import java.lang.reflect.Method;
import java.util.Comparator;
import java.util.Locale;
import org.h2.message.DbException;
import org.h2.util.Utils;
/**
 * An implementation of CompareMode that uses the ICU4J Collator.
 * The ICU4J classes are accessed using reflection only, so that there is no
 * compile time dependency on the library.
 */
public class CompareModeIcu4J extends CompareMode {

    private final Comparator<String> collator;

    /**
     * Create a compare mode for the given collation name and strength.
     *
     * @param name the collation name (locale name)
     * @param strength the collator strength
     */
    protected CompareModeIcu4J(String name, int strength) {
        super(name, strength);
        collator = getIcu4jCollator(name, strength);
    }

    /**
     * Compare two strings using the ICU4J collator.
     *
     * @param a the first string
     * @param b the second string
     * @param ignoreCase true if both strings should be converted to upper
     *            case before comparing
     * @return the result of the collator comparison
     */
    public int compareString(String a, String b, boolean ignoreCase) {
        if (ignoreCase) {
            // this is locale sensitive
            a = a.toUpperCase();
            b = b.toUpperCase();
        }
        return collator.compare(a, b);
    }

    /**
     * Check whether the character at position ai of a and the character at
     * position bi of b compare as equal.
     *
     * @param a the first string
     * @param ai the character index in the first string
     * @param b the second string
     * @param bi the character index in the second string
     * @param ignoreCase true if the case should be ignored
     * @return true if the comparison result is 0
     */
    public boolean equalsChars(String a, int ai, String b, int bi, boolean ignoreCase) {
        return compareString(a.substring(ai, ai + 1), b.substring(bi, bi + 1), ignoreCase) == 0;
    }

    /**
     * Load the ICU4J Collator class, find the collator instance that matches
     * the given locale name, and set its strength.
     *
     * @param name the locale name (LL, LL_CC, or the name of one of the
     *            locales available in ICU4J)
     * @param strength the collator strength
     * @return the collator
     * @throws DbException if the class can not be loaded, no locale matches
     *             the name, or a reflective call fails
     */
    @SuppressWarnings("unchecked")
    private static Comparator<String> getIcu4jCollator(String name, int strength) {
        try {
            Comparator<String> result = null;
            Class<?> collatorClass = Utils.loadUserClass("com.ibm.icu.text.Collator");
            Method getInstanceMethod = collatorClass.getMethod("getInstance", Locale.class);
            if (name.length() == 2) {
                // LL (language only)
                Locale locale = new Locale(name.toLowerCase(), "");
                if (compareLocaleNames(locale, name)) {
                    result = (Comparator<String>) getInstanceMethod.invoke(null, locale);
                }
            } else if (name.length() == 5) {
                // LL_CC (language_country)
                int idx = name.indexOf('_');
                if (idx >= 0) {
                    String language = name.substring(0, idx).toLowerCase();
                    String country = name.substring(idx + 1);
                    Locale locale = new Locale(language, country);
                    if (compareLocaleNames(locale, name)) {
                        result = (Comparator<String>) getInstanceMethod.invoke(null, locale);
                    }
                }
            }
            if (result == null) {
                // fall back to searching all locales known to ICU4J
                for (Locale locale : (Locale[]) collatorClass.getMethod("getAvailableLocales").invoke(null)) {
                    if (compareLocaleNames(locale, name)) {
                        result = (Comparator<String>) getInstanceMethod.invoke(null, locale);
                        break;
                    }
                }
            }
            if (result == null) {
                throw DbException.getInvalidValueException("collator", name);
            }
            int icuStrength = toIcu4jStrength(collatorClass, strength);
            collatorClass.getMethod("setStrength", int.class).invoke(result, icuStrength);
            return result;
        } catch (Exception e) {
            throw DbException.convert(e);
        }
    }

    /**
     * Convert a strength value to the value expected by the ICU4J collator.
     * PRIMARY, SECONDARY and TERTIARY have the same numeric value in
     * java.text.Collator and in ICU4J, but IDENTICAL does not: the
     * java.text.Collator value of IDENTICAL is the value ICU4J uses for
     * QUATERNARY, so it has to be translated.
     * NOTE(review): this assumes the strength passed in uses the
     * java.text.Collator constants - confirm against the callers.
     *
     * @param collatorClass the ICU4J Collator class
     * @param strength the strength
     * @return the strength to pass to the ICU4J setStrength method
     */
    private static int toIcu4jStrength(Class<?> collatorClass, int strength) throws Exception {
        if (strength == java.text.Collator.IDENTICAL) {
            return collatorClass.getField("IDENTICAL").getInt(null);
        }
        return strength;
    }

}
......@@ -119,6 +119,7 @@ import org.h2.test.unit.TestAutoReconnect;
import org.h2.test.unit.TestBitField;
import org.h2.test.unit.TestCache;
import org.h2.test.unit.TestClearReferences;
import org.h2.test.unit.TestCollation;
import org.h2.test.unit.TestCompress;
import org.h2.test.unit.TestConnectionInfo;
import org.h2.test.unit.TestDataPage;
......@@ -649,6 +650,7 @@ kill -9 `jps -l | grep "org.h2.test." | cut -d " " -f 1`
new TestAutoReconnect().runTest(this);
new TestCache().runTest(this);
new TestClearReferences().runTest(this);
new TestCollation().runTest(this);
new TestCompress().runTest(this);
new TestConnectionInfo().runTest(this);
new TestDataPage().runTest(this);
......
/*
* Copyright 2004-2011 H2 Group. Multiple-Licensed under the H2 License,
* Version 1.0, and under the Eclipse Public License, Version 1.0
* (http://h2database.com/html/license.html).
* Initial Developer: H2 Group
*/
package org.h2.test.unit;
import java.sql.Connection;
import java.sql.SQLException;
import java.sql.Statement;
import org.h2.constant.ErrorCode;
import org.h2.test.TestBase;
/**
 * Test the SET COLLATION statement, including the DEFAULT_ and ICU4J_
 * collation name prefixes.
 */
public class TestCollation extends TestBase {

    /**
     * Run just this test.
     *
     * @param a ignored
     */
    public static void main(String... a) throws Exception {
        TestBase.createCaller().init().test();
    }

    public void test() throws Exception {
        deleteDb("collation");
        Connection conn = getConnection("collation");
        // close the connection even if one of the assertions fails
        try {
            Statement stat = conn.createStatement();
            // an unknown collation name must be rejected
            try {
                stat.execute("set collation xyz");
                fail();
            } catch (SQLException e) {
                assertEquals(ErrorCode.INVALID_VALUE_2, e.getErrorCode());
            }
            stat.execute("set collation en");
            stat.execute("set collation default_en");
            // this statement is allowed to succeed (there is no fail() call);
            // if it fails, the error must be 'class not found'
            try {
                stat.execute("set collation icu4j_en");
            } catch (SQLException e) {
                assertEquals(ErrorCode.CLASS_NOT_FOUND_1, e.getErrorCode());
            }
        } finally {
            conn.close();
        }
        deleteDb("collation");
    }

}
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论