提交 3049b1ed authored 作者: Niklas Mehner's avatar Niklas Mehner

Implement charset collator.

The charset collator allows sorting strings according to their binary representation in a given charset.
上级 f5d37007
......@@ -1180,12 +1180,16 @@ It is also used if the collation name starts with ICU4J_
(in that case, the ICU4J must be in the classpath, otherwise an exception is thrown).
The default collator is used if the collation name starts with DEFAULT_
(even if ICU4J is in the classpath).
The charset collator is used if the collation name starts with CHARSET_ (e.g. CHARSET_CP500). This collator sorts
strings according to the binary representation in the given charset.
Admin rights are required to execute this command.
This command commits an open transaction in this connection.
This setting is persistent.
This setting can be appended to the database URL: ""jdbc:h2:test;COLLATION='ENGLISH'""
","
SET COLLATION ENGLISH
SET COLLATION CHARSET_CP500
"
"Commands (Other)","SET COMPRESS_LOB","
......
package org.h2.value;
import java.nio.charset.Charset;
import java.text.CollationKey;
import java.text.Collator;
import java.util.Comparator;
/**
 * The charset collator sorts strings according to the order of their encoded
 * bytes in the given charset.
 * <p>
 * The encoded bytes are compared lexicographically as <em>signed</em> values
 * (so a byte of 0x80 or above sorts before 0x00..0x7F); when one encoding is a
 * prefix of the other, the shorter one sorts first.
 */
public class CharsetCollator extends Collator {

    /**
     * Compares two byte arrays lexicographically, byte by byte, using the
     * signed value of each byte. If all shared positions are equal, the
     * shorter array is the smaller one.
     */
    private static final Comparator<byte[]> COMPARATOR = new Comparator<byte[]>() {
        @Override
        public int compare(byte[] b1, byte[] b2) {
            int minLength = Math.min(b1.length, b2.length);
            for (int index = 0; index < minLength; index++) {
                // both operands are promoted to int, so this cannot overflow
                int result = b1[index] - b2[index];
                if (result != 0) {
                    return result;
                }
            }
            return b1.length - b2.length;
        }
    };

    /** The charset used to encode strings before they are compared. */
    private final Charset charset;

    /**
     * Creates a collator that orders strings by their encoding in the given
     * charset.
     *
     * @param charset the charset used to encode the strings
     */
    public CharsetCollator(Charset charset) {
        this.charset = charset;
    }

    /**
     * Returns the charset this collator encodes strings with.
     *
     * @return the charset passed to the constructor
     */
    public Charset getCharset() {
        return charset;
    }

    /**
     * Compares two strings by the bytes of their encoding in this collator's
     * charset.
     *
     * @param source the first string
     * @param target the second string
     * @return a negative value, zero, or a positive value if the encoding of
     *         source sorts before, equal to, or after the encoding of target
     */
    @Override
    public int compare(String source, String target) {
        return COMPARATOR.compare(toBytes(source), toBytes(target));
    }

    /**
     * Encodes the string using this collator's charset.
     *
     * @param source the string to encode
     * @return the encoded bytes
     */
    private byte[] toBytes(String source) {
        return source.getBytes(charset);
    }

    /**
     * Creates a collation key whose ordering matches
     * {@link #compare(String, String)}.
     *
     * @param source the string to create the key for
     * @return the collation key
     */
    @Override
    public CollationKey getCollationKey(final String source) {
        return new CharsetCollationKey(source);
    }

    /**
     * Two charset collators are equal if {@link Collator#equals(Object)}
     * considers them equal and they use the same charset. Without the charset
     * check, collators for different charsets would compare equal although
     * they order strings differently.
     *
     * @param obj the object to compare with
     * @return true if obj is an equivalent charset collator
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!super.equals(obj)) {
            return false;
        }
        // super.equals only returns true for an object of the same class
        Charset other = ((CharsetCollator) obj).charset;
        return charset == null ? other == null : charset.equals(other);
    }

    /**
     * Returns a hash code derived from the charset, which is consistent with
     * {@link #equals(Object)}.
     *
     * @return the hash code
     */
    @Override
    public int hashCode() {
        return charset == null ? 0 : charset.hashCode();
    }

    /**
     * A collation key that holds the bytes of the source string encoded in
     * the charset of the enclosing collator. The bytes are computed once, when
     * the key is created.
     */
    private final class CharsetCollationKey extends CollationKey {

        /** The encoded source string; never handed out without copying. */
        private final byte[] key;

        CharsetCollationKey(String source) {
            super(source);
            this.key = toBytes(source);
        }

        /**
         * Compares this key with another key by their encoded bytes. The
         * target is always interpreted in the charset of this key's collator.
         *
         * @param target the key to compare with
         * @return a negative value, zero, or a positive value
         */
        @Override
        public int compareTo(CollationKey target) {
            return COMPARATOR.compare(key, bytesOf(target));
        }

        /**
         * Returns a copy of the encoded bytes, so callers cannot modify the
         * key.
         *
         * @return the encoded bytes of the source string
         */
        @Override
        public byte[] toByteArray() {
            return key.clone();
        }

        /**
         * Returns the bytes to compare against for the given key: the bytes
         * it already holds if it was created by the same collator instance,
         * otherwise its source string encoded with this collator's charset.
         */
        private byte[] bytesOf(CollationKey target) {
            if (target instanceof CharsetCollationKey) {
                CharsetCollationKey other = (CharsetCollationKey) target;
                if (other.owner() == CharsetCollator.this) {
                    return other.key;
                }
            }
            return toBytes(target.getSourceString());
        }

        /** Returns the collator instance that created this key. */
        private CharsetCollator owner() {
            return CharsetCollator.this;
        }
    }
}
......@@ -5,6 +5,7 @@
*/
package org.h2.value;
import java.nio.charset.Charset;
import java.text.Collator;
import java.util.Locale;
......@@ -35,6 +36,12 @@ public class CompareMode {
*/
public static final String ICU4J = "ICU4J_";
/**
* This constant means the charset specified should be used. This will fail if the specified charset does
* not exist.
*/
public static final String CHARSET = "CHARSET_";
/**
* This constant means that the BINARY columns are sorted as if the bytes
* were signed.
......@@ -210,6 +217,8 @@ public class CompareMode {
name = name.substring(ICU4J.length());
} else if (name.startsWith(DEFAULT)) {
name = name.substring(DEFAULT.length());
} else if (name.startsWith(CHARSET)) {
return new CharsetCollator(Charset.forName(name.substring(CHARSET.length())));
}
if (name.length() == 2) {
Locale locale = new Locale(StringUtils.toLowerEnglish(name), "");
......
......@@ -67,6 +67,7 @@ import org.h2.test.db.TestScriptSimple;
import org.h2.test.db.TestSelectCountNonNullColumn;
import org.h2.test.db.TestSequence;
import org.h2.test.db.TestSessionsLocks;
import org.h2.test.db.TestSetCollation;
import org.h2.test.db.TestShow;
import org.h2.test.db.TestSpaceReuse;
import org.h2.test.db.TestSpatial;
......@@ -172,6 +173,7 @@ import org.h2.test.unit.TestBitField;
import org.h2.test.unit.TestBitStream;
import org.h2.test.unit.TestBnf;
import org.h2.test.unit.TestCache;
import org.h2.test.unit.TestCharsetCollator;
import org.h2.test.unit.TestClearReferences;
import org.h2.test.unit.TestCollation;
import org.h2.test.unit.TestCompress;
......@@ -419,6 +421,10 @@ java org.h2.test.TestAll timer
*/
String cacheType;
/** If not null, the database is opened with this value as the COLLATION URL parameter. */
public String collation;
/**
* The AB-BA locking detector.
*/
......@@ -431,6 +437,7 @@ java org.h2.test.TestAll timer
private Server server;
/**
* Run all tests.
*
......@@ -791,6 +798,7 @@ kill -9 `jps -l | grep "org.h2.test." | cut -d " " -f 1`
addTest(new TestUpdatableResultSet());
addTest(new TestZloty());
addTest(new TestCustomDataTypesHandler());
addTest(new TestSetCollation());
// jdbcx
addTest(new TestConnectionPool());
......@@ -875,6 +883,7 @@ kill -9 `jps -l | grep "org.h2.test." | cut -d " " -f 1`
addTest(new TestBitStream());
addTest(new TestBnf());
addTest(new TestCache());
addTest(new TestCharsetCollator());
addTest(new TestClearReferences());
addTest(new TestCollation());
addTest(new TestCompress());
......@@ -1102,6 +1111,7 @@ kill -9 `jps -l | grep "org.h2.test." | cut -d " " -f 1`
appendIf(buff, stopOnError, "stopOnError");
appendIf(buff, defrag, "defrag");
appendIf(buff, splitFileSystem, "split");
appendIf(buff, collation != null, collation);
return buff.toString();
}
......
......@@ -335,6 +335,9 @@ public abstract class TestBase {
if (config.defrag) {
url = addOption(url, "DEFRAG_ALWAYS", "TRUE");
}
if (config.collation != null) {
url = addOption(url, "COLLATION", config.collation);
}
return "jdbc:h2:" + url;
}
......
package org.h2.test.db;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.h2.jdbc.JdbcSQLException;
import org.h2.test.TestBase;
/**
 * Tests that a collation can be selected with the SET COLLATION statement or
 * with the collation field of the test configuration, and that the ordering
 * of query results follows the selected collation, also after the database is
 * reopened.
 */
public class TestSetCollation extends TestBase {

    /** The values inserted into the test table, in insertion order. */
    private static final String[] TEST_STRINGS = new String[]{"A", "Ä", "AA", "B", "$", "1A", null};

    /** The name of the database used by all tests in this class. */
    public static final String DB_NAME = "collator";

    /**
     * Run just this test.
     *
     * @param a ignored
     */
    public static void main(String... a) throws Exception {
        TestBase.createCaller().init().test();
    }

    @Override
    public void test() throws Exception {
        testDefaultCollator();
        testCp500Collator();
        testDeCollator();
        testUrlParameter();
        testReopenDatabase();
        testReopenDatabaseWithUrlParameter();
        testReopenDatabaseWithDifferentCollationInUrl();
        testReopenDatabaseWithSameCollationInUrl();
    }

    /** Checks the ordering when no collation is set at all. */
    private void testDefaultCollator() throws Exception {
        assertEquals(Arrays.asList(null, "$", "1A", "A", "AA", "B", "Ä"), orderedWithCollator(null));
    }

    /** Checks the ordering of the DE collation, with and without the DEFAULT_ prefix. */
    private void testDeCollator() throws Exception {
        assertEquals(Arrays.asList(null, "$", "1A", "A", "Ä", "AA", "B"), orderedWithCollator("DE"));
        assertEquals(Arrays.asList(null, "$", "1A", "A", "Ä", "AA", "B"), orderedWithCollator("DEFAULT_DE"));
    }

    /** Checks the ordering of the charset collation for code page 500. */
    private void testCp500Collator() throws Exception {
        // IBM z/OS codepage
        assertEquals(Arrays.asList(null, "A", "AA", "B", "1A", "$", "Ä"),
                orderedWithCollator("CHARSET_CP500"));
    }

    /**
     * Checks that setting the collation through the test configuration gives
     * the same ordering as the SET COLLATION statement.
     */
    private void testUrlParameter() throws Exception {
        // Specifying the collator in the JDBC Url should have the same effect as setting it with a set statement
        config.collation = "CHARSET_CP500";
        try {
            assertEquals(Arrays.asList(null, "A", "AA", "B", "1A", "$", "Ä"), orderedWithCollator(null));
        } finally {
            config.collation = null;
        }
    }

    /**
     * Checks that a collation set with SET COLLATION is still in effect when
     * the database is opened again.
     */
    private void testReopenDatabase() throws Exception {
        orderedWithCollator("DE");
        try (Connection con = getConnection(DB_NAME)) {
            insertValues(con, new String[]{"A", "Ä"}, 100);
            assertEquals(Arrays.asList(null, "$", "1A", "A", "A", "Ä", "Ä", "AA", "B"), loadTableValues(con));
        }
    }

    /**
     * Checks that a collation set through the test configuration is still in
     * effect when the database is opened again without it.
     */
    private void testReopenDatabaseWithUrlParameter() throws Exception {
        config.collation = "DE";
        try {
            orderedWithCollator(null);
        } finally {
            config.collation = null;
        }
        // reopen the database without specifying a collation in the url. This should keep the initial collation.
        try (Connection con = getConnection(DB_NAME)) {
            insertValues(con, new String[]{"A", "Ä"}, 100);
            assertEquals(Arrays.asList(null, "$", "1A", "A", "A", "Ä", "Ä", "AA", "B"), loadTableValues(con));
        }
    }

    /**
     * Checks that opening an existing database with a different collation
     * than the one it was created with is rejected.
     */
    private void testReopenDatabaseWithDifferentCollationInUrl() throws Exception {
        config.collation = "DE";
        try {
            orderedWithCollator(null);
        } finally {
            config.collation = null;
        }
        config.collation = "CHARSET_CP500";
        // try-with-resources closes the connection if opening it unexpectedly succeeds
        try (Connection con = getConnection(DB_NAME)) {
            fail();
        } catch (JdbcSQLException e) {
            // expected
        } finally {
            config.collation = null;
        }
    }

    /**
     * Checks that opening an existing database with the same collation it was
     * created with works and keeps the ordering.
     */
    private void testReopenDatabaseWithSameCollationInUrl() throws Exception {
        config.collation = "DE";
        try {
            orderedWithCollator(null);
        } finally {
            config.collation = null;
        }
        config.collation = "DE";
        try (Connection con = getConnection(DB_NAME)) {
            insertValues(con, new String[]{"A", "Ä"}, 100);
            assertEquals(Arrays.asList(null, "$", "1A", "A", "A", "Ä", "Ä", "AA", "B"), loadTableValues(con));
        } finally {
            config.collation = null;
        }
    }

    /**
     * Deletes and re-creates the test database, optionally sets the collation,
     * fills the test table with {@link #TEST_STRINGS} and returns the values
     * in query order.
     *
     * @param collator the collation name for SET COLLATION, or null to
     *            execute no SET COLLATION statement
     * @return the table values ordered by the database
     */
    private List<String> orderedWithCollator(String collator) throws SQLException {
        deleteDb(DB_NAME);
        try (Connection con = getConnection(DB_NAME); Statement statement = con.createStatement()) {
            if (collator != null) {
                statement.execute("SET COLLATION " + collator);
            }
            statement.execute("CREATE TABLE charsettable(id INT PRIMARY KEY, testvalue VARCHAR(50))");
            insertValues(con, TEST_STRINGS, 1);
            return loadTableValues(con);
        }
    }

    /**
     * Inserts the values into the test table, using consecutive ids.
     *
     * @param con the connection to use
     * @param values the values to insert
     * @param startId the id of the first inserted row
     */
    private void insertValues(Connection con, String[] values, int startId) throws SQLException {
        // the statement is closed even if one of the inserts fails
        try (PreparedStatement ps = con.prepareStatement("INSERT INTO charsettable VALUES (?, ?)")) {
            int id = startId;
            for (String value : values) {
                ps.setInt(1, id++);
                ps.setString(2, value);
                ps.execute();
            }
        }
    }

    /**
     * Reads all values of the test table, ordered by the database.
     *
     * @param con the connection to use
     * @return the values in the order returned by the query
     */
    private List<String> loadTableValues(Connection con) throws SQLException {
        List<String> results = new ArrayList<>();
        // statement and result set are closed even if reading fails
        try (Statement statement = con.createStatement();
                ResultSet resultSet = statement.executeQuery(
                        "select testvalue from charsettable order by testvalue")) {
            while (resultSet.next()) {
                results.add(resultSet.getString(1));
            }
        }
        return results;
    }
}
package org.h2.test.unit;
import java.nio.charset.Charset;
import java.nio.charset.UnsupportedCharsetException;
import java.text.Collator;
import org.h2.test.TestBase;
import org.h2.value.CharsetCollator;
import org.h2.value.CompareMode;
/**
 * Unit test for org.h2.value.CharsetCollator: checks the ordering produced
 * for two charsets and the creation of the collator through CompareMode.
 */
public class TestCharsetCollator extends TestBase {

    /** Collator ordering strings by their cp500 encoding. */
    private final CharsetCollator cp500Collator = new CharsetCollator(Charset.forName("cp500"));

    /** Collator ordering strings by their UTF-8 encoding. */
    private final CharsetCollator utf8Collator = new CharsetCollator(Charset.forName("UTF-8"));

    /**
     * Run just this test.
     *
     * @param a ignored
     */
    public static void main(String... a) throws Exception {
        TestBase.createCaller().init().test();
    }

    @Override
    public void test() throws Exception {
        testBasicComparison();
        testNumberToCharacterComparison();
        testLengthComparison();
        testCreationFromCompareMode();
        testCreationFromCompareModeWithInvalidCharset();
    }

    /** Letters keep their alphabetical order in cp500. */
    private void testBasicComparison() {
        int singleLetters = cp500Collator.compare("A", "B");
        assertTrue(singleLetters < 0);
        int letterPairs = cp500Collator.compare("AA", "AB");
        assertTrue(letterPairs < 0);
    }

    /** The relative order of a letter and a digit depends on the charset. */
    private void testNumberToCharacterComparison() {
        int inCp500 = cp500Collator.compare("A", "1");
        assertTrue(inCp500 < 0);
        int inUtf8 = utf8Collator.compare("A", "1");
        assertTrue(inUtf8 > 0);
    }

    /** A string sorts after its own proper prefix. */
    private void testLengthComparison() {
        int longerVersusPrefix = utf8Collator.compare("AA", "A");
        assertTrue(longerVersusPrefix > 0);
    }

    /** A CHARSET_ collation name yields a CharsetCollator for that charset. */
    private void testCreationFromCompareMode() {
        Collator created = CompareMode.getCollator("CHARSET_UTF-8");
        assertTrue(created instanceof CharsetCollator);
        CharsetCollator charsetCollator = (CharsetCollator) created;
        assertEquals(charsetCollator.getCharset(), Charset.forName("UTF-8"));
    }

    /** An unknown charset name is reported with an UnsupportedCharsetException. */
    private void testCreationFromCompareModeWithInvalidCharset() {
        try {
            CompareMode.getCollator("CHARSET_INVALID");
            fail();
        } catch (UnsupportedCharsetException e) {
            // expected
        }
    }
}
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论