提交 6839f393，作者：Thomas Mueller

Native fulltext search: the characters '<', '>', and '\' are now also whitespace…

Native fulltext search: the characters '<', '>', and '\' are now also whitespace characters. Also, the list of whitespace characters can be changed using FullText.setWhitespaceChars(conn, ...)
上级 d78e0337
...@@ -116,6 +116,7 @@ public class FullText { ...@@ -116,6 +116,7 @@ public class FullText {
stat.execute("CREATE TABLE IF NOT EXISTS " + SCHEMA stat.execute("CREATE TABLE IF NOT EXISTS " + SCHEMA
+ ".MAP(ROWID INT, WORDID INT, PRIMARY KEY(WORDID, ROWID))"); + ".MAP(ROWID INT, WORDID INT, PRIMARY KEY(WORDID, ROWID))");
stat.execute("CREATE TABLE IF NOT EXISTS " + SCHEMA + ".IGNORELIST(LIST VARCHAR)"); stat.execute("CREATE TABLE IF NOT EXISTS " + SCHEMA + ".IGNORELIST(LIST VARCHAR)");
stat.execute("CREATE TABLE IF NOT EXISTS " + SCHEMA + ".SETTINGS(KEY VARCHAR PRIMARY KEY, VALUE VARCHAR)");
stat.execute("CREATE ALIAS IF NOT EXISTS FT_CREATE_INDEX FOR \"" + FullText.class.getName() + ".createIndex\""); stat.execute("CREATE ALIAS IF NOT EXISTS FT_CREATE_INDEX FOR \"" + FullText.class.getName() + ".createIndex\"");
stat.execute("CREATE ALIAS IF NOT EXISTS FT_DROP_INDEX FOR \"" + FullText.class.getName() + ".dropIndex\""); stat.execute("CREATE ALIAS IF NOT EXISTS FT_DROP_INDEX FOR \"" + FullText.class.getName() + ".dropIndex\"");
stat.execute("CREATE ALIAS IF NOT EXISTS FT_SEARCH FOR \"" + FullText.class.getName() + ".search\""); stat.execute("CREATE ALIAS IF NOT EXISTS FT_SEARCH FOR \"" + FullText.class.getName() + ".search\"");
...@@ -128,6 +129,14 @@ public class FullText { ...@@ -128,6 +129,14 @@ public class FullText {
String commaSeparatedList = rs.getString(1); String commaSeparatedList = rs.getString(1);
setIgnoreList(setting, commaSeparatedList); setIgnoreList(setting, commaSeparatedList);
} }
rs = stat.executeQuery("SELECT * FROM " + SCHEMA + ".SETTINGS");
while (rs.next()) {
String key = rs.getString(1);
if ("whitespaceChars".equals(key)) {
String value = rs.getString(2);
setting.setWhitespaceChars(value);
}
}
rs = stat.executeQuery("SELECT * FROM " + SCHEMA + ".WORDS"); rs = stat.executeQuery("SELECT * FROM " + SCHEMA + ".WORDS");
HashMap<String, Integer> map = setting.getWordList(); HashMap<String, Integer> map = setting.getWordList();
while (rs.next()) { while (rs.next()) {
...@@ -323,6 +332,28 @@ public class FullText { ...@@ -323,6 +332,28 @@ public class FullText {
} }
} }
/**
 * Change the whitespace characters. The whitespace characters are used to
 * separate words. If indexes already exist at the time this list is
 * changed, reindex must be called.
 * <p>
 * The new list is applied to the settings object of this connection and
 * stored in the SETTINGS table under the key "whitespaceChars".
 *
 * @param conn the connection
 * @param whitespaceChars the list of characters
 * @throws SQLException if initializing or storing the setting fails
 */
public static void setWhitespaceChars(Connection conn, String whitespaceChars) throws SQLException {
    try {
        init(conn);
        FullTextSettings setting = FullTextSettings.getInstance(conn);
        setting.setWhitespaceChars(whitespaceChars);
        PreparedStatement prep = conn.prepareStatement("MERGE INTO " + SCHEMA + ".SETTINGS VALUES(?, ?)");
        try {
            prep.setString(1, "whitespaceChars");
            prep.setString(2, whitespaceChars);
            prep.execute();
        } finally {
            // always release the statement, even if the MERGE fails
            prep.close();
        }
    } catch (DbException e) {
        throw DbException.toSQLException(e);
    }
}
/** /**
* INTERNAL. * INTERNAL.
* Convert the object to a string. * Convert the object to a string.
...@@ -639,7 +670,8 @@ public class FullText { ...@@ -639,7 +670,8 @@ public class FullText {
StreamTokenizer tokenizer = new StreamTokenizer(reader); StreamTokenizer tokenizer = new StreamTokenizer(reader);
tokenizer.resetSyntax(); tokenizer.resetSyntax();
tokenizer.wordChars(' ' + 1, 255); tokenizer.wordChars(' ' + 1, 255);
for (char ch : " \t\n\r\f+\"*%&/()=?'!,.;:-_#@|^~`{}[]".toCharArray()) { char[] whitespaceChars = setting.getWhitespaceChars().toCharArray();
for (char ch : whitespaceChars) {
tokenizer.whitespaceChars(ch, ch); tokenizer.whitespaceChars(ch, ch);
} }
try { try {
...@@ -668,7 +700,8 @@ public class FullText { ...@@ -668,7 +700,8 @@ public class FullText {
* @param text the text * @param text the text
*/ */
protected static void addWords(FullTextSettings setting, HashSet<String> set, String text) { protected static void addWords(FullTextSettings setting, HashSet<String> set, String text) {
StringTokenizer tokenizer = new StringTokenizer(text, " \t\n\r\f+\"*%&/()=?'!,.;:-_#@|^~`{}[]"); String whitespaceChars = setting.getWhitespaceChars();
StringTokenizer tokenizer = new StringTokenizer(text, whitespaceChars);
while (tokenizer.hasMoreTokens()) { while (tokenizer.hasMoreTokens()) {
String word = tokenizer.nextToken(); String word = tokenizer.nextToken();
word = setting.convertWord(word); word = setting.convertWord(word);
......
...@@ -51,6 +51,11 @@ public class FullTextSettings { ...@@ -51,6 +51,11 @@ public class FullTextSettings {
*/ */
protected SoftHashMap<Connection, SoftHashMap<String, PreparedStatement>> cache = new SoftHashMap<Connection, SoftHashMap<String, PreparedStatement>>(); protected SoftHashMap<Connection, SoftHashMap<String, PreparedStatement>> cache = new SoftHashMap<Connection, SoftHashMap<String, PreparedStatement>>();
/**
* The whitespace characters.
*/
protected String whitespaceChars = " \t\n\r\f+\"*%&/()=?'!,.;:-_#@|^~`{}[]<>\\";
/** /**
* Create a new instance. * Create a new instance.
*/ */
...@@ -210,4 +215,12 @@ public class FullTextSettings { ...@@ -210,4 +215,12 @@ public class FullTextSettings {
SETTINGS.clear(); SETTINGS.clear();
} }
/**
 * Replace the list of whitespace characters kept by this settings object.
 * The string is stored as given; each character in it is one separator.
 *
 * @param whitespaceChars the new list of whitespace characters
 */
protected void setWhitespaceChars(String whitespaceChars) {
    this.whitespaceChars = whitespaceChars;
}
/**
 * Get the list of whitespace characters currently kept by this settings
 * object.
 *
 * @return the whitespace characters, as one string
 */
protected String getWhitespaceChars() {
    return this.whitespaceChars;
}
} }
...@@ -83,8 +83,9 @@ public class TestFullText extends TestBase { ...@@ -83,8 +83,9 @@ public class TestFullText extends TestBase {
stat.execute("CREATE ALIAS IF NOT EXISTS FT_INIT FOR \"org.h2.fulltext.FullText.init\""); stat.execute("CREATE ALIAS IF NOT EXISTS FT_INIT FOR \"org.h2.fulltext.FullText.init\"");
stat.execute("CALL FT_INIT()"); stat.execute("CALL FT_INIT()");
FullText.setIgnoreList(conn, "to,this"); FullText.setIgnoreList(conn, "to,this");
FullText.setWhitespaceChars(conn, " ,.-");
stat.execute("CREATE TABLE TEST(ID INT PRIMARY KEY, NAME VARCHAR)"); stat.execute("CREATE TABLE TEST(ID INT PRIMARY KEY, NAME VARCHAR)");
stat.execute("INSERT INTO TEST VALUES(1, 'Welcome to this world')"); stat.execute("INSERT INTO TEST VALUES(1, 'Welcome to this world, One_Word')");
stat.execute("CALL FT_CREATE_INDEX('PUBLIC', 'TEST', NULL)"); stat.execute("CALL FT_CREATE_INDEX('PUBLIC', 'TEST', NULL)");
ResultSet rs; ResultSet rs;
rs = stat.executeQuery("SELECT * FROM FT_SEARCH('Welcome', 0, 0)"); rs = stat.executeQuery("SELECT * FROM FT_SEARCH('Welcome', 0, 0)");
...@@ -93,6 +94,10 @@ public class TestFullText extends TestBase { ...@@ -93,6 +94,10 @@ public class TestFullText extends TestBase {
assertEquals("SCORE", rs.getMetaData().getColumnLabel(2)); assertEquals("SCORE", rs.getMetaData().getColumnLabel(2));
assertEquals("\"PUBLIC\".\"TEST\" WHERE \"ID\"=1", rs.getString(1)); assertEquals("\"PUBLIC\".\"TEST\" WHERE \"ID\"=1", rs.getString(1));
assertEquals("1.0", rs.getString(2)); assertEquals("1.0", rs.getString(2));
rs = stat.executeQuery("SELECT * FROM FT_SEARCH_DATA('One', 0, 0)");
assertFalse(rs.next());
rs = stat.executeQuery("SELECT * FROM FT_SEARCH_DATA('One_Word', 0, 0)");
assertTrue(rs.next());
rs = stat.executeQuery("SELECT * FROM FT_SEARCH_DATA('Welcome', 0, 0)"); rs = stat.executeQuery("SELECT * FROM FT_SEARCH_DATA('Welcome', 0, 0)");
assertTrue(rs.next()); assertTrue(rs.next());
assertEquals("SCHEMA", rs.getMetaData().getColumnLabel(1)); assertEquals("SCHEMA", rs.getMetaData().getColumnLabel(1));
......
Markdown 格式
0%
您即将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论