提交 6839f393 authored 作者: Thomas Mueller's avatar Thomas Mueller

Native fulltext search: the characters '<', '>', and '\' are now also whitespace…

Native fulltext search: the characters '<', '>', and '\' are now also treated as whitespace characters. In addition, the list of whitespace characters can be changed using FullText.setWhitespaceChars(conn, ...).
上级 d78e0337
......@@ -116,6 +116,7 @@ public class FullText {
stat.execute("CREATE TABLE IF NOT EXISTS " + SCHEMA
+ ".MAP(ROWID INT, WORDID INT, PRIMARY KEY(WORDID, ROWID))");
stat.execute("CREATE TABLE IF NOT EXISTS " + SCHEMA + ".IGNORELIST(LIST VARCHAR)");
stat.execute("CREATE TABLE IF NOT EXISTS " + SCHEMA + ".SETTINGS(KEY VARCHAR PRIMARY KEY, VALUE VARCHAR)");
stat.execute("CREATE ALIAS IF NOT EXISTS FT_CREATE_INDEX FOR \"" + FullText.class.getName() + ".createIndex\"");
stat.execute("CREATE ALIAS IF NOT EXISTS FT_DROP_INDEX FOR \"" + FullText.class.getName() + ".dropIndex\"");
stat.execute("CREATE ALIAS IF NOT EXISTS FT_SEARCH FOR \"" + FullText.class.getName() + ".search\"");
......@@ -128,6 +129,14 @@ public class FullText {
String commaSeparatedList = rs.getString(1);
setIgnoreList(setting, commaSeparatedList);
}
rs = stat.executeQuery("SELECT * FROM " + SCHEMA + ".SETTINGS");
while (rs.next()) {
String key = rs.getString(1);
if ("whitespaceChars".equals(key)) {
String value = rs.getString(2);
setting.setWhitespaceChars(value);
}
}
rs = stat.executeQuery("SELECT * FROM " + SCHEMA + ".WORDS");
HashMap<String, Integer> map = setting.getWordList();
while (rs.next()) {
......@@ -323,6 +332,28 @@ public class FullText {
}
}
/**
 * Change the whitespace characters. The whitespace characters are used to
 * separate words. If indexes already exist at the time this list is
 * changed, reindex must be called.
 * <p>
 * The new list is stored in the SETTINGS table under the key
 * "whitespaceChars" and applied to the settings object of this connection.
 *
 * @param conn the connection
 * @param whitespaceChars the list of characters
 * @throws SQLException if the database access fails, or if a DbException
 *             is raised while processing (it is converted)
 */
public static void setWhitespaceChars(Connection conn, String whitespaceChars) throws SQLException {
    try {
        init(conn);
        FullTextSettings setting = FullTextSettings.getInstance(conn);
        PreparedStatement prep = conn.prepareStatement("MERGE INTO " + SCHEMA + ".SETTINGS VALUES(?, ?)");
        try {
            prep.setString(1, "whitespaceChars");
            prep.setString(2, whitespaceChars);
            prep.execute();
        } finally {
            // always release the statement, even if the MERGE fails
            prep.close();
        }
        // update the in-memory setting only once the value has been written,
        // so a failed write does not leave it out of sync with the table
        setting.setWhitespaceChars(whitespaceChars);
    } catch (DbException e) {
        throw DbException.toSQLException(e);
    }
}
/**
* INTERNAL.
* Convert the object to a string.
......@@ -639,7 +670,8 @@ public class FullText {
StreamTokenizer tokenizer = new StreamTokenizer(reader);
tokenizer.resetSyntax();
tokenizer.wordChars(' ' + 1, 255);
for (char ch : " \t\n\r\f+\"*%&/()=?'!,.;:-_#@|^~`{}[]".toCharArray()) {
char[] whitespaceChars = setting.getWhitespaceChars().toCharArray();
for (char ch : whitespaceChars) {
tokenizer.whitespaceChars(ch, ch);
}
try {
......@@ -668,7 +700,8 @@ public class FullText {
* @param text the text
*/
protected static void addWords(FullTextSettings setting, HashSet<String> set, String text) {
StringTokenizer tokenizer = new StringTokenizer(text, " \t\n\r\f+\"*%&/()=?'!,.;:-_#@|^~`{}[]");
String whitespaceChars = setting.getWhitespaceChars();
StringTokenizer tokenizer = new StringTokenizer(text, whitespaceChars);
while (tokenizer.hasMoreTokens()) {
String word = tokenizer.nextToken();
word = setting.convertWord(word);
......
......@@ -51,6 +51,11 @@ public class FullTextSettings {
*/
protected SoftHashMap<Connection, SoftHashMap<String, PreparedStatement>> cache = new SoftHashMap<Connection, SoftHashMap<String, PreparedStatement>>();
/**
* The characters that are treated as word separators when text is split
* into words. The default list contains whitespace and common punctuation
* characters, including '<', '>' and the backslash.
*/
protected String whitespaceChars = " \t\n\r\f+\"*%&/()=?'!,.;:-_#@|^~`{}[]<>\\";
/**
* Create a new instance.
*/
......@@ -210,4 +215,12 @@ public class FullTextSettings {
SETTINGS.clear();
}
/**
 * Replace the list of characters that are treated as word separators.
 *
 * @param whitespaceChars the new list of separator characters
 */
protected void setWhitespaceChars(final String whitespaceChars) {
    this.whitespaceChars = whitespaceChars;
}
/**
 * Get the list of characters that are treated as word separators.
 *
 * @return the separator characters
 */
protected String getWhitespaceChars() {
    return this.whitespaceChars;
}
}
......@@ -83,8 +83,9 @@ public class TestFullText extends TestBase {
stat.execute("CREATE ALIAS IF NOT EXISTS FT_INIT FOR \"org.h2.fulltext.FullText.init\"");
stat.execute("CALL FT_INIT()");
FullText.setIgnoreList(conn, "to,this");
FullText.setWhitespaceChars(conn, " ,.-");
stat.execute("CREATE TABLE TEST(ID INT PRIMARY KEY, NAME VARCHAR)");
stat.execute("INSERT INTO TEST VALUES(1, 'Welcome to this world')");
stat.execute("INSERT INTO TEST VALUES(1, 'Welcome to this world, One_Word')");
stat.execute("CALL FT_CREATE_INDEX('PUBLIC', 'TEST', NULL)");
ResultSet rs;
rs = stat.executeQuery("SELECT * FROM FT_SEARCH('Welcome', 0, 0)");
......@@ -93,6 +94,10 @@ public class TestFullText extends TestBase {
assertEquals("SCORE", rs.getMetaData().getColumnLabel(2));
assertEquals("\"PUBLIC\".\"TEST\" WHERE \"ID\"=1", rs.getString(1));
assertEquals("1.0", rs.getString(2));
rs = stat.executeQuery("SELECT * FROM FT_SEARCH_DATA('One', 0, 0)");
assertFalse(rs.next());
rs = stat.executeQuery("SELECT * FROM FT_SEARCH_DATA('One_Word', 0, 0)");
assertTrue(rs.next());
rs = stat.executeQuery("SELECT * FROM FT_SEARCH_DATA('Welcome', 0, 0)");
assertTrue(rs.next());
assertEquals("SCHEMA", rs.getMetaData().getColumnLabel(1));
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论