提交 e0a0cc37 authored 作者: Thomas Mueller's avatar Thomas Mueller

The native fulltext search now supports streaming CLOB data.

上级 77a5d9e6
...@@ -18,7 +18,8 @@ Change Log ...@@ -18,7 +18,8 @@ Change Log
<h1>Change Log</h1> <h1>Change Log</h1>
<h2>Next Version (unreleased)</h2> <h2>Next Version (unreleased)</h2>
<ul><li>If the database URL ends with ;PAGE_STORE=TRUE and a database in <ul><li>The native fulltext search now supports streaming CLOB data.
</li><li>If the database URL ends with ;PAGE_STORE=TRUE and a database in
the old format exists, it is automatically converted to the new page store format the old format exists, it is automatically converted to the new page store format
if possible. A backup of the database is created first. Automatic conversion is not supported if possible. A backup of the database is created first. Automatic conversion is not supported
if the database was not closed normally (if it contains uncommitted transactions). if the database was not closed normally (if it contains uncommitted transactions).
......
...@@ -41,7 +41,6 @@ See also <a href="build.html#providing_patches">Providing Patches</a>. ...@@ -41,7 +41,6 @@ See also <a href="build.html#providing_patches">Providing Patches</a>.
<h2>Priority 2</h2> <h2>Priority 2</h2>
<ul> <ul>
<li>Improve test code coverage <li>Improve test code coverage
</li><li>Fulltext search: support streaming CLOB data.
</li><li>Enable warning for 'Local variable declaration hides another field or variable'. </li><li>Enable warning for 'Local variable declaration hides another field or variable'.
</li><li>Test multi-threaded in-memory db access </li><li>Test multi-threaded in-memory db access
</li><li>MVCC: select for update should only lock the selected rows. </li><li>MVCC: select for update should only lock the selected rows.
...@@ -462,6 +461,7 @@ See also <a href="build.html#providing_patches">Providing Patches</a>. ...@@ -462,6 +461,7 @@ See also <a href="build.html#providing_patches">Providing Patches</a>.
</li><li>Issue 134: IBM DB2 compatibility: session global variables. </li><li>Issue 134: IBM DB2 compatibility: session global variables.
</li><li>FTL_SET_OPTION(keyString, valueString) with key stopWords at first. </li><li>FTL_SET_OPTION(keyString, valueString) with key stopWords at first.
</li><li>Pluggable access control mechanism. </li><li>Pluggable access control mechanism.
</li><li>Fulltext search (Lucene): support streaming CLOB data.
</li></ul> </li></ul>
<h2>Not Planned</h2> <h2>Not Planned</h2>
......
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
...@@ -8,6 +8,7 @@ package org.h2.fulltext; ...@@ -8,6 +8,7 @@ package org.h2.fulltext;
import java.io.IOException; import java.io.IOException;
import java.io.Reader; import java.io.Reader;
import java.io.StreamTokenizer;
import java.sql.Clob; import java.sql.Clob;
import java.sql.Connection; import java.sql.Connection;
import java.sql.DatabaseMetaData; import java.sql.DatabaseMetaData;
...@@ -22,7 +23,6 @@ import java.util.HashMap; ...@@ -22,7 +23,6 @@ import java.util.HashMap;
import java.util.HashSet; import java.util.HashSet;
import java.util.Iterator; import java.util.Iterator;
import java.util.StringTokenizer; import java.util.StringTokenizer;
import org.h2.api.CloseListener; import org.h2.api.CloseListener;
import org.h2.api.Trigger; import org.h2.api.Trigger;
import org.h2.command.Parser; import org.h2.command.Parser;
...@@ -601,6 +601,38 @@ public class FullText { ...@@ -601,6 +601,38 @@ public class FullText {
* *
* @param setting the fulltext settings * @param setting the fulltext settings
* @param set the hash set * @param set the hash set
* @param reader the reader
*/
static void addWords(FullTextSettings setting, HashSet<String> set, Reader reader) throws SQLException {
    StreamTokenizer tokenizer = new StreamTokenizer(reader);
    // Drop the default syntax table, then treat every character from '!' up
    // to 255 as part of a word.
    tokenizer.resetSyntax();
    tokenizer.wordChars(' ' + 1, 255);
    // Re-classify the separator characters one by one so that they split words.
    String separators = " \t\n\r\f+\"*%&/()=?'!,.;:-_#@|^~`{}[]";
    for (int i = 0; i < separators.length(); i++) {
        char separator = separators.charAt(i);
        tokenizer.whitespaceChars(separator, separator);
    }
    try {
        for (int token = tokenizer.nextToken();
                token != StreamTokenizer.TT_EOF;
                token = tokenizer.nextToken()) {
            if (token != StreamTokenizer.TT_WORD) {
                // anything that is not a word token is skipped
                continue;
            }
            // convertWord may return null, in which case the word is not added
            String converted = setting.convertWord(tokenizer.sval);
            if (converted != null) {
                set.add(converted);
            }
        }
    } catch (IOException e) {
        throw Message.convertIOException(e, "Tokenizer error");
    }
}
/**
* Add all words in the given text to the hash set.
*
* @param setting the fulltext settings
* @param set the hash set
* @param text the text * @param text the text
*/ */
static void addWords(FullTextSettings setting, HashSet<String> set, String text) { static void addWords(FullTextSettings setting, HashSet<String> set, String text) {
...@@ -871,8 +903,20 @@ public class FullText { ...@@ -871,8 +903,20 @@ public class FullText {
HashSet<String> words = New.hashSet(); HashSet<String> words = New.hashSet();
for (int i = 0; i < index.indexColumns.length; i++) { for (int i = 0; i < index.indexColumns.length; i++) {
int idx = index.indexColumns[i]; int idx = index.indexColumns[i];
String data = asString(row[idx], columnTypes[idx]); int type = columnTypes[idx];
addWords(setting, words, data); Object data = row[idx];
if (type == Types.CLOB && data != null) {
Reader reader;
if (data instanceof Reader) {
reader = (Reader) data;
} else {
reader = ((Clob) data).getCharacterStream();
}
addWords(setting, words, reader);
} else {
String string = asString(data, type);
addWords(setting, words, string);
}
} }
HashMap<String, Integer> allWords = setting.getWordList(); HashMap<String, Integer> allWords = setting.getWordList();
int[] wordIds = new int[words.size()]; int[] wordIds = new int[words.size()];
......
...@@ -6,13 +6,13 @@ ...@@ -6,13 +6,13 @@
*/ */
package org.h2.test.db; package org.h2.test.db;
import java.io.Reader;
import java.sql.Connection; import java.sql.Connection;
import java.sql.PreparedStatement; import java.sql.PreparedStatement;
import java.sql.ResultSet; import java.sql.ResultSet;
import java.sql.SQLException; import java.sql.SQLException;
import java.sql.Statement; import java.sql.Statement;
import java.util.StringTokenizer; import java.util.StringTokenizer;
import org.h2.fulltext.FullText; import org.h2.fulltext.FullText;
import org.h2.store.fs.FileSystem; import org.h2.store.fs.FileSystem;
import org.h2.test.TestBase; import org.h2.test.TestBase;
...@@ -36,6 +36,7 @@ public class TestFullText extends TestBase { ...@@ -36,6 +36,7 @@ public class TestFullText extends TestBase {
if (config.memory) { if (config.memory) {
return; return;
} }
testStreamLob();
test(false, "VARCHAR"); test(false, "VARCHAR");
test(false, "CLOB"); test(false, "CLOB");
testPerformance(false); testPerformance(false);
...@@ -58,6 +59,38 @@ public class TestFullText extends TestBase { ...@@ -58,6 +59,38 @@ public class TestFullText extends TestBase {
deleteDb("fullTextReopen"); deleteDb("fullTextReopen");
} }
/**
 * Inserts a one-megabyte CLOB through a character stream into a table that
 * has a native fulltext index, then checks that a fulltext search for
 * 'World' (present in row 0) returns at least one row.
 */
private void testStreamLob() throws SQLException {
    deleteDb("fullText");
    Connection conn = getConnection("fullText");
    Statement stat = conn.createStatement();
    stat.execute("CREATE ALIAS IF NOT EXISTS FT_INIT FOR \"org.h2.fulltext.FullText.init\"");
    stat.execute("CREATE TABLE TEST(ID INT PRIMARY KEY, DATA CLOB)");
    FullText.createIndex(conn, "PUBLIC", "TEST", null);
    stat.execute("insert into test values(0, 'Hello World!')");
    PreparedStatement prep = conn.prepareStatement("insert into test values(1, ?)");
    final int length = 1024 * 1024;
    prep.setCharacterStream(1, new Reader() {
        // number of characters this synthetic stream still has to deliver
        int remaining = length;
        public void close() {
            // ignore
        }
        public int read(char[] buff, int off, int len) {
            if (len == 0) {
                return 0;
            }
            if (remaining <= 0) {
                return -1;
            }
            // Deliver the tail as well when fewer than len characters are
            // left, so that exactly 'length' characters are produced in total.
            int count = Math.min(remaining, len);
            // Actually write the characters that are reported as read;
            // blanks keep the streamed content deterministic.
            for (int i = 0; i < count; i++) {
                buff[off + i] = ' ';
            }
            remaining -= count;
            return count;
        }
    }, length);
    prep.execute();
    ResultSet rs = stat.executeQuery("SELECT * FROM FT_SEARCH('World', 0, 0)");
    assertTrue(rs.next());
    FullText.dropAll(conn);
    conn.close();
    deleteDb("fullText");
}
private void testCreateDrop() throws SQLException { private void testCreateDrop() throws SQLException {
deleteDb("fullText"); deleteDb("fullText");
FileSystem.getInstance(baseDir).deleteRecursive(baseDir + "/fullText", false); FileSystem.getInstance(baseDir).deleteRecursive(baseDir + "/fullText", false);
......
...@@ -46,6 +46,64 @@ Please provide any additional information below. ...@@ -46,6 +46,64 @@ Please provide any additional information below.
----------------- -----------------
ClassCastException
There is a known problem in version 1.2.120 that can cause a ClassCastException. Did you use version 1.2.120 with this database? If not, I have a few more questions:
- Could you send the full stack trace of the exception including message text?
- What is your database URL?
- Did you use multiple connections?
- A workaround is: use the tool org.h2.tools.Recover to create
the SQL script file, and then re-create the database using this script.
Does it work when you do this?
- With which version of H2 was this database created?
You can find it out using:
select * from information_schema.settings where name='CREATE_BUILD'
or have a look in the SQL script created by the recover tool.
- Do you use any settings or special features (for example, the setting
LOG=0, or two phase commit, linked tables, cache settings)?
- Is the application multi-threaded?
- What operating system, file system, and virtual machine
(java -version) do you use?
- Is it (or was it at some point) a networked file system?
- How big is the database (file sizes)?
- Is the database usually closed normally, or is the process terminated
forcefully or the computer switched off?
- Is it possible to reproduce this problem using a fresh database
(sometimes, or always)?
- Are there any other exceptions (maybe in the .trace.db file)?
Could you send them please?
- Do you still have any .trace.db files, and if yes could you send them?
- Could you send the .h2.db file where this exception occurs?
-----------------
Hi,
I have a few questions:
- The database URL?
- With which version of H2 was this database created?
You can find it out using:
select * from information_schema.settings where name='CREATE_BUILD'
- Did you use multiple connections?
- Do you use any settings or special features (for example, the setting
LOG=0, or two phase commit, linked tables, cache settings)?
- Is the application multi-threaded?
- What operating system, file system, and virtual machine
(java -version) do you use?
- How big is the database (file sizes)?
- Is the database usually closed normally, or is the process terminated
forcefully or the computer switched off?
- Are there any other exceptions (maybe in the .trace.db file)?
Could you send them please?
- Do you still have any .trace.db files, and if yes could you send them?
- Could you send the .h2.db file where this exception occurs?
Corrupted database Corrupted database
I am sorry to say that, but it looks like a corruption problem. I am very interested in analyzing and solving this problem. Corruption problems have top priority for me. I have a few questions: I am sorry to say that, but it looks like a corruption problem. I am very interested in analyzing and solving this problem. Corruption problems have top priority for me. I have a few questions:
......
...@@ -618,3 +618,4 @@ notranslate vince bonfanti alphabetically sysdummy sysibm activation ...@@ -618,3 +618,4 @@ notranslate vince bonfanti alphabetically sysdummy sysibm activation
deactivation concatenating reproducing black railroads railroad radius moz deactivation concatenating reproducing black railroads railroad radius moz
imageio argb bilinear rendering stroke interpolation flip diagrams draw imageio argb bilinear rendering stroke interpolation flip diagrams draw
delim overlap subselect bitwise dclassifier dgenerate compacts chartrand phane delim overlap subselect bitwise dclassifier dgenerate compacts chartrand phane
sval
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论