提交 eff9d795 作者: Thomas Mueller

CSV reading is now faster.

上级 f733494f
...@@ -18,7 +18,8 @@ Change Log ...@@ -18,7 +18,8 @@ Change Log
<h1>Change Log</h1> <h1>Change Log</h1>
<h2>Next Version (unreleased)</h2> <h2>Next Version (unreleased)</h2>
<ul><li>- <ul><li>SimpleResultSet.newInstance(SimpleRowSource rs) did not work.
</li><li>CSV reading is now faster.
</li></ul> </li></ul>
<h2>Version 1.1.116 (2009-07-18)</h2> <h2>Version 1.1.116 (2009-07-18)</h2>
......
...@@ -6,7 +6,6 @@ ...@@ -6,7 +6,6 @@
*/ */
package org.h2.bnf; package org.h2.bnf;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream; import java.io.ByteArrayInputStream;
import java.io.IOException; import java.io.IOException;
import java.io.InputStreamReader; import java.io.InputStreamReader;
...@@ -81,7 +80,6 @@ public class Bnf { ...@@ -81,7 +80,6 @@ public class Bnf {
} }
private void parse(Reader csv) throws SQLException, IOException { private void parse(Reader csv) throws SQLException, IOException {
csv = new BufferedReader(csv);
Rule functions = null; Rule functions = null;
statements = New.arrayList(); statements = New.arrayList();
ResultSet rs = Csv.getInstance().read(csv, null); ResultSet rs = Csv.getInstance().read(csv, null);
......
...@@ -6,7 +6,6 @@ ...@@ -6,7 +6,6 @@
*/ */
package org.h2.table; package org.h2.table;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream; import java.io.ByteArrayInputStream;
import java.io.IOException; import java.io.IOException;
import java.io.InputStreamReader; import java.io.InputStreamReader;
...@@ -920,7 +919,7 @@ public class MetaTable extends Table { ...@@ -920,7 +919,7 @@ public class MetaTable extends Table {
String resource = "/org/h2/res/help.csv"; String resource = "/org/h2/res/help.csv";
try { try {
byte[] data = Resources.get(resource); byte[] data = Resources.get(resource);
Reader reader = new BufferedReader(new InputStreamReader(new ByteArrayInputStream(data))); Reader reader = new InputStreamReader(new ByteArrayInputStream(data));
ResultSet rs = Csv.getInstance().read(reader, null); ResultSet rs = Csv.getInstance().read(reader, null);
for (int i = 0; rs.next(); i++) { for (int i = 0; rs.next(); i++) {
add(rows, new String[] { add(rows, new String[] {
......
...@@ -732,7 +732,7 @@ public class Recover extends Tool implements DataHandler { ...@@ -732,7 +732,7 @@ public class Recover extends Tool implements DataHandler {
} catch (Exception e) { } catch (Exception e) {
writeError(writer, e); writeError(writer, e);
} }
DataPage s = DataPage.create(this, 128); Data s = Data.create(this, 128);
store.seek(0); store.seek(0);
store.readFully(s.getBytes(), 0, 128); store.readFully(s.getBytes(), 0, 128);
s.setPos(48); s.setPos(48);
...@@ -747,7 +747,7 @@ public class Recover extends Tool implements DataHandler { ...@@ -747,7 +747,7 @@ public class Recover extends Tool implements DataHandler {
writer.println("-- ERROR: page size; using " + pageSize); writer.println("-- ERROR: page size; using " + pageSize);
} }
int pageCount = (int) (length / pageSize); int pageCount = (int) (length / pageSize);
s = DataPage.create(this, pageSize); s = Data.create(this, pageSize);
int logFirstTrunkPage = 0, logFirstDataPage = 0; int logFirstTrunkPage = 0, logFirstDataPage = 0;
for (int i = 1;; i++) { for (int i = 1;; i++) {
if (i == 3) { if (i == 3) {
...@@ -778,10 +778,10 @@ public class Recover extends Tool implements DataHandler { ...@@ -778,10 +778,10 @@ public class Recover extends Tool implements DataHandler {
writer.println("-- firstTrunkPage: " + logFirstTrunkPage + writer.println("-- firstTrunkPage: " + logFirstTrunkPage +
" firstDataPage: " + logFirstDataPage); " firstDataPage: " + logFirstDataPage);
s = DataPage.create(this, pageSize); s = Data.create(this, pageSize);
int free = 0; int free = 0;
for (long page = 3; page < pageCount; page++) { for (long page = 3; page < pageCount; page++) {
s = DataPage.create(this, pageSize); s = Data.create(this, pageSize);
store.seek(page * pageSize); store.seek(page * pageSize);
store.readFully(s.getBytes(), 0, pageSize); store.readFully(s.getBytes(), 0, pageSize);
int parentPageId = s.readInt(); int parentPageId = s.readInt();
...@@ -814,7 +814,7 @@ public class Recover extends Tool implements DataHandler { ...@@ -814,7 +814,7 @@ public class Recover extends Tool implements DataHandler {
break; break;
} }
case Page.TYPE_BTREE_NODE: case Page.TYPE_BTREE_NODE:
writer.println("-- page " + page + ": btree node" + (last ? "(last)" : "")); writer.println("-- page " + page + ": b-tree node" + (last ? "(last)" : ""));
if (trace) { if (trace) {
dumpPageBtreeNode(writer, s, !last); dumpPageBtreeNode(writer, s, !last);
} }
...@@ -822,7 +822,7 @@ public class Recover extends Tool implements DataHandler { ...@@ -822,7 +822,7 @@ public class Recover extends Tool implements DataHandler {
case Page.TYPE_BTREE_LEAF: { case Page.TYPE_BTREE_LEAF: {
setStorage(s.readInt()); setStorage(s.readInt());
int entries = s.readShortInt(); int entries = s.readShortInt();
writer.println("-- page " + page + ": btree leaf " + (last ? "(last)" : "") + " table: " + storageId + " entries: " + entries); writer.println("-- page " + page + ": b-tree leaf " + (last ? "(last)" : "") + " table: " + storageId + " entries: " + entries);
if (trace) { if (trace) {
dumpPageBtreeLeaf(writer, s, entries, !last); dumpPageBtreeLeaf(writer, s, entries, !last);
} }
...@@ -1030,16 +1030,21 @@ public class Recover extends Tool implements DataHandler { ...@@ -1030,16 +1030,21 @@ public class Recover extends Tool implements DataHandler {
} }
} }
private void dumpPageBtreeNode(PrintWriter writer, DataPage s, boolean positionOnly) { private void dumpPageBtreeNode(PrintWriter writer, Data s, boolean positionOnly) {
int entryCount = s.readShortInt(); int entryCount = s.readShortInt();
int rowCount = s.readInt(); int rowCount = s.readInt();
int[] children = new int[entryCount + 1]; int[] children = new int[entryCount + 1];
int[] offsets = new int[entryCount]; int[] offsets = new int[entryCount];
children[entryCount] = s.readInt(); children[entryCount] = s.readInt();
int empty = Integer.MAX_VALUE;
for (int i = 0; i < entryCount; i++) { for (int i = 0; i < entryCount; i++) {
children[i] = s.readInt(); children[i] = s.readInt();
offsets[i] = s.readInt(); int off = s.readInt();
empty = Math.min(off, empty);
offsets[i] = off;
} }
empty = empty - s.length();
writer.println("-- empty: " + empty);
for (int i = 0; i < entryCount; i++) { for (int i = 0; i < entryCount; i++) {
int off = offsets[i]; int off = offsets[i];
s.setPos(off); s.setPos(off);
...@@ -1060,7 +1065,7 @@ public class Recover extends Tool implements DataHandler { ...@@ -1060,7 +1065,7 @@ public class Recover extends Tool implements DataHandler {
writer.println("-- [" + entryCount + "] child: " + children[entryCount] + " rowCount: " + rowCount); writer.println("-- [" + entryCount + "] child: " + children[entryCount] + " rowCount: " + rowCount);
} }
private int dumpPageFreeList(PrintWriter writer, DataPage s, int pageSize, long pageId, long pageCount) { private int dumpPageFreeList(PrintWriter writer, Data s, int pageSize, long pageId, long pageCount) {
int pagesAddressed = PageFreeList.getPagesAddressed(pageSize); int pagesAddressed = PageFreeList.getPagesAddressed(pageSize);
BitField used = new BitField(); BitField used = new BitField();
for (int i = 0; i < pagesAddressed; i += 8) { for (int i = 0; i < pagesAddressed; i += 8) {
...@@ -1087,11 +1092,16 @@ public class Recover extends Tool implements DataHandler { ...@@ -1087,11 +1092,16 @@ public class Recover extends Tool implements DataHandler {
return free; return free;
} }
private void dumpPageBtreeLeaf(PrintWriter writer, DataPage s, int entryCount, boolean positionOnly) { private void dumpPageBtreeLeaf(PrintWriter writer, Data s, int entryCount, boolean positionOnly) {
int[] offsets = new int[entryCount]; int[] offsets = new int[entryCount];
int empty = Integer.MAX_VALUE;
for (int i = 0; i < entryCount; i++) { for (int i = 0; i < entryCount; i++) {
offsets[i] = s.readShortInt(); int off = s.readShortInt();
empty = Math.min(off, empty);
offsets[i] = off;
} }
empty = empty - s.length();
writer.println("-- empty: " + empty);
for (int i = 0; i < entryCount; i++) { for (int i = 0; i < entryCount; i++) {
int off = offsets[i]; int off = offsets[i];
s.setPos(off); s.setPos(off);
...@@ -1111,17 +1121,22 @@ public class Recover extends Tool implements DataHandler { ...@@ -1111,17 +1121,22 @@ public class Recover extends Tool implements DataHandler {
} }
} }
private void dumpPageDataLeaf(FileStore store, int pageSize, PrintWriter writer, DataPage s, boolean last, long pageId, int entryCount) throws SQLException { private void dumpPageDataLeaf(FileStore store, int pageSize, PrintWriter writer, Data s, boolean last, long pageId, int entryCount) throws SQLException {
int[] keys = new int[entryCount]; int[] keys = new int[entryCount];
int[] offsets = new int[entryCount]; int[] offsets = new int[entryCount];
long next = 0; long next = 0;
if (!last) { if (!last) {
next = s.readInt(); next = s.readInt();
} }
int empty = Integer.MAX_VALUE;
for (int i = 0; i < entryCount; i++) { for (int i = 0; i < entryCount; i++) {
keys[i] = s.readInt(); keys[i] = s.readInt();
offsets[i] = s.readShortInt(); int off = s.readShortInt();
empty = Math.min(off, empty);
offsets[i] = off;
} }
empty = empty - s.length();
writer.println("-- empty: " + empty);
if (!last) { if (!last) {
DataPage s2 = DataPage.create(this, pageSize); DataPage s2 = DataPage.create(this, pageSize);
s.setPos(pageSize); s.setPos(pageSize);
......
...@@ -92,18 +92,18 @@ public class TestCsv extends TestBase { ...@@ -92,18 +92,18 @@ public class TestCsv extends TestBase {
csv.setNullString("\\N"); csv.setNullString("\\N");
ResultSet rs = csv.read(f.getPath(), null, "UTF8"); ResultSet rs = csv.read(f.getPath(), null, "UTF8");
ResultSetMetaData meta = rs.getMetaData(); ResultSetMetaData meta = rs.getMetaData();
assertEquals(meta.getColumnCount(), 4); assertEquals(4, meta.getColumnCount());
assertEquals(meta.getColumnLabel(1), "A"); assertEquals("A", meta.getColumnLabel(1));
assertEquals(meta.getColumnLabel(2), "B"); assertEquals("B", meta.getColumnLabel(2));
assertEquals(meta.getColumnLabel(3), "C"); assertEquals("C", meta.getColumnLabel(3));
assertEquals(meta.getColumnLabel(4), "D"); assertEquals("D", meta.getColumnLabel(4));
assertTrue(rs.next()); assertTrue(rs.next());
assertEquals(rs.getString(1), null); assertEquals(null, rs.getString(1));
assertEquals(rs.getString(2), ""); assertEquals("", rs.getString(2));
// null is never quoted // null is never quoted
assertEquals(rs.getString(3), "\\N"); assertEquals("\\N", rs.getString(3));
// an empty string is always parsed as null // an empty string is always parsed as null
assertEquals(rs.getString(4), null); assertEquals(null, rs.getString(4));
assertFalse(rs.next()); assertFalse(rs.next());
Connection conn = getConnection("csv"); Connection conn = getConnection("csv");
...@@ -177,12 +177,12 @@ public class TestCsv extends TestBase { ...@@ -177,12 +177,12 @@ public class TestCsv extends TestBase {
assertEquals("ID|NAME 1|Hello", text); assertEquals("ID|NAME 1|Hello", text);
ResultSet rs = stat.executeQuery("select * from csvread('" + baseDir + "/test.csv', null, null, '|', '')"); ResultSet rs = stat.executeQuery("select * from csvread('" + baseDir + "/test.csv', null, null, '|', '')");
ResultSetMetaData meta = rs.getMetaData(); ResultSetMetaData meta = rs.getMetaData();
assertEquals(meta.getColumnCount(), 2); assertEquals(2, meta.getColumnCount());
assertEquals(meta.getColumnLabel(1), "ID"); assertEquals("ID", meta.getColumnLabel(1));
assertEquals(meta.getColumnLabel(2), "NAME"); assertEquals("NAME", meta.getColumnLabel(2));
assertTrue(rs.next()); assertTrue(rs.next());
assertEquals(rs.getString(1), "1"); assertEquals("1", rs.getString(1));
assertEquals(rs.getString(2), "Hello"); assertEquals("Hello", rs.getString(2));
assertFalse(rs.next()); assertFalse(rs.next());
conn.close(); conn.close();
FileUtils.delete(baseDir + "/test.csv"); FileUtils.delete(baseDir + "/test.csv");
...@@ -198,22 +198,22 @@ public class TestCsv extends TestBase { ...@@ -198,22 +198,22 @@ public class TestCsv extends TestBase {
Statement stat = conn.createStatement(); Statement stat = conn.createStatement();
ResultSet rs = stat.executeQuery("select * from csvread('" + baseDir + "/test.csv', null, null, ';', '''', '\\')"); ResultSet rs = stat.executeQuery("select * from csvread('" + baseDir + "/test.csv', null, null, ';', '''', '\\')");
ResultSetMetaData meta = rs.getMetaData(); ResultSetMetaData meta = rs.getMetaData();
assertEquals(meta.getColumnCount(), 2); assertEquals(2, meta.getColumnCount());
assertEquals(meta.getColumnLabel(1), "A"); assertEquals("A", meta.getColumnLabel(1));
assertEquals(meta.getColumnLabel(2), "B"); assertEquals("B", meta.getColumnLabel(2));
assertTrue(rs.next()); assertTrue(rs.next());
assertEquals(rs.getString(1), "It's nice"); assertEquals("It's nice", rs.getString(1));
assertEquals(rs.getString(2), "\nHello*\n"); assertEquals("\nHello*\n", rs.getString(2));
assertFalse(rs.next()); assertFalse(rs.next());
stat.execute("call csvwrite('" + baseDir + "/test2.csv', 'select * from csvread(''" + baseDir + "/test.csv'', null, null, '';'', '''''''', ''\\'')', null, '+', '*', '#')"); stat.execute("call csvwrite('" + baseDir + "/test2.csv', 'select * from csvread(''" + baseDir + "/test.csv'', null, null, '';'', '''''''', ''\\'')', null, '+', '*', '#')");
rs = stat.executeQuery("select * from csvread('" + baseDir + "/test2.csv', null, null, '+', '*', '#')"); rs = stat.executeQuery("select * from csvread('" + baseDir + "/test2.csv', null, null, '+', '*', '#')");
meta = rs.getMetaData(); meta = rs.getMetaData();
assertEquals(meta.getColumnCount(), 2); assertEquals(2, meta.getColumnCount());
assertEquals(meta.getColumnLabel(1), "A"); assertEquals("A", meta.getColumnLabel(1));
assertEquals(meta.getColumnLabel(2), "B"); assertEquals("B", meta.getColumnLabel(2));
assertTrue(rs.next()); assertTrue(rs.next());
assertEquals(rs.getString(1), "It's nice"); assertEquals("It's nice", rs.getString(1));
assertEquals(rs.getString(2), "\nHello*\n"); assertEquals("\nHello*\n", rs.getString(2));
assertFalse(rs.next()); assertFalse(rs.next());
conn.close(); conn.close();
FileUtils.delete(baseDir + "/test.csv"); FileUtils.delete(baseDir + "/test.csv");
...@@ -227,8 +227,8 @@ public class TestCsv extends TestBase { ...@@ -227,8 +227,8 @@ public class TestCsv extends TestBase {
stat.execute("call csvwrite('" + baseDir + "/test.csv', 'select 1 id, ''Hello'' name', 'utf-8', '|')"); stat.execute("call csvwrite('" + baseDir + "/test.csv', 'select 1 id, ''Hello'' name', 'utf-8', '|')");
ResultSet rs = stat.executeQuery("select * from csvread('" + baseDir + "/test.csv', null, 'utf-8', '|')"); ResultSet rs = stat.executeQuery("select * from csvread('" + baseDir + "/test.csv', null, 'utf-8', '|')");
assertTrue(rs.next()); assertTrue(rs.next());
assertEquals(rs.getInt(1), 1); assertEquals(1, rs.getInt(1));
assertEquals(rs.getString(2), "Hello"); assertEquals("Hello", rs.getString(2));
assertFalse(rs.next()); assertFalse(rs.next());
new File(baseDir + "/test.csv").delete(); new File(baseDir + "/test.csv").delete();
...@@ -250,12 +250,12 @@ public class TestCsv extends TestBase { ...@@ -250,12 +250,12 @@ public class TestCsv extends TestBase {
stat.execute("call csvwrite('" + baseDir + "/test.csv', 'select 1 id, ''Hello'' name')"); stat.execute("call csvwrite('" + baseDir + "/test.csv', 'select 1 id, ''Hello'' name')");
ResultSet rs = stat.executeQuery("select name from csvread('" + baseDir + "/test.csv')"); ResultSet rs = stat.executeQuery("select name from csvread('" + baseDir + "/test.csv')");
assertTrue(rs.next()); assertTrue(rs.next());
assertEquals(rs.getString(1), "Hello"); assertEquals("Hello", rs.getString(1));
assertFalse(rs.next()); assertFalse(rs.next());
rs = stat.executeQuery("call csvread('" + baseDir + "/test.csv')"); rs = stat.executeQuery("call csvread('" + baseDir + "/test.csv')");
assertTrue(rs.next()); assertTrue(rs.next());
assertEquals(rs.getInt(1), 1); assertEquals(1, rs.getInt(1));
assertEquals(rs.getString(2), "Hello"); assertEquals("Hello", rs.getString(2));
assertFalse(rs.next()); assertFalse(rs.next());
new File(baseDir + "/test.csv").delete(); new File(baseDir + "/test.csv").delete();
conn.close(); conn.close();
...@@ -269,31 +269,31 @@ public class TestCsv extends TestBase { ...@@ -269,31 +269,31 @@ public class TestCsv extends TestBase {
file.close(); file.close();
ResultSet rs = Csv.getInstance().read(baseDir + "/test.csv", null, "UTF8"); ResultSet rs = Csv.getInstance().read(baseDir + "/test.csv", null, "UTF8");
ResultSetMetaData meta = rs.getMetaData(); ResultSetMetaData meta = rs.getMetaData();
assertEquals(meta.getColumnCount(), 4); assertEquals(4, meta.getColumnCount());
assertEquals(meta.getColumnLabel(1), "a"); assertEquals("a", meta.getColumnLabel(1));
assertEquals(meta.getColumnLabel(2), "b"); assertEquals("b", meta.getColumnLabel(2));
assertEquals(meta.getColumnLabel(3), "c"); assertEquals("c", meta.getColumnLabel(3));
assertEquals(meta.getColumnLabel(4), "d"); assertEquals("d", meta.getColumnLabel(4));
assertTrue(rs.next()); assertTrue(rs.next());
assertEquals(rs.getString(1), "201"); assertEquals("201", rs.getString(1));
assertEquals(rs.getString(2), "-2"); assertEquals("-2", rs.getString(2));
assertEquals(rs.getString(3), "0"); assertEquals("0", rs.getString(3));
assertEquals(rs.getString(4), "18"); assertEquals("18", rs.getString(4));
assertTrue(rs.next()); assertTrue(rs.next());
assertEquals(rs.getString(1), null); assertEquals(null, rs.getString(1));
assertEquals(rs.getString(2), "abc\""); assertEquals("abc\"", rs.getString(2));
assertEquals(rs.getString(3), null); assertEquals(null, rs.getString(3));
assertEquals(rs.getString(4), ""); assertEquals("", rs.getString(4));
assertTrue(rs.next()); assertTrue(rs.next());
assertEquals(rs.getString(1), "1"); assertEquals("1", rs.getString(1));
assertEquals(rs.getString(2), "2"); assertEquals("2", rs.getString(2));
assertEquals(rs.getString(3), "3"); assertEquals("3", rs.getString(3));
assertEquals(rs.getString(4), "4"); assertEquals("4", rs.getString(4));
assertTrue(rs.next()); assertTrue(rs.next());
assertEquals(rs.getString(1), "5"); assertEquals("5", rs.getString(1));
assertEquals(rs.getString(2), "6"); assertEquals("6", rs.getString(2));
assertEquals(rs.getString(3), "7"); assertEquals("7", rs.getString(3));
assertEquals(rs.getString(4), "8"); assertEquals("8", rs.getString(4));
assertFalse(rs.next()); assertFalse(rs.next());
// a,b,c,d // a,b,c,d
...@@ -324,8 +324,8 @@ public class TestCsv extends TestBase { ...@@ -324,8 +324,8 @@ public class TestCsv extends TestBase {
assertEquals(2, meta.getColumnCount()); assertEquals(2, meta.getColumnCount());
for (int i = 0; i < len; i++) { for (int i = 0; i < len; i++) {
rs.next(); rs.next();
assertEquals(rs.getString("ID"), "" + (i + 1)); assertEquals("" + (i + 1), rs.getString("ID"));
assertEquals(rs.getString("NAME"), "Ruebezahl"); assertEquals("Ruebezahl", rs.getString("NAME"));
} }
assertFalse(rs.next()); assertFalse(rs.next());
rs.close(); rs.close();
......
Markdown 格式
0%
您将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论