提交 8b2c84f7 — 作者: Thomas Mueller

Experimental page store.

上级 a90603b0
......@@ -56,6 +56,11 @@ abstract class PageBtree extends Record {
*/
protected int start;
/**
* If the page was already written to the buffer.
*/
protected boolean written;
PageBtree(PageBtreeIndex index, int pageId, int parentPageId, DataPage data) {
this.index = index;
this.parentPageId = parentPageId;
......@@ -82,23 +87,36 @@ abstract class PageBtree extends Record {
*
* @param compare the row
* @param bigger if looking for a larger row
* @param add if the row should be added (check for duplicate keys)
* @return the index of the found row
*/
/**
 * Find the index of the first entry that is equal or larger than the given
 * row (binary search over the entries of this page).
 * Note: the two signature lines below are the old and the new revision of
 * this diff; the new revision adds the 'add' flag for duplicate-key checks.
 */
// (diff: previous revision of the signature)
int find(SearchRow compare, boolean bigger) throws SQLException {
// (diff: new revision - 'add' enables duplicate key detection on insert)
int find(SearchRow compare, boolean bigger, boolean add) throws SQLException {
// a null search row matches everything; start at the first entry
if (compare == null) {
return 0;
}
int l = 0, r = entryCount;
// comp lives outside the loop so the final 'bigger' adjustment can
// inspect the result of the last comparison
int comp = 1;
while (l < r) {
// unsigned shift avoids int overflow of (l + r)
int i = (l + r) >>> 1;
SearchRow row = (SearchRow) getRow(i);
// (diff: previous revision declared comp locally here)
int comp = index.compareRows(row, compare);
comp = index.compareRows(row, compare);
if (comp == 0 && add) {
// inserting an equal row: on a unique index this is only allowed
// when the row contains NULL and multiple NULLs are permitted
if (index.indexType.getUnique()) {
if (!index.containsNullAndAllowMultipleNull(compare)) {
throw index.getDuplicateKeyException();
}
}
// order duplicates among each other by their keys
comp = index.compareKeys(row, compare);
}
if (comp > 0 || (!bigger && comp == 0)) {
r = i;
} else {
l = i + 1;
}
}
// NOTE(review): when looking for a strictly larger row and the last
// compared entry was smaller, step one position further - confirm
if (bigger && comp < 0) {
l++;
}
return l;
}
......
......@@ -68,9 +68,9 @@ public class PageBtreeCursor implements Cursor {
}
}
currentSearchRow = current.getRow(i);
currentRow = null;
if (last != null && index.compareRows(currentSearchRow, last) > 0) {
currentSearchRow = null;
currentRow = null;
return false;
}
i++;
......
......@@ -38,7 +38,7 @@ public class PageBtreeIndex extends BaseIndex {
IndexType indexType, int headPos) throws SQLException {
initBaseIndex(table, id, indexName, columns, indexType);
int test;
// trace.setLevel(TraceSystem.DEBUG);
// trace.setLevel(TraceSystem.DEBUG);
if (database.isMultiVersion()) {
int todoMvcc;
}
......
......@@ -9,7 +9,6 @@ package org.h2.index;
import java.sql.SQLException;
import org.h2.constant.ErrorCode;
import org.h2.message.Message;
import org.h2.result.Row;
import org.h2.result.SearchRow;
import org.h2.store.DataPage;
import org.h2.store.PageStore;
......@@ -31,8 +30,6 @@ class PageBtreeLeaf extends PageBtree {
private static final int KEY_OFFSET_PAIR_LENGTH = 6;
private static final int KEY_OFFSET_PAIR_START = 11;
private boolean written;
PageBtreeLeaf(PageBtreeIndex index, int pageId, int parentPageId, DataPage data) {
super(index, pageId, parentPageId, data);
start = KEY_OFFSET_PAIR_START;
......@@ -66,13 +63,12 @@ class PageBtreeLeaf extends PageBtree {
int addRow(SearchRow row) throws SQLException {
int rowLength = index.getRowSize(data, row);
int pageSize = index.getPageStore().getPageSize();
// TODO currently the order is important
// TODO and can only add at the end
int last = entryCount == 0 ? pageSize : offsets[entryCount - 1];
if (entryCount > 0 && last - rowLength < start + KEY_OFFSET_PAIR_LENGTH) {
int todoSplitAtLastInsertionPoint;
return (entryCount / 2) + 1;
}
written = false;
int offset = last - rowLength;
int[] newOffsets = new int[entryCount + 1];
SearchRow[] newRows = new SearchRow[entryCount + 1];
......@@ -80,11 +76,14 @@ class PageBtreeLeaf extends PageBtree {
if (entryCount == 0) {
x = 0;
} else {
x = find(row, false);
x = find(row, false, true);
System.arraycopy(offsets, 0, newOffsets, 0, x);
System.arraycopy(rows, 0, newRows, 0, x);
if (x < entryCount) {
System.arraycopy(offsets, x, newOffsets, x + 1, entryCount - x);
for (int j = x; j < entryCount; j++) {
newOffsets[j + 1] = offsets[j] - rowLength;
}
offset = (x == 0 ? pageSize : offsets[x - 1]) - rowLength;
System.arraycopy(rows, x, newRows, x + 1, entryCount - x);
}
}
......@@ -111,14 +110,20 @@ class PageBtreeLeaf extends PageBtree {
private void removeRow(int i) throws SQLException {
entryCount--;
written = false;
if (entryCount <= 0) {
Message.throwInternalError();
}
int[] newOffsets = new int[entryCount];
Row[] newRows = new Row[entryCount];
SearchRow[] newRows = new SearchRow[entryCount];
System.arraycopy(offsets, 0, newOffsets, 0, i);
System.arraycopy(rows, 0, newRows, 0, i);
System.arraycopy(offsets, i + 1, newOffsets, i, entryCount - i);
int startNext = i > 0 ? offsets[i - 1] : index.getPageStore().getPageSize();
int rowLength = startNext - offsets[i];
for (int j = i; j < entryCount; j++) {
newOffsets[j] = offsets[j + 1] + rowLength;
}
System.arraycopy(rows, i + 1, newRows, i, entryCount - i);
start -= KEY_OFFSET_PAIR_LENGTH;
offsets = newOffsets;
......@@ -144,7 +149,7 @@ class PageBtreeLeaf extends PageBtree {
}
boolean remove(SearchRow row) throws SQLException {
int at = find(row, false);
int at = find(row, false, false);
if (index.compareRows(row, getRow(at)) != 0) {
throw Message.getSQLException(ErrorCode.ROW_NOT_FOUND_WHEN_DELETING_1, index.getSQL() + ": " + row);
}
......@@ -205,7 +210,7 @@ class PageBtreeLeaf extends PageBtree {
}
void find(PageBtreeCursor cursor, SearchRow first, boolean bigger) throws SQLException {
int i = find(first, bigger);
int i = find(first, bigger, false);
if (i > entryCount) {
if (parentPageId == Page.ROOT) {
return;
......
......@@ -38,6 +38,7 @@ class PageBtreeNode extends PageBtree {
/**
 * Create a b-tree node page.
 *
 * @param index the owning b-tree index
 * @param pageId the page id
 * @param parentPageId the parent page id
 * @param data the data page buffer
 */
PageBtreeNode(PageBtreeIndex index, int pageId, int parentPageId, DataPage data) {
super(index, pageId, parentPageId, data);
// entries begin after the fixed child/offset pair header area
start = CHILD_OFFSET_PAIR_START;
}
void read() {
......@@ -53,6 +54,7 @@ class PageBtreeNode extends PageBtree {
offsets[i] = data.readInt();
}
check();
start = data.length();
}
/**
......@@ -81,14 +83,18 @@ class PageBtreeNode extends PageBtree {
System.arraycopy(offsets, 0, newOffsets, 0, x);
System.arraycopy(rows, 0, newRows, 0, x);
if (x < entryCount) {
System.arraycopy(offsets, x, newOffsets, x + 1, entryCount - x);
for (int j = x; j < entryCount; j++) {
newOffsets[j + 1] = offsets[j] - rowLength;
}
offset = (x == 0 ? pageSize : offsets[x - 1]) - rowLength;
System.arraycopy(rows, x, newRows, x + 1, entryCount - x);
System.arraycopy(childPageIds, x, newChildPageIds, x + 1, entryCount - x + 1);
System.arraycopy(childPageIds, x + 1, newChildPageIds, x + 2, entryCount - x);
}
}
newOffsets[x] = offset;
newRows[x] = row;
newChildPageIds[x + 1] = childPageId;
start += CHILD_OFFSET_PAIR_LENGTH;
offsets = newOffsets;
rows = newRows;
childPageIds = newChildPageIds;
......@@ -98,7 +104,7 @@ class PageBtreeNode extends PageBtree {
int addRow(SearchRow row) throws SQLException {
while (true) {
int x = find(row, false);
int x = find(row, false, true);
PageBtree page = index.getPage(childPageIds[x]);
int splitPoint = page.addRow(row);
if (splitPoint == 0) {
......@@ -116,6 +122,7 @@ class PageBtreeNode extends PageBtree {
index.getPageStore().updateRecord(this, true, data);
}
updateRowCount(1);
written = false;
return 0;
}
......@@ -171,7 +178,7 @@ class PageBtreeNode extends PageBtree {
}
void find(PageBtreeCursor cursor, SearchRow first, boolean bigger) throws SQLException {
int i = find(first, bigger);
int i = find(first, bigger, false);
if (i > entryCount) {
if (parentPageId == Page.ROOT) {
return;
......@@ -190,7 +197,7 @@ class PageBtreeNode extends PageBtree {
}
boolean remove(SearchRow row) throws SQLException {
int at = find(row, false);
int at = find(row, false, false);
// merge is not implemented to allow concurrent usage of btrees
// TODO maybe implement merge
PageBtree page = index.getPage(childPageIds[at]);
......@@ -206,6 +213,7 @@ class PageBtreeNode extends PageBtree {
// no more children - this page is empty as well
return true;
}
written = false;
removeChild(at);
index.getPageStore().updateRecord(this, true, data);
return false;
......@@ -245,6 +253,18 @@ class PageBtreeNode extends PageBtree {
/**
 * Serialize this node page and write it to the page store.
 *
 * @param buff not used here; the page serializes into its own data buffer
 */
public void write(DataPage buff) throws SQLException {
check();
// fill the data buffer (a no-op if 'written' is already true)
write();
index.getPageStore().writePage(getPos(), data);
}
private void write() throws SQLException {
if (written) {
return;
}
// make sure rows are read
for (int i = 0; i < entryCount; i++) {
getRow(i);
}
data.reset();
data.writeInt(parentPageId);
data.writeByte((byte) Page.TYPE_BTREE_NODE);
......@@ -255,28 +275,37 @@ class PageBtreeNode extends PageBtree {
data.writeInt(childPageIds[i]);
data.writeInt(offsets[i]);
}
index.getPageStore().writePage(getPos(), data);
for (int i = 0; i < entryCount; i++) {
index.writeRow(data, offsets[i], rows[i]);
}
written = true;
}
/**
 * Remove the child page reference and the associated row at the given
 * position, compacting the remaining entries.
 * Note: this diff hunk interleaves old and new revisions of some lines.
 *
 * @param i the index of the child entry to remove
 */
private void removeChild(int i) throws SQLException {
entryCount--;
// the buffered page image no longer matches the in-memory state
written = false;
if (entryCount < 0) {
Message.throwInternalError();
}
// (diff: previous revision - no separate offsets array, entryCount slots)
SearchRow[] newRows = new SearchRow[entryCount];
int[] newOffsets = new int[entryCount];
// (diff: new revision - keeps entryCount + 1 row slots)
SearchRow[] newRows = new SearchRow[entryCount + 1];
int[] newChildPageIds = new int[entryCount + 1];
// copy the entries before the removed position
System.arraycopy(offsets, 0, newOffsets, 0, Math.min(entryCount, i));
System.arraycopy(rows, 0, newRows, 0, Math.min(entryCount, i));
System.arraycopy(childPageIds, 0, newChildPageIds, 0, i);
if (entryCount > i) {
System.arraycopy(offsets, i + 1, newOffsets, i, entryCount - i);
System.arraycopy(rows, i + 1, newRows, i, entryCount - i);
// size of the removed entry: distance to the previous offset, or to
// the end of the page for the first entry (rows grow downwards)
int startNext = i > 0 ? offsets[i - 1] : index.getPageStore().getPageSize();
int rowLength = startNext - offsets[i];
// re-base the following offsets by the space that was freed
for (int j = i; j < entryCount; j++) {
newOffsets[j] = offsets[j + 1] + rowLength;
}
}
System.arraycopy(childPageIds, i + 1, newChildPageIds, i, entryCount - i + 1);
offsets = newOffsets;
rows = newRows;
childPageIds = newChildPageIds;
// one child/offset pair less in the page header area
start -= CHILD_OFFSET_PAIR_LENGTH;
}
/**
......@@ -286,7 +315,7 @@ class PageBtreeNode extends PageBtree {
* @param row the current row
*/
void nextPage(PageBtreeCursor cursor, SearchRow row) throws SQLException {
int i = find(row, true);
int i = find(row, true, false);
if (i > entryCount) {
if (parentPageId == Page.ROOT) {
cursor.setCurrent(null, 0);
......
......@@ -6,7 +6,6 @@
*/
package org.h2.index;
import java.sql.SQLException;
import java.util.Arrays;
import org.h2.constant.ErrorCode;
import org.h2.engine.Session;
......@@ -106,6 +105,7 @@ class PageDataLeaf extends PageData {
if (entryCount == 0) {
x = 0;
} else {
readAllRows();
x = find(row.getPos());
System.arraycopy(offsets, 0, newOffsets, 0, x);
System.arraycopy(keys, 0, newKeys, 0, x);
......@@ -118,6 +118,7 @@ class PageDataLeaf extends PageData {
System.arraycopy(rows, x, newRows, x + 1, entryCount - x);
}
}
written = false;
last = x == 0 ? pageSize : offsets[x - 1];
offset = last - rowLength;
entryCount++;
......@@ -168,6 +169,8 @@ class PageDataLeaf extends PageData {
}
private void removeRow(int i) throws SQLException {
written = false;
readAllRows();
entryCount--;
if (entryCount <= 0) {
Message.throwInternalError();
......@@ -178,8 +181,8 @@ class PageDataLeaf extends PageData {
System.arraycopy(offsets, 0, newOffsets, 0, i);
System.arraycopy(keys, 0, newKeys, 0, i);
System.arraycopy(rows, 0, newRows, 0, i);
int startNext = i < entryCount - 1 ? offsets[i + 1] : index.getPageStore().getPageSize();
int rowLength = offsets[i] - startNext;
int startNext = i > 0 ? offsets[i - 1] : index.getPageStore().getPageSize();
int rowLength = startNext - offsets[i];
for (int j = i; j < entryCount; j++) {
newOffsets[j] = offsets[j + 1] + rowLength;
}
......@@ -202,6 +205,10 @@ class PageDataLeaf extends PageData {
* @return the row
*/
Row getRowAt(int at) throws SQLException {
int test;
if (at >= rows.length) {
System.out.println("stop");
}
Row r = rows[at];
if (r == null) {
if (firstOverflowPageId != 0) {
......@@ -320,14 +327,17 @@ class PageDataLeaf extends PageData {
return index.getPageStore();
}
/**
 * Make sure every row of this page has been read into memory
 * (getRowAt loads and caches a row that is not yet present).
 */
private void readAllRows() throws SQLException {
int pos = 0;
while (pos < entryCount) {
getRowAt(pos);
pos++;
}
}
private void write() throws SQLException {
if (written) {
return;
}
// make sure rows are read
for (int i = 0; i < entryCount; i++) {
getRowAt(i);
}
readAllRows();
data.reset();
data.writeInt(parentPageId);
int type;
......@@ -342,9 +352,6 @@ class PageDataLeaf extends PageData {
if (firstOverflowPageId != 0) {
data.writeInt(firstOverflowPageId);
}
if (getPos() == 1) {
System.out.println("pause");
}
for (int i = 0; i < entryCount; i++) {
data.writeInt(keys[i]);
data.writeShortInt(offsets[i]);
......
......@@ -40,7 +40,7 @@ public class PageScanIndex extends BaseIndex implements RowIndex {
public PageScanIndex(TableData table, int id, IndexColumn[] columns, IndexType indexType, int headPos) throws SQLException {
initBaseIndex(table, id, table.getName() + "_TABLE_SCAN", columns, indexType);
int test;
trace.setLevel(TraceSystem.DEBUG);
// trace.setLevel(TraceSystem.DEBUG);
if (database.isMultiVersion()) {
int todoMvcc;
}
......@@ -89,10 +89,6 @@ public class PageScanIndex extends BaseIndex implements RowIndex {
lastKey = Math.max(lastKey, row.getPos() + 1);
}
if (trace.isDebugEnabled()) {
int test;
if (table.getId() == -1) {
System.out.println("pause");
}
trace.debug("add table:" + table.getId() + " " + row);
}
if (tableData.getContainsLargeObject()) {
......@@ -197,8 +193,8 @@ public class PageScanIndex extends BaseIndex implements RowIndex {
public void remove(Session session, Row row) throws SQLException {
if (trace.isDebugEnabled()) {
trace.debug("remove " + row.getPos());
if (table.getId() == -1) {
System.out.println("pause");
if (table.getId() == 0) {
System.out.println("table 0 remove");
}
}
if (tableData.getContainsLargeObject()) {
......
......@@ -11,7 +11,6 @@ import java.io.OutputStream;
import java.sql.SQLException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.WeakHashMap;
import org.h2.constant.ErrorCode;
import org.h2.engine.Database;
import org.h2.engine.Session;
......@@ -37,8 +36,8 @@ import org.h2.util.CacheWriter;
import org.h2.util.FileUtils;
import org.h2.util.ObjectArray;
import org.h2.util.ObjectUtils;
import org.h2.util.SoftHashMap;
import org.h2.util.StringUtils;
import org.h2.value.CompareMode;
import org.h2.value.Value;
import org.h2.value.ValueInt;
import org.h2.value.ValueString;
......@@ -61,9 +60,13 @@ import org.h2.value.ValueString;
*/
public class PageStore implements CacheWriter {
// TODO PageDataLeaf and Node: support random delete/add
// TODO PageStore.openMetaIndex (add collation for indexes,
// desc columns support)
// TODO btree index with fixed size values doesn't need offset and so on
// TODO log block allocation
// TODO use free-space bitmap
// TODO block compression: maybe http://en.wikipedia.org/wiki/LZJB
// with RLE, specially for 0s.
// TODO test that setPageId updates parent, overflow parent
......@@ -91,6 +94,7 @@ public class PageStore implements CacheWriter {
// TODO split files (1 GB max size)
// TODO add a setting (that can be changed at runtime) to call fsync
// and delay on each commit
// TODO var int: see google protocol buffers
/**
* The smallest possible page size.
......@@ -174,7 +178,7 @@ public class PageStore implements CacheWriter {
this.database = database;
trace = database.getTrace(Trace.PAGE_STORE);
int test;
trace.setLevel(TraceSystem.DEBUG);
// trace.setLevel(TraceSystem.DEBUG);
this.cacheSize = cacheSizeDefault;
String cacheType = database.getCacheType();
this.cache = CacheLRU.getCache(this, cacheType, cacheSize);
......@@ -257,7 +261,7 @@ trace.setLevel(TraceSystem.DEBUG);
*/
public void checkpoint() throws SQLException {
trace.debug("checkpoint");
if (getLog() == null) {
if (getLog() == null || database.isReadOnly()) {
// the file was never fully opened
return;
}
......@@ -270,6 +274,8 @@ trace.setLevel(TraceSystem.DEBUG);
writeBack(rec);
}
int todoFlushBeforeReopen;
// switch twice so there are no redo entries
switchLogIfPossible();
switchLogIfPossible();
int todoWriteDeletedPages;
}
......@@ -280,6 +286,9 @@ trace.setLevel(TraceSystem.DEBUG);
private void switchLogIfPossible() throws SQLException {
trace.debug("switchLogIfPossible");
if (database.isReadOnly()) {
return;
}
int id = getLog().getId();
getLog().close();
activeLog = (activeLog + 1) % LOG_COUNT;
......@@ -409,10 +418,6 @@ trace.setLevel(TraceSystem.DEBUG);
synchronized (database) {
Record record = (Record) obj;
if (trace.isDebugEnabled()) {
int test;
if (record.getPos() == 1) {
System.out.println("pause");
}
trace.debug("writeBack " + record);
}
int todoRemoveParameter;
......@@ -432,10 +437,6 @@ trace.setLevel(TraceSystem.DEBUG);
synchronized (database) {
if (trace.isDebugEnabled()) {
if (!record.isChanged()) {
int test;
if(record.getPos() == 1) {
System.out.println("pause");
}
trace.debug("updateRecord " + record.toString());
}
}
......@@ -817,6 +818,7 @@ trace.setLevel(TraceSystem.DEBUG);
cols.add(new Column("PARENT", Value.INT));
cols.add(new Column("HEAD", Value.INT));
cols.add(new Column("COLUMNS", Value.STRING));
// new CompareMode()
metaSchema = new Schema(database, 0, "", null, true);
int headPos = metaTableRootPageId;
metaTable = new TableData(metaSchema, "PAGE_INDEX",
......@@ -906,6 +908,9 @@ trace.setLevel(TraceSystem.DEBUG);
row.setValue(4, ValueString.get(columnList));
row.setPos(id + 1);
metaIndex.add(database.getSystemSession(), row);
int assertion;
metaIndex.getRow(database.getSystemSession(), row.getPos());
}
/**
......
......@@ -13,8 +13,14 @@ import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.io.OutputStream;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.Iterator;
import java.util.Random;
import java.util.Set;
import java.util.TreeSet;
import org.h2.constant.SysProperties;
import org.h2.engine.ConnectionInfo;
import org.h2.engine.Database;
......@@ -58,13 +64,87 @@ public class TestPageStore extends TestBase {
}
/**
 * Run the enabled page store tests. Most tests are commented out in this
 * revision; the diff shows testBtreeIndex both enabled (old) and
 * disabled (new).
 */
public void test() throws Exception {
testFuzzOperations();
testScanIndex();
// (diff: previous revision still ran this)
testBtreeIndex();
// testBtreeIndex();
// testAllocateFree();
// testStreamFuzz();
// testStreamPerformance(false, 1000);
// testPerformance(true, 1000000);
// testPerformance(false, 1000000);
}
/**
 * Run the random-operation fuzz test for a number of seeds and fail with
 * the failing operation index and seed.
 */
private void testFuzzOperations() throws SQLException {
int best = Integer.MAX_VALUE;
for (int i = 0; i < 10; i++) {
// x >= 0 is the index of the first failing operation for this seed
int x = testFuzzOperationsSeed(i, 10);
if (x >= 0 && x < best) {
best = x;
// NOTE(review): fail() presumably throws, so the loop stops at the
// first failing seed and 'best' is never used afterwards - confirm
fail("op:" + x + " seed:" + i);
}
}
}
private int testFuzzOperationsSeed(int seed, int len) throws SQLException {
deleteDb("test");
Connection conn = getConnection("test");
Statement stat = conn.createStatement();
log("DROP TABLE IF EXISTS TEST;");
stat.execute("DROP TABLE IF EXISTS TEST");
log("CREATE TABLE TEST(ID INT PRIMARY KEY, NAME VARCHAR DEFAULT 'Hello World');");
stat.execute("CREATE TABLE TEST(ID INT PRIMARY KEY, NAME VARCHAR DEFAULT 'Hello World')");
Set rows = new TreeSet();
Random random = new Random(seed);
for (int i = 0; i < len; i++) {
int op = random.nextInt(3);
Integer x = new Integer(random.nextInt(100));
switch(op) {
case 0:
if (!rows.contains(x)) {
log("insert into test(id) values(" + x + ");");
stat.execute("INSERT INTO TEST(ID) VALUES("+ x + ");");
rows.add(x);
}
break;
case 1:
if (rows.contains(x)) {
log("delete from test where id=" + x + ";");
stat.execute("DELETE FROM TEST WHERE ID=" + x);
rows.remove(x);
}
break;
case 2:
conn.close();
conn = getConnection("test");
stat = conn.createStatement();
ResultSet rs = stat.executeQuery("SELECT * FROM TEST ORDER BY ID");
log("--reconnect");
for (Iterator it = rows.iterator(); it.hasNext();) {
int test = ((Integer) it.next()).intValue();
if (!rs.next()) {
log("error: expected next");
conn.close();
return i;
}
int y = rs.getInt(1);
// System.out.println(" " + x);
if (y != test) {
log("error: " + y + " <> " + test);
conn.close();
return i;
}
}
if (rs.next()) {
log("error: unexpected next");
conn.close();
return i;
}
}
}
conn.close();
return -1;
}
/**
 * Write a message, indented by one space, to the test trace output.
 *
 * @param m the message to log
 */
private void log(String m) {
String line = " " + m;
trace(line);
}
private void testBtreeIndex() throws SQLException {
......
Markdown 格式
0%
您已将 0 人添加到此讨论，请谨慎操作。
请先完成此评论的编辑!
请先注册或登录，然后发表评论