提交 cf7264a1 authored 作者: Thomas Mueller's avatar Thomas Mueller

new experimental page store

上级 d832e4c0
...@@ -2275,7 +2275,7 @@ ANALYZEを実行する (詳細はドキュメントをご覧下さい) ...@@ -2275,7 +2275,7 @@ ANALYZEを実行する (詳細はドキュメントをご覧下さい)
理論上のデータの制限は現在256GBです。これはBLOBとCLOBデータを含まない状態での値です: BLOB、CLOBの最大値はそれぞれ256GB以上の設定が可能です。インデックスデータのサイズ制限も同様に256GBです。 理論上のデータの制限は現在256GBです。これはBLOBとCLOBデータを含まない状態での値です: BLOB、CLOBの最大値はそれぞれ256GB以上の設定が可能です。インデックスデータのサイズ制限も同様に256GBです。
@faq_1032_p @faq_1032_p
#The maximum number of rows per table is 2'147'483'648. #FAT、FAT32ファイルシステムの最大ファイルサイズは4GBです。よって、FATやFAT32を利用する場合は、データの制限は4GBということになります。 #The maximum number of rows per table is 2'147'483'648.
@faq_1033_p @faq_1033_p
FAT、FAT32ファイルシステムの最大ファイルサイズは4GBです。よって、FATやFAT32を利用する場合は、データの制限は4GBということになります。 FAT、FAT32ファイルシステムの最大ファイルサイズは4GBです。よって、FATやFAT32を利用する場合は、データの制限は4GBということになります。
...@@ -2332,7 +2332,7 @@ FAT、FAT32ファイルシステムの最大ファイルサイズは4GBです。 ...@@ -2332,7 +2332,7 @@ FAT、FAT32ファイルシステムの最大ファイルサイズは4GBです。
試験的に考慮された箇所は以下の通り: 試験的に考慮された箇所は以下の通り:
@faq_1051_li @faq_1051_li
#The PostgreSQL server #他のデータベースとの互換モード (一部の特徴のみ提供される) #The PostgreSQL server
@faq_1052_li @faq_1052_li
他のデータベースとの互換モード (一部の特徴のみ提供される) 他のデータベースとの互換モード (一部の特徴のみ提供される)
......
...@@ -41,5 +41,10 @@ class Page { ...@@ -41,5 +41,10 @@ class Page {
* An overflow pages (more to come). * An overflow pages (more to come).
*/ */
static final int TYPE_DATA_OVERFLOW_WITH_MORE = 7; static final int TYPE_DATA_OVERFLOW_WITH_MORE = 7;
/**
* This is a root page.
*/
static final int ROOT = 0;
} }
...@@ -29,12 +29,12 @@ abstract class PageData { ...@@ -29,12 +29,12 @@ abstract class PageData {
/** /**
* the page number. * the page number.
*/ */
protected final int pageId; protected int pageId;
/** /**
* The page number of the parent. * The page number of the parent.
*/ */
protected final int parentPageId; protected int parentPageId;
/** /**
* The number of entries. * The number of entries.
...@@ -42,9 +42,9 @@ abstract class PageData { ...@@ -42,9 +42,9 @@ abstract class PageData {
protected int entryCount; protected int entryCount;
/** /**
* If the page has unwritten changes. * The row keys.
*/ */
protected boolean changed; protected int[] keys;
PageData(PageScanIndex index, int pageId, int parentPageId, DataPageBinary data) { PageData(PageScanIndex index, int pageId, int parentPageId, DataPageBinary data) {
this.index = index; this.index = index;
...@@ -53,6 +53,28 @@ abstract class PageData { ...@@ -53,6 +53,28 @@ abstract class PageData {
this.data = data; this.data = data;
} }
/**
 * Binary search for a key among the first {@code entryCount} elements of
 * {@code keys}. The search expects the keys to be in ascending order.
 *
 * @param key the key to look for (may not exist)
 * @return the index of the key if it is present; otherwise the index of the
 *         first entry with a larger key, or {@code entryCount} if there is none
 */
int find(int key) {
    int low = 0;
    int high = entryCount;
    while (low < high) {
        // unsigned shift keeps the midpoint correct even if low + high overflows
        int mid = (low + high) >>> 1;
        int candidate = keys[mid];
        if (candidate == key) {
            return mid;
        }
        if (candidate < key) {
            low = mid + 1;
        } else {
            high = mid;
        }
    }
    return low;
}
/** /**
* Read the data. * Read the data.
*/ */
...@@ -72,11 +94,66 @@ abstract class PageData { ...@@ -72,11 +94,66 @@ abstract class PageData {
* *
* @return the cursor * @return the cursor
*/ */
abstract Cursor find(); abstract Cursor find() throws SQLException;
/** /**
* Write the page. * Write the page.
*/ */
abstract void write() throws SQLException; abstract void write() throws SQLException;
/**
* Get the key stored at the given position of the keys array.
* The position is used as is; there is no range check other than
* the array access itself.
*
* @param index the position within the keys array
* @return the key at that position
*/
int getKey(int index) {
return keys[index];
}
/**
* Split the page at the given point.
*
* @param splitPoint the index where to split
* @return the new page created by the split (the callers visible in this
*         file pass a split point of about half the entry count)
* @throws SQLException if the split fails
*/
abstract PageData split(int splitPoint) throws SQLException;
/**
* Change the page id.
* Only the in-memory field is updated; nothing is written by this method.
*
* @param id the new page id
*/
void setPageId(int id) {
this.pageId = id;
}
/**
* Get the page id.
*
* @return the current page id of this page
*/
int getPageId() {
return pageId;
}
/**
* Get the last key of a page.
*
* @return the last key
*/
abstract int getLastKey() throws SQLException;
/**
* Get the first child leaf page of a page.
*
* @return the page
*/
abstract PageDataLeaf getFirstLeaf() throws SQLException;
/**
* Change the parent page id.
* Only the in-memory field is updated; nothing is written by this method.
*
* @param id the new parent page id
*/
void setParentPageId(int id) {
this.parentPageId = id;
}
} }
...@@ -15,14 +15,14 @@ import org.h2.store.DataPageBinary; ...@@ -15,14 +15,14 @@ import org.h2.store.DataPageBinary;
/** /**
* A leaf page that contains data of one or multiple rows. * A leaf page that contains data of one or multiple rows.
* Format: * Format:
* <ul><li>0-3: parent page id * <ul><li>0-3: parent page id (0 for root)
* </li><li>4-4: page type * </li><li>4-4: page type
* </li><li>5-5: entry count * </li><li>5-6: entry count
* </li><li>only if there is overflow: 6-9: overflow page id * </li><li>only if there is overflow: 7-10: overflow page id
* </li><li>list of offsets (2 bytes each) * </li><li>list of key / offset pairs (4 bytes key, 2 bytes offset)
* </li></ul> * </li></ul>
* The format of an overflow page is: * The format of an overflow page is:
* <ul><li>0-3: parent page id * <ul><li>0-3: parent page id (0 for root)
* </li><li>4-4: page type * </li><li>4-4: page type
* </li><li>only if there is overflow: 5-8: next overflow page id * </li><li>only if there is overflow: 5-8: next overflow page id
* </li><li>data * </li><li>data
...@@ -57,13 +57,15 @@ class PageDataLeaf extends PageData { ...@@ -57,13 +57,15 @@ class PageDataLeaf extends PageData {
void read() throws SQLException { void read() throws SQLException {
data.setPos(4); data.setPos(4);
int type = data.readByte(); int type = data.readByte();
entryCount = data.readByte() & 255; entryCount = data.readShortInt();
offsets = new int[entryCount]; offsets = new int[entryCount];
keys = new int[entryCount];
rows = new Row[entryCount]; rows = new Row[entryCount];
if (type == Page.TYPE_DATA_LEAF_WITH_OVERFLOW) { if (type == Page.TYPE_DATA_LEAF_WITH_OVERFLOW) {
overflowPageId = data.readInt(); overflowPageId = data.readInt();
} }
for (int i = 0; i < entryCount; i++) { for (int i = 0; i < entryCount; i++) {
keys[i] = data.readInt();
offsets[i] = data.readShortInt(); offsets[i] = data.readShortInt();
} }
start = data.length(); start = data.length();
...@@ -83,11 +85,12 @@ class PageDataLeaf extends PageData { ...@@ -83,11 +85,12 @@ class PageDataLeaf extends PageData {
type = Page.TYPE_DATA_LEAF_WITH_OVERFLOW; type = Page.TYPE_DATA_LEAF_WITH_OVERFLOW;
} }
data.writeByte((byte) type); data.writeByte((byte) type);
data.writeByte((byte) entryCount); data.writeShortInt(entryCount);
if (overflowPageId != 0) { if (overflowPageId != 0) {
data.writeInt(overflowPageId); data.writeInt(overflowPageId);
} }
for (int i = 0; i < entryCount; i++) { for (int i = 0; i < entryCount; i++) {
data.writeInt(keys[i]);
data.writeShortInt(offsets[i]); data.writeShortInt(offsets[i]);
} }
for (int i = 0; i < entryCount; i++) { for (int i = 0; i < entryCount; i++) {
...@@ -115,38 +118,71 @@ class PageDataLeaf extends PageData { ...@@ -115,38 +118,71 @@ class PageDataLeaf extends PageData {
* @return the split point of this page, or 0 if no split is required * @return the split point of this page, or 0 if no split is required
*/ */
int addRow(Row row) throws SQLException { int addRow(Row row) throws SQLException {
if (entryCount >= 255) {
return entryCount / 2;
}
int rowLength = row.getByteCount(data); int rowLength = row.getByteCount(data);
int last = entryCount == 0 ? index.getPageStore().getPageSize() : offsets[entryCount - 1]; int last = entryCount == 0 ? index.getPageStore().getPageSize() : offsets[entryCount - 1];
int offset = last - rowLength; int offset = last - rowLength;
if (offset < start + 2) { int[] newOffsets = new int[entryCount + 1];
int[] newKeys = new int[entryCount + 1];
Row[] newRows = new Row[entryCount + 1];
int x;
if (entryCount == 0) {
x = 0;
} else {
x = find(row.getPos());
System.arraycopy(offsets, 0, newOffsets, 0, x);
System.arraycopy(keys, 0, newKeys, 0, x);
System.arraycopy(rows, 0, newRows, 0, x);
if (x < entryCount) {
System.arraycopy(offsets, x, newOffsets, x + 1, entryCount - x);
System.arraycopy(keys, x, newKeys, x + 1, entryCount - x);
System.arraycopy(rows, x, newRows, x + 1, entryCount - x);
}
}
entryCount++;
start += 6;
newOffsets[x] = offset;
newKeys[x] = row.getPos();
newRows[x] = row;
offsets = newOffsets;
keys = newKeys;
rows = newRows;
if (offset < start) {
if (entryCount > 0) { if (entryCount > 0) {
int todoSplitAtLastInsertionPoint;
return entryCount / 2; return entryCount / 2;
} }
offset = start + 2; offset = start + 6;
overflowPageId = index.getPageStore().allocatePage(); overflowPageId = index.getPageStore().allocatePage();
int todoWriteOverflow;
}
write();
return 0;
}
private void removeRow(int index) throws SQLException {
entryCount--;
if (entryCount <= 0) {
Message.getInternalError();
} }
changed = true;
entryCount++;
int[] newOffsets = new int[entryCount]; int[] newOffsets = new int[entryCount];
int[] newKeys = new int[entryCount];
Row[] newRows = new Row[entryCount]; Row[] newRows = new Row[entryCount];
System.arraycopy(offsets, 0, newOffsets, 0, entryCount - 1); System.arraycopy(offsets, 0, newOffsets, 0, index);
System.arraycopy(rows, 0, newRows, 0, entryCount - 1); System.arraycopy(keys, 0, newKeys, 0, index);
start += 2; System.arraycopy(rows, 0, newRows, 0, index);
newOffsets[entryCount - 1] = offset; System.arraycopy(offsets, index + 1, newOffsets, index, entryCount - index);
newRows[entryCount - 1] = row; System.arraycopy(keys, index + 1, newKeys, index, entryCount - index);
System.arraycopy(rows, index + 1, newRows, index, entryCount - index);
start -= 6;
offsets = newOffsets; offsets = newOffsets;
keys = newKeys;
rows = newRows; rows = newRows;
write();
return 0;
} }
Cursor find() { Cursor find() {
return new PageScanCursor(this, 0); return new PageScanCursor(this, 0);
} }
/** /**
* Get the row at the given index. * Get the row at the given index.
* *
...@@ -158,6 +194,7 @@ class PageDataLeaf extends PageData { ...@@ -158,6 +194,7 @@ class PageDataLeaf extends PageData {
if (r == null) { if (r == null) {
data.setPos(offsets[index]); data.setPos(offsets[index]);
r = this.index.readRow(data); r = this.index.readRow(data);
r.setPos(keys[index]);
rows[index] = r; rows[index] = r;
} }
return r; return r;
...@@ -166,5 +203,32 @@ class PageDataLeaf extends PageData { ...@@ -166,5 +203,32 @@ class PageDataLeaf extends PageData {
int getEntryCount() { int getEntryCount() {
return entryCount; return entryCount;
} }
/**
 * Move all rows from the split point onwards into a newly allocated leaf
 * page. The new page gets the same parent page id as this page.
 *
 * @param splitPoint the index of the first row to move
 * @return the new leaf page
 */
PageData split(int splitPoint) throws SQLException {
    int siblingId = index.getPageStore().allocatePage();
    PageDataLeaf sibling = new PageDataLeaf(index, siblingId, parentPageId, index.getPageStore().createDataPage());
    // removeRow decrements entryCount, so the row at splitPoint is always
    // the next one to move and the loop ends once the tail is empty
    while (splitPoint < entryCount) {
        Row moved = getRow(splitPoint);
        sibling.addRow(moved);
        removeRow(splitPoint);
    }
    return sibling;
}
/**
 * Get the key of the last row in this leaf, taken from the position of the
 * row at index {@code entryCount - 1}.
 * NOTE(review): assumes the page holds at least one row - confirm callers.
 *
 * @return the position of the last row
 */
int getLastKey() throws SQLException {
    int todoRemove;
    Row lastRow = getRow(entryCount - 1);
    return lastRow.getPos();
}
/**
 * Get the leaf page that follows this one by asking the parent node,
 * passing the last key of this page.
 *
 * @return the page returned by the parent, or null if the parent page id
 *         is {@code Page.ROOT}
 */
public PageDataLeaf getNextPage() throws SQLException {
    if (parentPageId != Page.ROOT) {
        PageDataNode parent = (PageDataNode) index.getPage(parentPageId);
        int lastKey = keys[entryCount - 1];
        return parent.getNextPage(lastKey);
    }
    return null;
}
/**
* Get the first leaf page of this page. A leaf page returns itself.
*
* @return this page
*/
PageDataLeaf getFirstLeaf() {
return this;
}
} }
...@@ -9,6 +9,7 @@ package org.h2.index; ...@@ -9,6 +9,7 @@ package org.h2.index;
import java.sql.SQLException; import java.sql.SQLException;
import org.h2.result.Row; import org.h2.result.Row;
import org.h2.result.SearchRow;
import org.h2.store.DataPageBinary; import org.h2.store.DataPageBinary;
/** /**
...@@ -16,35 +17,129 @@ import org.h2.store.DataPageBinary; ...@@ -16,35 +17,129 @@ import org.h2.store.DataPageBinary;
* Format: * Format:
* <ul><li>0-3: parent page id * <ul><li>0-3: parent page id
* </li><li>4-4: page type * </li><li>4-4: page type
* </li><li>5-5: entry count * </li><li>5-6: entry count
* </li><li>6- entries: 4 bytes leaf page id, 4 bytes key * </li><li>7-10: rightmost child page id
* </li><li>11- entries: 4 bytes leaf page id, 4 bytes key
* </li></ul> * </li></ul>
*/ */
class PageDataNode extends PageData { class PageDataNode extends PageData {
// optimization /**
// int childrenEntryCount; * The page ids of the children.
*/
int[] childPageIds;
PageDataNode(PageScanIndex index, int pageId, int parentPageId, DataPageBinary data) { PageDataNode(PageScanIndex index, int pageId, int parentPageId, DataPageBinary data) {
super(index, pageId, parentPageId, data); super(index, pageId, parentPageId, data);
int todoOptimizationChildrenEntryCount;
} }
void read() { void read() {
int todo; data.setPos(5);
entryCount = data.readShortInt();
childPageIds = new int[entryCount + 1];
childPageIds[entryCount] = data.readInt();
keys = new int[entryCount];
for (int i = 0; i < entryCount; i++) {
childPageIds[i] = data.readInt();
keys[i] = data.readInt();
}
}
void write() throws SQLException {
data.reset();
data.writeInt(parentPageId);
data.writeByte((byte) Page.TYPE_DATA_NODE);
data.writeShortInt(entryCount);
data.writeInt(childPageIds[entryCount]);
for (int i = 0; i < entryCount; i++) {
data.writeInt(childPageIds[i]);
data.writeInt(keys[i]);
}
index.getPageStore().writePage(pageId, data);
} }
int addRow(Row row) throws SQLException { int addRow(Row row) throws SQLException {
int todo; int x = find(row.getPos());
PageData page = index.getPage(childPageIds[x]);
int splitPoint = page.addRow(row);
if (splitPoint == 0) {
return 0;
}
int pivot = page.getKey(splitPoint);
PageData page2 = page.split(splitPoint);
int[] newKeys = new int[entryCount + 1];
int[] newChildPageIds = new int[entryCount + 2];
System.arraycopy(keys, 0, newKeys, 0, x);
System.arraycopy(childPageIds, 0, newChildPageIds, 0, x);
if (x < entryCount) {
System.arraycopy(keys, x, newKeys, x + 1, entryCount - x);
System.arraycopy(childPageIds, x, newChildPageIds, x + 1, entryCount - x + 1);
}
newKeys[x] = pivot;
newChildPageIds[x] = page2.getPageId();
keys = newKeys;
childPageIds = newChildPageIds;
entryCount++;
int maxEntries = (index.getPageStore().getPageSize() - 11) / 8;
if (entryCount >= maxEntries) {
int todoSplitAtLastInsertionPoint;
return entryCount / 2;
}
write();
return 0; return 0;
} }
Cursor find() { Cursor find() throws SQLException {
int child = childPageIds[0];
return index.getPage(child).find();
}
PageData split(int splitPoint) throws SQLException {
int todo; int todo;
return null; return null;
} }
void write() throws SQLException { /**
int todo; * Initialize the page.
*
* @param page1 the first child page
* @param pivot the pivot key
* @param page2 the last child page
*/
void init(PageData page1, int pivot, PageData page2) {
    // one key separating exactly two children
    entryCount = 1;
    int firstChildId = page1.getPageId();
    int lastChildId = page2.getPageId();
    childPageIds = new int[] { firstChildId, lastChildId };
    keys = new int[] { pivot };
}
/**
 * Get the last key below this node by delegating to the child page stored
 * at index {@code entryCount} of the child page id array.
 *
 * @return the last key reported by that child page
 */
int getLastKey() throws SQLException {
    int todoRemove;
    PageData lastChild = index.getPage(childPageIds[entryCount]);
    return lastChild.getLastKey();
}
/**
 * Get the leaf page that follows the leaf ending with the given key.
 * The child after the one found for the key is used if there is one;
 * otherwise the request is passed on to the parent node, using the last
 * key of this node.
 *
 * @param key the last key of the current page
 * @return the next leaf page, or null if there is no further child and the
 *         parent page id is {@code Page.ROOT}
 */
public PageDataLeaf getNextPage(int key) throws SQLException {
    int nextChild = find(key) + 1;
    if (nextChild <= entryCount) {
        // a following child exists in this node: descend to its first leaf
        return index.getPage(childPageIds[nextChild]).getFirstLeaf();
    }
    if (parentPageId == Page.ROOT) {
        return null;
    }
    PageDataNode parent = (PageDataNode) index.getPage(parentPageId);
    return parent.getNextPage(keys[entryCount - 1]);
}
PageDataLeaf getFirstLeaf() throws SQLException {
int child = childPageIds[0];
return index.getPage(child).getFirstLeaf();
} }
} }
...@@ -38,13 +38,16 @@ class PageScanCursor implements Cursor { ...@@ -38,13 +38,16 @@ class PageScanCursor implements Cursor {
} }
public boolean next() throws SQLException { public boolean next() throws SQLException {
int todo; if (index >= current.getEntryCount()) {
if (index < current.getEntryCount()) { current = current.getNextPage();
row = current.getRow(index); index = 0;
index++; if (current == null) {
return true; return false;
}
} }
return false; row = current.getRow(index);
index++;
return true;
} }
public boolean previous() throws SQLException { public boolean previous() throws SQLException {
......
...@@ -30,6 +30,14 @@ public class PageScanIndex extends BaseIndex implements RowIndex { ...@@ -30,6 +30,14 @@ public class PageScanIndex extends BaseIndex implements RowIndex {
private TableData tableData; private TableData tableData;
private int headPos; private int headPos;
// TODO remember last page with deleted keys (in the root page?),
// and chain such pages
// TODO order pages so that searching for a key
// doesn't seek backwards in the file
private int nextKey;
// TODO remember the row count (in the root page?)
public PageScanIndex(TableData table, int id, IndexColumn[] columns, IndexType indexType, int headPos) throws SQLException { public PageScanIndex(TableData table, int id, IndexColumn[] columns, IndexType indexType, int headPos) throws SQLException {
initBaseIndex(table, id, table.getName() + "_TABLE_SCAN", columns, indexType); initBaseIndex(table, id, table.getName() + "_TABLE_SCAN", columns, indexType);
if (database.isMultiVersion()) { if (database.isMultiVersion()) {
...@@ -44,11 +52,11 @@ public class PageScanIndex extends BaseIndex implements RowIndex { ...@@ -44,11 +52,11 @@ public class PageScanIndex extends BaseIndex implements RowIndex {
if (headPos == Index.EMPTY_HEAD || headPos >= store.getPageCount()) { if (headPos == Index.EMPTY_HEAD || headPos >= store.getPageCount()) {
// new table // new table
headPos = store.allocatePage(); headPos = store.allocatePage();
PageDataLeaf root = new PageDataLeaf(this, headPos, 1, store.createDataPage()); PageDataLeaf root = new PageDataLeaf(this, headPos, Page.ROOT, store.createDataPage());
root.write(); root.write();
} else { } else {
int todo; int todoRowCount;
rowCount = 10; rowCount = getPage(headPos).getLastKey();
} }
this.headPos = headPos; this.headPos = headPos;
table.setRowCount(rowCount); table.setRowCount(rowCount);
...@@ -56,14 +64,36 @@ public class PageScanIndex extends BaseIndex implements RowIndex { ...@@ -56,14 +64,36 @@ public class PageScanIndex extends BaseIndex implements RowIndex {
public void add(Session session, Row row) throws SQLException { public void add(Session session, Row row) throws SQLException {
int invalidateRowCount; row.setPos((int) rowCount);
PageData root = getPage(headPos); PageData root = getPage(headPos);
root.addRow(row); int splitPoint = root.addRow(row);
if (splitPoint != 0) {
int pivot = root.getKey(splitPoint);
PageData page1 = root;
PageData page2 = root.split(splitPoint);
int rootPageId = root.getPageId();
int id = store.allocatePage();
page1.setPageId(id);
page1.setParentPageId(headPos);
PageDataNode newRoot = new PageDataNode(this, rootPageId, Page.ROOT, store.createDataPage());
newRoot.init(page1, pivot, page2);
page1.write();
page2.write();
newRoot.write();
root = newRoot;
}
rowCount++; rowCount++;
} }
private PageData getPage(int id) throws SQLException { /**
* Read the given page.
*
* @param id the page id
* @return the page
*/
PageData getPage(int id) throws SQLException {
DataPageBinary data = store.readPage(id); DataPageBinary data = store.readPage(id);
data.reset();
int parentPageId = data.readInt(); int parentPageId = data.readInt();
int type = data.readByte() & 255; int type = data.readByte() & 255;
PageData result; PageData result;
......
...@@ -572,4 +572,4 @@ localization olivier hprof jps jstack qua processor casting brasilia leap ...@@ -572,4 +572,4 @@ localization olivier hprof jps jstack qua processor casting brasilia leap
daylight vision declarative shape formula webapp catalina study impact daylight vision declarative shape formula webapp catalina study impact
statisticlog activeobjects manske redeployment michael kaspersky datatext statisticlog activeobjects manske redeployment michael kaspersky datatext
bleyl donald conservative offsets diabetes ansorg allocating osmond gluco bleyl donald conservative offsets diabetes ansorg allocating osmond gluco
joachim joachim gpl
\ No newline at end of file \ No newline at end of file
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论