提交 6ab864dc authored 作者: Thomas Mueller

A persistent tree map (work in progress).

上级 e920b890
......@@ -29,6 +29,7 @@ public class TestBtreeMapStore extends TestBase {
}
public void test() {
testTruncateFile();
testFastDelete();
testRollbackInMemory();
testRollbackStored();
......@@ -43,6 +44,29 @@ public class TestBtreeMapStore extends TestBase {
testSimple();
}
// Verifies that compact() shrinks the store file after all data is removed:
// fill a map, store and close; then clear the map, compact, and check that
// the resulting file is smaller than before.
private void testTruncateFile() {
String fileName = getBaseDir() + "/testMeta.h3";
FileUtils.delete(fileName);
BtreeMapStore s;
BtreeMap<Integer, String> m;
s = openStore(fileName);
m = s.openMap("data", Integer.class, String.class);
// populate the map so the file grows to a measurable size
for (int i = 0; i < 1000; i++) {
m.put(i, "Hello World");
}
s.store();
s.close();
long len = FileUtils.size(fileName);
s = openStore(fileName);
m = s.openMap("data", Integer.class, String.class);
m.clear();
s.store();
// fill rate 100 forces a rewrite even of mostly-live chunks
s.compact(100);
s.close();
long len2 = FileUtils.size(fileName);
// the file is expected to have been truncated
assertTrue(len2 < len);
}
private void testFastDelete() {
String fileName = getBaseDir() + "/testMeta.h3";
FileUtils.delete(fileName);
......@@ -53,7 +77,9 @@ public class TestBtreeMapStore extends TestBase {
m = s.openMap("data", Integer.class, String.class);
for (int i = 0; i < 1000; i++) {
m.put(i, "Hello World");
assertEquals(i + 1, m.size());
}
assertEquals(1000, m.size());
s.store();
assertEquals(3, s.getWriteCount());
s.close();
......@@ -61,6 +87,7 @@ public class TestBtreeMapStore extends TestBase {
s = openStore(fileName);
m = s.openMap("data", Integer.class, String.class);
m.clear();
assertEquals(0, m.size());
s.store();
// ensure only nodes are read, but not leaves
assertEquals(4, s.getReadCount());
......@@ -76,7 +103,7 @@ public class TestBtreeMapStore extends TestBase {
assertEquals(-1, s.getRetainChunk());
s.setRetainChunk(0);
assertEquals(0, s.getRetainChunk());
assertEquals(0, s.getCurrentVersion());
assertEquals(1, s.getCurrentVersion());
assertFalse(s.hasUnsavedChanges());
BtreeMap<String, String> m = s.openMap("data", String.class, String.class);
assertTrue(s.hasUnsavedChanges());
......@@ -86,12 +113,14 @@ public class TestBtreeMapStore extends TestBase {
s.rollbackTo(1);
assertEquals("Hello", m.get("1"));
long v2 = s.store();
assertEquals(2, v2);
assertEquals(3, s.getCurrentVersion());
assertFalse(s.hasUnsavedChanges());
s.close();
s = openStore(fileName);
assertEquals(3, s.getCurrentVersion());
s.setRetainChunk(0);
assertEquals(2, s.getCurrentVersion());
meta = s.getMetaMap();
m = s.openMap("data", String.class, String.class);
m0 = s.openMap("data0", String.class, String.class);
......@@ -101,6 +130,7 @@ public class TestBtreeMapStore extends TestBase {
m1.put("1", "Hallo");
assertEquals("Hallo", m.get("1"));
assertEquals("Hallo", m1.get("1"));
assertTrue(s.hasUnsavedChanges());
s.rollbackTo(v2);
assertFalse(s.hasUnsavedChanges());
assertNull(meta.get("map.data1"));
......@@ -111,7 +141,7 @@ public class TestBtreeMapStore extends TestBase {
s = openStore(fileName);
s.setRetainChunk(0);
assertEquals(2, s.getCurrentVersion());
assertEquals(3, s.getCurrentVersion());
meta = s.getMetaMap();
assertTrue(meta.get("map.data") != null);
assertTrue(meta.get("map.data0") != null);
......@@ -123,10 +153,10 @@ public class TestBtreeMapStore extends TestBase {
assertFalse(m0.isReadOnly());
m.put("1", "Hallo");
s.commit();
assertEquals(3, s.getCurrentVersion());
assertEquals(4, s.getCurrentVersion());
long v4 = s.store();
assertEquals(4, v4);
assertEquals(4, s.getCurrentVersion());
assertEquals(5, s.getCurrentVersion());
s.close();
s = openStore(fileName);
......@@ -155,8 +185,14 @@ public class TestBtreeMapStore extends TestBase {
String fileName = getBaseDir() + "/testMeta.h3";
FileUtils.delete(fileName);
BtreeMapStore s = openStore(fileName);
assertEquals(1, s.getCurrentVersion());
s.setMaxPageSize(5);
BtreeMap<String, String> m = s.openMap("data", String.class, String.class);
s.rollbackTo(0);
assertTrue(m.isClosed());
assertEquals(1, s.getCurrentVersion());
m = s.openMap("data", String.class, String.class);
BtreeMap<String, String> m0 = s.openMap("data0", String.class, String.class);
BtreeMap<String, String> m2 = s.openMap("data2", String.class, String.class);
m.put("1", "Hello");
......@@ -165,6 +201,7 @@ public class TestBtreeMapStore extends TestBase {
}
long v1 = s.commit();
assertEquals(1, v1);
assertEquals(2, s.getCurrentVersion());
BtreeMap<String, String> m1 = s.openMap("data1", String.class, String.class);
assertEquals("Test", m2.get("1"));
m.put("1", "Hallo");
......@@ -174,6 +211,7 @@ public class TestBtreeMapStore extends TestBase {
assertEquals("Hallo", m.get("1"));
assertEquals("Hallo", m1.get("1"));
s.rollbackTo(v1);
assertEquals(2, s.getCurrentVersion());
for (int i = 0; i < 10; i++) {
assertEquals("Test", m2.get("" + i));
}
......@@ -195,11 +233,11 @@ public class TestBtreeMapStore extends TestBase {
data.put("1", "Hello");
data.put("2", "World");
s.store();
assertEquals("1/0//", m.get("map.data"));
assertEquals("1/1//", m.get("map.data"));
assertTrue(m.containsKey("chunk.1"));
data.put("1", "Hallo");
s.store();
assertEquals("1/0//", m.get("map.data"));
assertEquals("1/1//", m.get("map.data"));
assertTrue(m.get("root.1").length() > 0);
assertTrue(m.containsKey("chunk.1"));
assertTrue(m.containsKey("chunk.2"));
......@@ -295,7 +333,7 @@ public class TestBtreeMapStore extends TestBase {
m.put(j + i, "Hello " + j);
}
s.store();
s.compact();
s.compact(80);
s.close();
long len = FileUtils.size(fileName);
// System.out.println(" len:" + len);
......@@ -313,13 +351,13 @@ public class TestBtreeMapStore extends TestBase {
m.remove(i);
}
s.store();
s.compact();
s.compact(80);
s.close();
// len = FileUtils.size(fileName);
// System.out.println("len1: " + len);
s = openStore(fileName);
m = s.openMap("data", Integer.class, String.class);
s.compact();
s.compact(80);
s.close();
// len = FileUtils.size(fileName);
// System.out.println("len2: " + len);
......
......@@ -6,7 +6,9 @@
*/
package org.h2.dev.store.btree;
import java.util.AbstractSet;
import java.util.Iterator;
import java.util.Set;
import java.util.TreeMap;
/**
......@@ -22,6 +24,11 @@ public class BtreeMap<K, V> {
private final DataType keyType;
private final DataType valueType;
private final long createVersion;
/**
* The map of old roots. The key is the new version, the value is the root
* before this version.
*/
private final TreeMap<Long, Page> oldRoots = new TreeMap<Long, Page>();
private BtreeMapStore store;
private Page root;
......@@ -45,7 +52,11 @@ public class BtreeMap<K, V> {
public void put(K key, V data) {
checkWrite();
Page oldRoot = root;
root = Page.put(this, root, store.getCurrentVersion(), key, data);
if (containsKey(key)) {
root = Page.set(this, root, store.getCurrentVersion(), key, data);
} else {
root = Page.add(this, root, store.getCurrentVersion(), key, data);
}
markChanged(oldRoot);
}
......@@ -56,7 +67,7 @@ public class BtreeMap<K, V> {
* @return the value, or null if not found
*/
@SuppressWarnings("unchecked")
public V get(K key) {
public V get(Object key) {
checkOpen();
if (root == null) {
return null;
......@@ -64,7 +75,7 @@ public class BtreeMap<K, V> {
return (V) root.find(key);
}
public boolean containsKey(K key) {
public boolean containsKey(Object key) {
return get(key) != null;
}
......@@ -102,7 +113,7 @@ public class BtreeMap<K, V> {
if (root != null) {
root.removeAllRecursive();
}
store.removeMap(id);
store.removeMap(name);
close();
}
......@@ -118,22 +129,25 @@ public class BtreeMap<K, V> {
}
/**
* Remove a key-value pair.
* Remove a key-value pair, if the key exists.
*
* @param key the key
*/
public void remove(K key) {
checkWrite();
if (root != null) {
if (containsKey(key)) {
Page oldRoot = root;
root = Page.remove(root, store.getCurrentVersion(), key);
root = Page.removeExisting(root, store.getCurrentVersion(), key);
markChanged(oldRoot);
}
}
private void markChanged(Page oldRoot) {
if (oldRoot != root) {
oldRoots.put(store.getCurrentVersion(), oldRoot);
long v = store.getCurrentVersion();
if (!oldRoots.containsKey(v)) {
oldRoots.put(v, oldRoot);
}
store.markChanged(this);
}
}
......@@ -201,6 +215,28 @@ public class BtreeMap<K, V> {
return new Cursor<K>(root, from);
}
// Returns a read-only, live view of the keys in this map, backed by a
// Cursor over the current root page.
public Set<K> keySet() {
checkOpen();
return new AbstractSet<K>() {
@Override
public Iterator<K> iterator() {
// iterate from the smallest key (null means no lower bound)
return new Cursor<K>(getRoot(), null);
}
@Override
public int size() {
return BtreeMap.this.size();
}
@Override
public boolean contains(Object o) {
return BtreeMap.this.containsKey(o);
}
};
}
/**
* Get the root page.
*
......@@ -229,7 +265,7 @@ public class BtreeMap<K, V> {
void rollbackTo(long version) {
checkWrite();
if (version <= createVersion) {
if (version < createVersion) {
remove();
} else {
// iterating in ascending order, and pick the last version -
......@@ -238,7 +274,7 @@ public class BtreeMap<K, V> {
Long newestOldVersion = null;
for (Iterator<Long> it = oldRoots.keySet().iterator(); it.hasNext();) {
Long x = it.next();
if (x >= version) {
if (x > version) {
if (newestOldVersion == null) {
newestOldVersion = x;
root = oldRoots.get(x);
......@@ -249,6 +285,10 @@ public class BtreeMap<K, V> {
}
}
// Called after the store was persisted: the old-roots history kept for
// rollback is no longer needed and can be discarded.
void stored() {
oldRoots.clear();
}
public void setReadOnly(boolean readOnly) {
this.readOnly = readOnly;
}
......@@ -286,6 +326,15 @@ public class BtreeMap<K, V> {
return id;
}
// Returns the number of entries, saturating at Integer.MAX_VALUE
// (the underlying entry count is a long; see getSize()).
public int size() {
long size = getSize();
return size > Integer.MAX_VALUE ? Integer.MAX_VALUE : (int) size;
}
// Returns the exact number of entries as a long; 0 if the map is empty
// (no root page).
public long getSize() {
return root == null ? 0 : root.getTotalSize();
}
public boolean equals(Object o) {
return this == o;
}
......
......@@ -38,25 +38,23 @@ header:
blockSize=4096
TODO:
- support fast range deletes
- support custom pager for r-tree, kd-tree
- need an 'end of chunk' marker to verify all data is written
- truncate the file if it is empty
- support custom map types (page types); pager for r-tree, kd-tree
- ability to diff / merge versions
- check if range reads would be faster
- map.getVersion and opening old maps read-only
- limited support for writing to old versions (branches)
- Serializer instead of DataType, (serialize, deserialize)
- implement Map interface
- implement complete java.util.Map interface
- maybe rename to MVStore, MVMap, TestMVStore
- implement Map interface
- atomic operations (test-and-set)
- support back ground writes (store old version)
- atomic test-and-set (when supporting concurrent writes)
- support background writes (store old version)
- re-use map ids that were not used for a very long time
- file header could be a regular chunk, end of file the second
- possibly split chunk data into immutable and mutable
- reduce minimum chunk size, speed up very small transactions
- defragment: use total max length instead of page count (liveCount)
- test with very small chunks, possibly speed up very small transactions
- compact: use total max length instead of page count (liveCount)
- check what happens on concurrent reads and 1 write; multiple writes
- support large binaries
- support stores that span multiple files (chunks stored in other files)
*/
......@@ -65,6 +63,8 @@ TODO:
*/
public class BtreeMapStore {
public static final boolean ASSERT = true;
private static final StringType STRING_TYPE = new StringType();
private final String fileName;
......@@ -106,7 +106,7 @@ public class BtreeMapStore {
private Compressor compressor = new CompressLZF();
private long currentVersion;
private long currentVersion = 1;
private int readCount;
private int writeCount;
......@@ -228,8 +228,8 @@ public class BtreeMapStore {
return openMap(name, keyType, valueType);
}
void removeMap(int id) {
BtreeMap<?, ?> m = maps.remove(id);
void removeMap(String name) {
BtreeMap<?, ?> m = maps.remove(name);
mapsChanged.remove(m);
}
......@@ -382,21 +382,38 @@ public class BtreeMapStore {
return filePos;
}
/**
* Commit the changes, incrementing the current version.
*
* @return the version before the commit
*/
public long commit() {
return currentVersion++;
}
/**
* Commit all changes and persist them to disk. This method does nothing if
* there are no unsaved changes.
* there are no unsaved changes, otherwise it stores the data and increments
* the current version.
*
* @return the new version
* @return the version before the commit
*/
public long store() {
if (!hasUnsavedChanges()) {
return currentVersion;
}
long newVersion = commit();
// the last chunk might have been changed in the last save()
// this needs to be updated now (it's better not to update right after,
// save(), because that would modify the meta map again)
Chunk c = chunks.get(lastChunkId);
if (c != null) {
meta.put("chunk." + c.id, c.toString());
}
int chunkId = ++lastChunkId;
Chunk c = new Chunk(chunkId);
c = new Chunk(chunkId);
c.entryCount = Integer.MAX_VALUE;
c.liveCount = Integer.MAX_VALUE;
c.start = Long.MAX_VALUE;
......@@ -415,9 +432,6 @@ public class BtreeMapStore {
}
applyFreedChunks();
}
for (int x : removedChunks) {
chunks.remove(x);
}
int count = 0;
int maxLength = 1 + 4 + 4 + 8;
for (BtreeMap<?, ?> m : mapsChanged.values()) {
......@@ -464,6 +478,14 @@ public class BtreeMapStore {
int length = buff.limit();
long filePos = allocateChunk(length);
// need to keep old chunks
// until they are are no longer referenced
// by a old version
// so empty space is not reused too early
for (int x : removedChunks) {
chunks.remove(x);
}
buff.rewind();
buff.put((byte) 'c');
buff.putInt(length);
......@@ -478,14 +500,16 @@ public class BtreeMapStore {
throw new RuntimeException(e);
}
rootChunkStart = filePos;
writeHeader();
revertTemp();
// update the start position and length
c.start = filePos;
c.length = length;
meta.put("chunk." + c.id, c.toString());
return newVersion;
long version = commit();
// write the new version (after the commit)
writeHeader();
shrinkFileIfPossible();
return version;
}
private void applyFreedChunks() {
......@@ -499,17 +523,32 @@ public class BtreeMapStore {
freedChunks.clear();
}
// Truncates the store file down to the last used block, reclaiming
// trailing free space left behind by removed chunks.
private void shrinkFileIfPossible() {
long used = getFileLengthUsed();
try {
if (used < file.size()) {
file.truncate(used);
}
} catch (Exception e) {
throw convert(e);
}
}
/**
 * Get the length of the used part of the file: the end of the last chunk,
 * rounded up to the next block boundary.
 *
 * @return the used file length in bytes
 */
private long getFileLengthUsed() {
long size = 0;
for (Chunk c : chunks.values()) {
if (c.start == Long.MAX_VALUE) {
// chunk was allocated but not yet positioned in the file
continue;
}
// use long arithmetic throughout: the previous int-based math
// (int blockIndex, int * blockSize) overflowed for files >= 2 GB
long last = (c.start + c.length) / blockSize;
size = Math.max(size, (last + 1) * blockSize);
}
return size;
}
private long allocateChunk(long length) {
if (!reuseSpace) {
int min = 0;
for (Chunk c : chunks.values()) {
if (c.start == Long.MAX_VALUE) {
continue;
}
int last = (int) ((c.start + c.length) / blockSize);
min = Math.max(min, last + 1);
}
return min * blockSize;
return getFileLengthUsed();
}
BitSet set = new BitSet();
set.set(0);
......@@ -526,7 +565,7 @@ public class BtreeMapStore {
for (int i = 0; i < set.size(); i++) {
if (!set.get(i)) {
boolean ok = true;
for (int j = 1; j <= required; j++) {
for (int j = 0; j < required; j++) {
if (set.get(i + j)) {
ok = false;
break;
......@@ -552,15 +591,6 @@ public class BtreeMapStore {
return id;
}
/**
* Commit the changes, incrementing the current version.
*
* @return the new version
*/
public long commit() {
return ++currentVersion;
}
/**
* Check whether there are any unsaved changes.
*
......@@ -603,31 +633,42 @@ public class BtreeMapStore {
/**
* Try to reduce the file size. Chunks with a low number of live items will
* be re-written.
* be re-written. If the current fill rate is higher than the target fill
* rate, no optimization is done.
*
* @param fillRate the minimum percentage of live entries
* @return if anything was written
*/
public void compact() {
if (chunks.size() <= 1) {
return;
public boolean compact(int fillRate) {
if (chunks.size() == 0) {
// avoid division by 0
return false;
}
long liveCountTotal = 0, entryCountTotal = 0;
for (Chunk c : chunks.values()) {
entryCountTotal += c.entryCount;
liveCountTotal += c.liveCount;
}
int averageEntryCount = (int) (entryCountTotal / chunks.size());
if (entryCountTotal == 0) {
return;
if (entryCountTotal <= 0) {
// avoid division by 0
entryCountTotal = 1;
}
int percentTotal = (int) (100 * liveCountTotal / entryCountTotal);
if (percentTotal > 80) {
return;
if (percentTotal > fillRate) {
return false;
}
// calculate how many entries a chunk has on average
// TODO use the max size instead of the count
int averageEntryCount = (int) (entryCountTotal / chunks.size());
// the 'old' list contains the chunks we want to free up
ArrayList<Chunk> old = New.arrayList();
for (Chunk c : chunks.values()) {
int age = lastChunkId - c.id + 1;
c.collectPriority = c.getFillRate() / age;
old.add(c);
}
// sort the list, so the first entry should be collected first
Collections.sort(old, new Comparator<Chunk>() {
public int compare(Chunk o1, Chunk o2) {
return new Integer(o1.collectPriority).compareTo(o2.collectPriority);
......@@ -635,6 +676,8 @@ public class BtreeMapStore {
});
int moveCount = 0;
Chunk move = null;
// find out up to were we need to move
// try to move one (average sized) chunk
for (Chunk c : old) {
if (moveCount + c.liveCount > averageEntryCount) {
break;
......@@ -643,6 +686,8 @@ public class BtreeMapStore {
moveCount += c.liveCount;
move = c;
}
// remove the chunks we want to keep from this list
boolean remove = false;
for (Iterator<Chunk> it = old.iterator(); it.hasNext();) {
Chunk c = it.next();
......@@ -652,25 +697,34 @@ public class BtreeMapStore {
it.remove();
}
}
// TODO not needed - we already have the chunk object
Chunk header = readChunkHeader(move.start);
log(" meta:" + move.id + "/" + header.metaRootPos + " start: " + move.start);
while (!isKnownVersion(move.version)) {
int id = move.id;
while (true) {
Chunk m = chunks.get(++id);
if (id > lastChunkId) {
// no known version
return false;
}
if (m != null) {
move = m;
break;
}
}
}
// the metaRootPos might not be set
move = readChunkHeader(move.start);
log(" meta:" + move.id + "/" + move.metaRootPos + " start: " + move.start);
// change at least one entry in the map
// to ensure a chunk will be written
// (even if there is nothing to move)
meta.put("chunk." + move.id, move.toString());
BtreeMap<String, String> oldMeta = new BtreeMap<String, String>(this, 0, "old-meta", STRING_TYPE, STRING_TYPE, 0);
oldMeta.setRootPos(header.metaRootPos);
oldMeta.setRootPos(move.metaRootPos);
Iterator<String> it = oldMeta.keyIterator(null);
ArrayList<Integer> oldChunks = New.arrayList();
while (it.hasNext()) {
String k = it.next();
String s = oldMeta.get(k);
log(" " + k + " " + s.replace('\n', ' '));
if (k.startsWith("chunk.")) {
Chunk c = Chunk.fromString(s);
if (!chunks.containsKey(c.id)) {
oldChunks.add(c.id);
chunks.put(c.id, c);
}
continue;
}
if (!k.startsWith("map.")) {
continue;
}
......@@ -696,6 +750,7 @@ public class BtreeMapStore {
Page p = data.getPage(o);
if (p == null) {
// was removed later - ignore
// or the chunk no longer exists
} else if (p.getPos() < 0) {
// temporarily changed - ok
// TODO move old data if there is an uncommitted change?
......@@ -711,9 +766,8 @@ public class BtreeMapStore {
}
}
}
for (int o : oldChunks) {
chunks.remove(o);
}
store();
return true;
}
/**
......@@ -829,13 +883,17 @@ public class BtreeMapStore {
if (version > currentVersion || version < 0) {
return false;
}
if (chunks.size() == 0) {
if (version == currentVersion || chunks.size() == 0) {
// no stored data
return true;
}
// need to check if a chunk for this version exists
Chunk c = getChunkForVersion(version);
if (c == null) {
return false;
}
// also, all check referenced by this version
// need to be available in the file
BtreeMap<String, String> oldMeta = getMetaMap(version);
if (oldMeta == null) {
return false;
......@@ -857,7 +915,7 @@ public class BtreeMapStore {
* forgotten. All maps that were created later are closed. A rollback to
* a version before the last stored version is immediately persisted.
*
* @param version the version to keep
* @param version the version to revert to
*/
public void rollbackTo(long version) {
if (!isKnownVersion(version)) {
......@@ -896,7 +954,7 @@ public class BtreeMapStore {
for (BtreeMap<?, ?> m : maps.values()) {
if (m.getCreatedVersion() > version) {
m.close();
removeMap(m.getId());
removeMap(m.getName());
} else {
if (loadFromFile) {
String r = meta.get("root." + m.getId());
......@@ -905,11 +963,14 @@ public class BtreeMapStore {
}
}
}
this.currentVersion = version;
this.currentVersion = version + 1;
}
private void revertTemp() {
freedChunks.clear();
for (BtreeMap<?, ?> m : mapsChanged.values()) {
m.stored();
}
mapsChanged.clear();
temp.clear();
tempPageId = 0;
......@@ -917,8 +978,7 @@ public class BtreeMapStore {
/**
* Get the current version of the store. When a new store is created, the
* version is 0. For each commit, it is incremented by one if there was a
* change.
* version is 1. For each commit, it is incremented by one.
*
* @return the version
*/
......
......@@ -35,6 +35,11 @@ public class DataUtils {
*/
public static final int MAX_VAR_INT_LEN = 5;
/**
* The maximum length of a variable size long.
*/
public static final int MAX_VAR_LONG_LEN = 10;
/**
* Get the length of the variable size int.
*
......
......@@ -18,26 +18,27 @@ import org.h2.compress.Compressor;
* For nodes, the key at a given index is larger than the largest key of the
* child at the same index.
* <p>
* File format:
* page length (including length): int
* check value: short
* map id: varInt
* number of keys: varInt
* type: byte (0: leaf, 1: node; +2: compressed)
* compressed: bytes saved (varInt)
* keys
* leaf: values (one for each key)
* node: children (1 more than keys)
* File format: page length (including length): int check value: short map id:
* varInt number of keys: varInt type: byte (0: leaf, 1: node; +2: compressed)
* compressed: bytes saved (varInt) keys leaf: values (one for each key) node:
* children (1 more than keys)
*/
public class Page {
private static final IllegalArgumentException KEY_NOT_FOUND = new IllegalArgumentException(
"Key not found");
private static final IllegalArgumentException KEY_ALREADY_EXISTS = new IllegalArgumentException(
"Key already exists");
private final BtreeMap<?, ?> map;
private final long version;
private long pos;
private Object[] keys;
private Object[] values;
private long[] children;
private long[] childrenSize;
private int cachedCompare;
private long totalSize;
private Page(BtreeMap<?, ?> map, long version) {
this.map = map;
......@@ -54,12 +55,16 @@ public class Page {
* @param children the children
* @return the page
*/
static Page create(BtreeMap<?, ?> map, long version, Object[] keys, Object[] values, long[] children) {
static Page create(BtreeMap<?, ?> map, long version, Object[] keys,
Object[] values, long[] children, long[] childrenSize,
long totalSize) {
Page p = new Page(map, version);
p.pos = map.getStore().registerTempPage(p);
p.keys = keys;
p.values = values;
p.children = children;
p.pos = map.getStore().registerTempPage(p);
p.childrenSize = childrenSize;
p.totalSize = totalSize;
return p;
}
......@@ -71,7 +76,8 @@ public class Page {
* @param buff the source buffer
* @return the page
*/
static Page read(FileChannel file, BtreeMap<?, ?> map, long filePos, long pos) {
static Page read(FileChannel file, BtreeMap<?, ?> map, long filePos,
long pos) {
int maxLength = DataUtils.getPageMaxLength(pos), length = maxLength;
ByteBuffer buff;
try {
......@@ -100,7 +106,8 @@ public class Page {
return this;
}
getStore().removePage(pos);
Page newPage = create(map, writeVersion, keys, values, children);
Page newPage = create(map, writeVersion, keys, values, children,
childrenSize, totalSize);
newPage.cachedCompare = cachedCompare;
return newPage;
}
......@@ -296,6 +303,10 @@ public class Page {
return children == null;
}
// Split this page at the given key index, dispatching to the leaf or
// node-specific implementation.
private Page split(int at) {
return isLeaf() ? splitLeaf(at) : splitNode(at);
}
private Page splitLeaf(int at) {
int a = at, b = keys.length - a;
Object[] aKeys = new Object[a];
......@@ -309,7 +320,9 @@ public class Page {
System.arraycopy(values, 0, aValues, 0, a);
System.arraycopy(values, a, bValues, 0, b);
values = aValues;
Page newPage = create(map, version, bKeys, bValues, null);
totalSize = keys.length;
Page newPage = create(map, version, bKeys, bValues, null, null,
bKeys.length);
return newPage;
}
......@@ -325,97 +338,175 @@ public class Page {
System.arraycopy(children, 0, aChildren, 0, a + 1);
System.arraycopy(children, a + 1, bChildren, 0, b);
children = aChildren;
Page newPage = create(map, version, bKeys, null, bChildren);
long[] aChildrenSize = new long[a + 1];
long[] bChildrenSize = new long[b];
System.arraycopy(childrenSize, 0, aChildrenSize, 0, a + 1);
System.arraycopy(childrenSize, a + 1, bChildrenSize, 0, b);
childrenSize = aChildrenSize;
long t = 0;
for (long x : aChildrenSize) {
t += x;
}
totalSize = t;
t = 0;
for (long x : bChildrenSize) {
t += x;
}
Page newPage = create(map, version, bKeys, null, bChildren,
bChildrenSize, t);
return newPage;
}
/**
* Add or replace the key-value pair.
* Update a value for an existing key.
*
* @param map the map
* @param p the page (may not be null)
* @param writeVersion the write version
* @param key the key
* @param value the value
* @return the root page
* @throws InvalidArgumentException if this key does not exist (without
* stack trace)
*/
/**
 * Update the value for an existing key, copying pages on write along the
 * path from the root to the leaf.
 *
 * @param map the map
 * @param p the page (may not be null)
 * @param writeVersion the write version
 * @param key the key
 * @param value the new value
 * @return the (possibly new) root page
 * @throws IllegalArgumentException if the key does not exist (without
 *             stack trace)
 */
static Page set(BtreeMap<?, ?> map, Page p, long writeVersion, Object key,
Object value) {
if (p == null) {
// an empty map contains no keys
throw KEY_NOT_FOUND;
}
int index = p.findKey(key);
if (p.isLeaf()) {
if (index < 0) {
throw KEY_NOT_FOUND;
}
p = p.copyOnWrite(writeVersion);
p.setValue(index, value);
return p;
}
// it is a node
if (index < 0) {
index = -index - 1;
} else {
index++;
}
Page c = map.readPage(p.children[index]);
Page c2 = set(map, c, writeVersion, key, value);
if (c != c2) {
p = p.copyOnWrite(writeVersion);
// bug fix: pass the child's total size - not its position - as the
// childSize argument of setChild(index, pos, childSize); replacing
// a value does not change the subtree size
p.setChild(index, c2.getPos(), c2.getTotalSize());
}
return p;
}
/**
* Add a new key-value pair.
*
* @param map the map
* @param p the page
* @param p the page (may be null)
* @param writeVersion the write version
* @param key the key
* @param value the value
* @return the root page
* @throws InvalidArgumentException if this key already exists (without
* stack trace)
*/
static Page put(BtreeMap<?, ?> map, Page p, long writeVersion, Object key, Object value) {
static Page add(BtreeMap<?, ?> map, Page p, long writeVersion, Object key,
Object value) {
if (p == null) {
Object[] keys = { key };
Object[] values = { value };
p = create(map, writeVersion, keys, values, null);
p = create(map, writeVersion, keys, values, null, null, 1);
return p;
}
p = p.copyOnWrite(writeVersion);
Page top = p;
Page parent = null;
int parentIndex = 0;
while (true) {
if (parent != null) {
parent.setChild(parentIndex, p.pos);
}
if (!p.isLeaf()) {
if (p.keyCount() >= map.getStore().getMaxPageSize()) {
// TODO almost duplicate code
int pos = p.keyCount() / 2;
Object k = p.keys[pos];
Page split = p.splitNode(pos);
if (parent == null) {
Object[] keys = { k };
long[] children = { p.getPos(), split.getPos() };
top = create(map, writeVersion, keys, null, children);
p = top;
} else {
parent.insert(parentIndex, k, null, split.getPos());
p = parent;
}
}
}
if (p.keyCount() >= map.getStore().getMaxPageSize()) {
// only possible if this is the root,
// otherwise we would have split earlier
p = p.copyOnWrite(writeVersion);
int at = p.keyCount() / 2;
long totalSize = p.getTotalSize();
Object k = p.keys[at];
Page split = p.split(at);
Object[] keys = { k };
long[] children = { p.getPos(), split.getPos() };
long[] childrenSize = { p.getTotalSize(), split.getTotalSize() };
p = create(map, writeVersion, keys, null, children, childrenSize,
totalSize);
// now p is a node; insert continues
} else if (p.isLeaf()) {
int index = p.findKey(key);
if (p.isLeaf()) {
if (index >= 0) {
p.setValue(index, value);
break;
}
index = -index - 1;
p.insert(index, key, value, 0);
if (p.keyCount() >= map.getStore().getMaxPageSize()) {
int pos = p.keyCount() / 2;
Object k = p.keys[pos];
Page split = p.splitLeaf(pos);
if (parent == null) {
Object[] keys = { k };
long[] children = { p.getPos(), split.getPos() };
top = create(map, writeVersion, keys, null, children);
} else {
parent.insert(parentIndex, k, null, split.getPos());
}
}
break;
if (index >= 0) {
throw KEY_ALREADY_EXISTS;
}
if (index < 0) {
index = -index - 1;
index = -index - 1;
p = p.copyOnWrite(writeVersion);
p.insert(index, key, value, 0, 0);
return p;
}
// p is a node
int index = p.findKey(key);
if (index < 0) {
index = -index - 1;
} else {
index++;
}
Page c = map.readPage(p.children[index]);
if (c.keyCount() >= map.getStore().getMaxPageSize()) {
// split on the way down
c = c.copyOnWrite(writeVersion);
int at = c.keyCount() / 2;
Object k = c.keys[at];
Page split = c.split(at);
p = p.copyOnWrite(writeVersion);
p.setChild(index, c.getPos(), c.getTotalSize());
p.insert(index, k, null, split.getPos(), split.getTotalSize());
// now we are not sure where to add
return add(map, p, writeVersion, key, value);
}
Page c2 = add(map, c, writeVersion, key, value);
p = p.copyOnWrite(writeVersion);
// the child might be the same, but not the size
p.setChild(index, c2.getPos(), c2.getTotalSize());
return p;
}
long getTotalSize() {
if (BtreeMapStore.ASSERT) {
long check = 0;
if (isLeaf()) {
check = keys.length;
} else {
index++;
for (long x : childrenSize) {
check += x;
}
}
if (check != totalSize) {
throw new AssertionError("Expected: " + check + " got: "
+ totalSize);
}
parent = p;
parentIndex = index;
p = map.readPage(p.children[index]);
p = p.copyOnWrite(writeVersion);
}
return top;
return totalSize;
}
private void setChild(int index, long value) {
long[] newChildren = new long[children.length];
System.arraycopy(children, 0, newChildren, 0, newChildren.length);
newChildren[index] = value;
children = newChildren;
// Replace the pointer and the cached subtree size of the child at the
// given index. The arrays are copied so that older (shared) versions of
// this page are not modified; totalSize is adjusted by the size delta.
private void setChild(int index, long pos, long childSize) {
if (pos != children[index]) {
long[] newChildren = new long[children.length];
System.arraycopy(children, 0, newChildren, 0, newChildren.length);
newChildren[index] = pos;
children = newChildren;
}
if (childSize != childrenSize[index]) {
long[] newChildrenSize = new long[childrenSize.length];
System.arraycopy(childrenSize, 0, newChildrenSize, 0,
newChildrenSize.length);
newChildrenSize[index] = childSize;
// keep the cached total in sync with the changed child size
totalSize += newChildrenSize[index] - childrenSize[index];
childrenSize = newChildrenSize;
}
}
private void setValue(int index, Object value) {
// create a copy - not always required, but avoid unnecessary cloning
// would require a "modified" flag
// create a copy - not required if already cloned once in this version,
// but avoid unnecessary cloning would require a "modified" flag
Object[] newValues = new Object[values.length];
System.arraycopy(values, 0, newValues, 0, newValues.length);
newValues[index] = value;
......@@ -440,14 +531,18 @@ public class Page {
}
/**
* Remove a key-value pair.
* Remove an existing key-value pair.
*
* @param p the root page
* @param p the page (may not be null)
* @param writeVersion the write version
* @param key the key
* @return the new root page
* @return the new root page (null if empty)
* @throws InvalidArgumentException if not found (without stack trace)
*/
static Page remove(Page p, long writeVersion, Object key) {
static Page removeExisting(Page p, long writeVersion, Object key) {
if (p == null) {
throw KEY_NOT_FOUND;
}
int index = p.findKey(key);
if (p.isLeaf()) {
if (index >= 0) {
......@@ -458,7 +553,7 @@ public class Page {
p = p.copyOnWrite(writeVersion);
p.remove(index);
} else {
// not found
throw KEY_NOT_FOUND;
}
return p;
}
......@@ -469,25 +564,23 @@ public class Page {
index++;
}
Page c = p.map.readPage(p.children[index]);
Page c2 = remove(c, writeVersion, key);
if (c2 == c) {
// not found
} else if (c2 == null) {
// child was deleted
p = p.copyOnWrite(writeVersion);
Page c2 = removeExisting(c, writeVersion, key);
p = p.copyOnWrite(writeVersion);
if (c2 == null) {
// this child was deleted
p.remove(index);
if (p.keyCount() == 0) {
p.getStore().removePage(p.pos);
p = p.map.readPage(p.children[0]);
}
} else {
p = p.copyOnWrite(writeVersion);
p.setChild(index, c2.pos);
p.setChild(index, c2.getPos(), c2.getTotalSize());
}
return p;
}
private void insert(int index, Object key, Object value, long child) {
private void insert(int index, Object key, Object value, long child,
long childSize) {
Object[] newKeys = new Object[keys.length + 1];
DataUtils.copyWithGap(keys, newKeys, keys.length, index);
newKeys[index] = key;
......@@ -497,12 +590,20 @@ public class Page {
DataUtils.copyWithGap(values, newValues, values.length, index);
newValues[index] = value;
values = newValues;
totalSize++;
}
if (children != null) {
long[] newChildren = new long[children.length + 1];
DataUtils.copyWithGap(children, newChildren, children.length, index + 1);
DataUtils.copyWithGap(children, newChildren, children.length,
index + 1);
newChildren[index + 1] = child;
children = newChildren;
long[] newChildrenSize = new long[childrenSize.length + 1];
DataUtils.copyWithGap(childrenSize, newChildrenSize,
childrenSize.length, index + 1);
newChildrenSize[index + 1] = childSize;
childrenSize = newChildrenSize;
totalSize += childSize;
}
}
......@@ -515,11 +616,18 @@ public class Page {
Object[] newValues = new Object[values.length - 1];
DataUtils.copyExcept(values, newValues, values.length, index);
values = newValues;
totalSize--;
}
if (children != null) {
long sizeOffset = childrenSize[index];
long[] newChildren = new long[children.length - 1];
DataUtils.copyExcept(children, newChildren, children.length, index);
children = newChildren;
long[] newChildrenSize = new long[childrenSize.length - 1];
DataUtils.copyExcept(childrenSize, newChildrenSize,
childrenSize.length, index);
childrenSize = newChildrenSize;
totalSize -= sizeOffset;
}
}
......@@ -527,21 +635,24 @@ public class Page {
int start = buff.position();
int pageLength = buff.getInt();
if (pageLength > maxLength) {
throw new RuntimeException("Length too large, expected =< " + maxLength + " got " + pageLength);
throw new RuntimeException("Length too large, expected =< "
+ maxLength + " got " + pageLength);
}
short check = buff.getShort();
int mapId = DataUtils.readVarInt(buff);
if (mapId != map.getId()) {
throw new RuntimeException("Error reading page, expected map " + map.getId() + " got " + mapId);
throw new RuntimeException("Error reading page, expected map "
+ map.getId() + " got " + mapId);
}
int len = DataUtils.readVarInt(buff);
int checkTest = DataUtils.getCheckValue(chunkId) ^
DataUtils.getCheckValue(map.getId()) ^
DataUtils.getCheckValue(offset) ^
DataUtils.getCheckValue(pageLength) ^
DataUtils.getCheckValue(len);
int checkTest = DataUtils.getCheckValue(chunkId)
^ DataUtils.getCheckValue(map.getId())
^ DataUtils.getCheckValue(offset)
^ DataUtils.getCheckValue(pageLength)
^ DataUtils.getCheckValue(len);
if (check != (short) checkTest) {
throw new RuntimeException("Error in check value, expected " + checkTest + " got " + check);
throw new RuntimeException("Error in check value, expected "
+ checkTest + " got " + check);
}
keys = new Object[len];
int type = buff.get();
......@@ -565,11 +676,20 @@ public class Page {
for (int i = 0; i <= len; i++) {
children[i] = buff.getLong();
}
childrenSize = new long[len + 1];
long total = 0;
for (int i = 0; i <= len; i++) {
long s = DataUtils.readVarLong(buff);
total += s;
childrenSize[i] = s;
}
totalSize = total;
} else {
values = new Object[len];
for (int i = 0; i < len; i++) {
values[i] = map.getValueType().read(buff);
}
totalSize = len;
}
}
......@@ -583,20 +703,24 @@ public class Page {
int start = buff.position();
buff.putInt(0);
buff.putShort((byte) 0);
DataUtils.writeVarInt(buff, map.getId());
DataUtils.writeVarInt(buff, map.getId());
int len = keys.length;
DataUtils.writeVarInt(buff, len);
Compressor compressor = map.getStore().getCompressor();
int type = children != null ? DataUtils.PAGE_TYPE_NODE : DataUtils.PAGE_TYPE_LEAF;
int type = children != null ? DataUtils.PAGE_TYPE_NODE
: DataUtils.PAGE_TYPE_LEAF;
buff.put((byte) type);
int compressStart = buff.position();
for (int i = 0; i < len; i++) {
map.getKeyType().write(buff, keys[i]);
}
if (type == DataUtils.PAGE_TYPE_NODE) {
for (int i = 0; i < len + 1; i++) {
for (int i = 0; i <= len; i++) {
buff.putLong(children[i]);
}
for (int i = 0; i <= len; i++) {
DataUtils.writeVarLong(buff, childrenSize[i]);
}
} else {
for (int i = 0; i < len; i++) {
map.getValueType().write(buff, values[i]);
......@@ -613,17 +737,16 @@ public class Page {
buff.position(compressStart - 1);
buff.put((byte) (type + DataUtils.PAGE_COMPRESSED));
DataUtils.writeVarInt(buff, expLen - compLen);
buff.put(comp, 0, compLen);
buff.put(comp, 0, compLen);
}
}
int pageLength = buff.position() - start;
buff.putInt(start, pageLength);
int check =
DataUtils.getCheckValue(chunkId) ^
DataUtils.getCheckValue(map.getId()) ^
DataUtils.getCheckValue(start) ^
DataUtils.getCheckValue(pageLength) ^
DataUtils.getCheckValue(len);
int check = DataUtils.getCheckValue(chunkId)
^ DataUtils.getCheckValue(map.getId())
^ DataUtils.getCheckValue(start)
^ DataUtils.getCheckValue(pageLength)
^ DataUtils.getCheckValue(len);
buff.putShort(start + 4, (short) check);
this.pos = DataUtils.getPagePos(chunkId, start, pageLength, type);
}
......@@ -635,14 +758,16 @@ public class Page {
*/
int getMaxLengthTempRecursive() {
// length, check, map id, key length, type
int maxLength = 4 + 2 + DataUtils.MAX_VAR_INT_LEN + DataUtils.MAX_VAR_INT_LEN + 1;
int maxLength = 4 + 2 + DataUtils.MAX_VAR_INT_LEN
+ DataUtils.MAX_VAR_INT_LEN + 1;
int len = keys.length;
for (int i = 0; i < len; i++) {
maxLength += map.getKeyType().getMaxLength(keys[i]);
}
if (children != null) {
maxLength += 8 * len;
for (int i = 0; i < len + 1; i++) {
maxLength += DataUtils.MAX_VAR_LONG_LEN * len;
for (int i = 0; i <= len; i++) {
long c = children[i];
if (c < 0) {
maxLength += map.readPage(c).getMaxLengthTempRecursive();
......@@ -670,7 +795,8 @@ public class Page {
for (int i = 0; i < len; i++) {
long c = children[i];
if (c < 0) {
children[i] = map.readPage(c).writeTempRecursive(buff, chunkId);
children[i] = map.readPage(c).writeTempRecursive(buff,
chunkId);
}
}
}
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论