提交 cc743041 authored 作者: Thomas Mueller's avatar Thomas Mueller

A persistent tree map (work in progress).

上级 6b4ced6d
...@@ -11,7 +11,17 @@ import java.io.IOException; ...@@ -11,7 +11,17 @@ import java.io.IOException;
import java.util.Properties; import java.util.Properties;
/** /**
* A chunk of data, containing one or multiple pages * A chunk of data, containing one or multiple pages.
* <p>
* Chunks are page aligned (each page is usually 4096 bytes).
* There are at most 67 million (2^26) chunks,
* each chunk is at most 2 GB large.
* File format:
* 1 byte: 'c'
* 4 bytes: length
* 4 bytes: chunk id (an incrementing number)
* 8 bytes: metaRootPos
* [ Page ] *
*/ */
class Chunk { class Chunk {
...@@ -50,6 +60,11 @@ class Chunk { ...@@ -50,6 +60,11 @@ class Chunk {
*/ */
long metaRootPos; long metaRootPos;
/**
* The version stored in this chunk.
*/
long version;
Chunk(int id) { Chunk(int id) {
this.id = id; this.id = id;
} }
...@@ -61,16 +76,17 @@ class Chunk { ...@@ -61,16 +76,17 @@ class Chunk {
* @return the block * @return the block
*/ */
static Chunk fromString(String s) { static Chunk fromString(String s) {
Chunk c = new Chunk(0);
Properties prop = new Properties(); Properties prop = new Properties();
try { try {
prop.load(new ByteArrayInputStream(s.getBytes("UTF-8"))); prop.load(new ByteArrayInputStream(s.getBytes("UTF-8")));
c.id = Integer.parseInt(prop.get("id").toString()); int id = Integer.parseInt(prop.get("id").toString());
Chunk c = new Chunk(id);
c.start = Long.parseLong(prop.get("start").toString()); c.start = Long.parseLong(prop.get("start").toString());
c.length = Long.parseLong(prop.get("length").toString()); c.length = Long.parseLong(prop.get("length").toString());
c.entryCount = Integer.parseInt(prop.get("entryCount").toString()); c.entryCount = Integer.parseInt(prop.get("entryCount").toString());
c.liveCount = Integer.parseInt(prop.get("liveCount").toString()); c.liveCount = Integer.parseInt(prop.get("liveCount").toString());
c.metaRootPos = Long.parseLong(prop.get("metaRoot").toString()); c.metaRootPos = Long.parseLong(prop.get("metaRoot").toString());
c.version = Long.parseLong(prop.get("version").toString());
return c; return c;
} catch (IOException e) { } catch (IOException e) {
throw new RuntimeException(e); throw new RuntimeException(e);
...@@ -96,7 +112,8 @@ class Chunk { ...@@ -96,7 +112,8 @@ class Chunk {
"length:" + length + "\n" + "length:" + length + "\n" +
"entryCount:" + entryCount + "\n" + "entryCount:" + entryCount + "\n" +
"liveCount:" + liveCount + "\n" + "liveCount:" + liveCount + "\n" +
"metaRoot:" + metaRootPos + "\n"; "metaRoot:" + metaRootPos + "\n" +
"version:" + version + "\n";
} }
} }
......
...@@ -45,7 +45,7 @@ public class DataUtils { ...@@ -45,7 +45,7 @@ public class DataUtils {
* @param x the value * @param x the value
* @return the length in bytes * @return the length in bytes
*/ */
static int getVarLongLen(long x) { public static int getVarLongLen(long x) {
int i = 1; int i = 1;
while (true) { while (true) {
x >>>= 7; x >>>= 7;
...@@ -97,7 +97,7 @@ public class DataUtils { ...@@ -97,7 +97,7 @@ public class DataUtils {
* @param buff the source buffer * @param buff the source buffer
* @return the value * @return the value
*/ */
static long readVarLong(ByteBuffer buff) { public static long readVarLong(ByteBuffer buff) {
long x = buff.get(); long x = buff.get();
if (x >= 0) { if (x >= 0) {
return x; return x;
...@@ -132,7 +132,7 @@ public class DataUtils { ...@@ -132,7 +132,7 @@ public class DataUtils {
* @param buff the target buffer * @param buff the target buffer
* @param x the value * @param x the value
*/ */
static void writeVarLong(ByteBuffer buff, long x) { public static void writeVarLong(ByteBuffer buff, long x) {
while ((x & ~0x7f) != 0) { while ((x & ~0x7f) != 0) {
buff.put((byte) (0x80 | (x & 0x7f))); buff.put((byte) (0x80 | (x & 0x7f)));
x >>>= 7; x >>>= 7;
...@@ -148,7 +148,7 @@ public class DataUtils { ...@@ -148,7 +148,7 @@ public class DataUtils {
* @param oldSize the size of the old array * @param oldSize the size of the old array
* @param gapIndex the index of the gap * @param gapIndex the index of the gap
*/ */
static void copyWithGap(Object src, Object dst, int oldSize, int gapIndex) { public static void copyWithGap(Object src, Object dst, int oldSize, int gapIndex) {
if (gapIndex > 0) { if (gapIndex > 0) {
System.arraycopy(src, 0, dst, 0, gapIndex); System.arraycopy(src, 0, dst, 0, gapIndex);
} }
...@@ -165,7 +165,7 @@ public class DataUtils { ...@@ -165,7 +165,7 @@ public class DataUtils {
* @param oldSize the size of the old array * @param oldSize the size of the old array
* @param removeIndex the index of the entry to remove * @param removeIndex the index of the entry to remove
*/ */
static void copyExcept(Object src, Object dst, int oldSize, int removeIndex) { public static void copyExcept(Object src, Object dst, int oldSize, int removeIndex) {
if (removeIndex > 0 && oldSize > 0) { if (removeIndex > 0 && oldSize > 0) {
System.arraycopy(src, 0, dst, 0, removeIndex); System.arraycopy(src, 0, dst, 0, removeIndex);
} }
...@@ -184,4 +184,85 @@ public class DataUtils { ...@@ -184,4 +184,85 @@ public class DataUtils {
buff.rewind(); buff.rewind();
} }
/**
 * Map a length to a 5-bit length code in the range 0..31.
 * <p>
 * Lengths of up to 32 map to code 0. For larger lengths, the value is
 * halved (rounding up) until it is 2 or 3; the number of halvings beyond
 * the first four forms the upper bits of the code, and the lowest bit
 * tells whether the remaining value is 3 (bit set) or 2 (bit clear).
 * Codes larger than 31 are capped to 31.
 *
 * @param len the length
 * @return the length code
 */
public static int encodeLength(int len) {
    if (len <= 32) {
        return 0;
    }
    int mantissa = len;
    int halvings = 0;
    for (; mantissa > 3; halvings++) {
        // divide by two, rounding up
        mantissa = (mantissa >> 1) + (mantissa & 1);
    }
    int exponent = Math.max(0, halvings - 4);
    // the low bit of (exponent << 1) is always clear, so '|' equals '+'
    int code = (exponent << 1) | (mantissa & 1);
    return Math.min(31, code);
}
/**
 * Extract the chunk id from a position. The chunk id is read from the
 * bits of the position starting at bit 37.
 *
 * @param pos the position
 * @return the chunk id
 */
public static int getChunkId(long pos) {
    // unsigned shift, so the upper bits are filled with zeros
    long chunkBits = pos >>> 37;
    return (int) chunkBits;
}
/**
 * Get the maximum length for the length code stored in the lowest 5 bits
 * of the given position.
 * For the code 31, Integer.MAX_VALUE is returned.
 *
 * @param pos the position
 * @return the maximum length
 */
public static int getMaxLength(long pos) {
    int code = (int) (pos & 31);
    if (code != 31) {
        // lowest bit selects 2 or 3, the remaining bits select the shift
        int mantissa = 2 + (code & 1);
        int exponent = (code >> 1) + 4;
        return mantissa << exponent;
    }
    return Integer.MAX_VALUE;
}
/**
 * Extract the offset from a position. The lowest 5 bits of the position
 * are dropped and the result is truncated to an int.
 *
 * @param pos the position
 * @return the offset
 */
public static int getOffset(long pos) {
    long withoutLowBits = pos >> 5;
    // the narrowing cast keeps only the lowest 32 bits
    return (int) withoutLowBits;
}
/**
 * Build the position of a page. Three values are combined into one long:
 * the chunk id (shifted left by 37 bits), the offset (shifted left by
 * 5 bits), and the length code as returned by encodeLength.
 *
 * @param chunkId the chunk id
 * @param offset the offset
 * @param length the length
 * @return the position
 */
public static long getPos(int chunkId, int offset, int length) {
    long pos = (long) chunkId << 37;
    pos |= (long) offset << 5;
    pos |= encodeLength(length);
    return pos;
}
/**
 * Calculate a check value for the given integer. A check value is meant to
 * verify the data is consistent with a high probability, but not meant to
 * protect against media failure or deliberate changes.
 * <p>
 * The value is the upper 16 bits of the integer combined (xor) with the
 * lower 16 bits.
 *
 * @param x the value
 * @return the check value
 */
public static short getCheckValue(int x) {
    int folded = x ^ (x >> 16);
    // only the lowest 16 bits survive the cast
    return (short) folded;
}
} }
...@@ -17,37 +17,48 @@ import org.h2.compress.Compressor; ...@@ -17,37 +17,48 @@ import org.h2.compress.Compressor;
* <p> * <p>
* For nodes, the key at a given index is larger than the largest key of the * For nodes, the key at a given index is larger than the largest key of the
* child at the same index. * child at the same index.
* <p>
* File format:
* page length (including length): int
* check value: short
* number of keys: varInt
* type: byte (0: leaf, 1: node; +2: compressed)
* compressed: bytes saved (varInt)
* keys
* leaf: values (one for each key)
* node: children (1 more than keys)
*/ */
public class Page { public class Page {
private final BtreeMap<?, ?> map; private final BtreeMap<?, ?> map;
private final long version;
private long pos; private long pos;
private long transaction;
private Object[] keys; private Object[] keys;
private Object[] values; private Object[] values;
private long[] children; private long[] children;
private int cachedCompare; private int cachedCompare;
private Page(BtreeMap<?, ?> map) { private Page(BtreeMap<?, ?> map, long version) {
this.map = map; this.map = map;
this.version = version;
} }
/** /**
* Create a new page. The arrays are not cloned. * Create a new page. The arrays are not cloned.
* *
* @param map the map * @param map the map
* @param version the version
* @param keys the keys * @param keys the keys
* @param values the values * @param values the values
* @param children the children * @param children the children
* @return the page * @return the page
*/ */
static Page create(BtreeMap<?, ?> map, Object[] keys, Object[] values, long[] children) { static Page create(BtreeMap<?, ?> map, long version, Object[] keys, Object[] values, long[] children) {
Page p = new Page(map); Page p = new Page(map, version);
p.keys = keys; p.keys = keys;
p.values = values; p.values = values;
p.children = children; p.children = children;
p.transaction = map.getTransaction(); p.pos = map.getStore().registerTempPage(p);
p.pos = map.registerTempPage(p);
return p; return p;
} }
...@@ -60,7 +71,7 @@ public class Page { ...@@ -60,7 +71,7 @@ public class Page {
* @return the page * @return the page
*/ */
static Page read(FileChannel file, BtreeMap<?, ?> map, long filePos, long pos) { static Page read(FileChannel file, BtreeMap<?, ?> map, long filePos, long pos) {
int maxLength = Page.getMaxLength(pos), length = maxLength; int maxLength = DataUtils.getMaxLength(pos), length = maxLength;
ByteBuffer buff; ByteBuffer buff;
try { try {
file.position(filePos); file.position(filePos);
...@@ -75,20 +86,20 @@ public class Page { ...@@ -75,20 +86,20 @@ public class Page {
} catch (IOException e) { } catch (IOException e) {
throw new RuntimeException(e); throw new RuntimeException(e);
} }
Page p = new Page(map); Page p = new Page(map, 0);
p.pos = pos; p.pos = pos;
p.read(buff, maxLength); int chunkId = DataUtils.getChunkId(pos);
int offset = DataUtils.getOffset(pos);
p.read(buff, chunkId, offset, maxLength);
return p; return p;
} }
private Page copyOnWrite() { private Page copyOnWrite(long writeVersion) {
long t = map.getTransaction(); if (version == writeVersion) {
if (transaction == t) {
return this; return this;
} }
map.removePage(pos); getStore().removePage(pos);
Page newPage = create(map, keys, values, children); Page newPage = create(map, writeVersion, keys, values, children);
newPage.transaction = t;
newPage.cachedCompare = cachedCompare; newPage.cachedCompare = cachedCompare;
return newPage; return newPage;
} }
...@@ -297,7 +308,7 @@ public class Page { ...@@ -297,7 +308,7 @@ public class Page {
System.arraycopy(values, 0, aValues, 0, a); System.arraycopy(values, 0, aValues, 0, a);
System.arraycopy(values, a, bValues, 0, b); System.arraycopy(values, a, bValues, 0, b);
values = aValues; values = aValues;
Page newPage = create(map, bKeys, bValues, null); Page newPage = create(map, version, bKeys, bValues, null);
return newPage; return newPage;
} }
...@@ -313,7 +324,7 @@ public class Page { ...@@ -313,7 +324,7 @@ public class Page {
System.arraycopy(children, 0, aChildren, 0, a + 1); System.arraycopy(children, 0, aChildren, 0, a + 1);
System.arraycopy(children, a + 1, bChildren, 0, b); System.arraycopy(children, a + 1, bChildren, 0, b);
children = aChildren; children = aChildren;
Page newPage = create(map, bKeys, null, bChildren); Page newPage = create(map, version, bKeys, null, bChildren);
return newPage; return newPage;
} }
...@@ -322,18 +333,19 @@ public class Page { ...@@ -322,18 +333,19 @@ public class Page {
* *
* @param map the map * @param map the map
* @param p the page * @param p the page
* @param writeVersion the write version
* @param key the key * @param key the key
* @param value the value * @param value the value
* @return the root page * @return the root page
*/ */
static Page put(BtreeMap<?, ?> map, Page p, Object key, Object value) { static Page put(BtreeMap<?, ?> map, Page p, long writeVersion, Object key, Object value) {
if (p == null) { if (p == null) {
Object[] keys = { key }; Object[] keys = { key };
Object[] values = { value }; Object[] values = { value };
p = create(map, keys, values, null); p = create(map, writeVersion, keys, values, null);
return p; return p;
} }
p = p.copyOnWrite(); p = p.copyOnWrite(writeVersion);
Page top = p; Page top = p;
Page parent = null; Page parent = null;
int parentIndex = 0; int parentIndex = 0;
...@@ -350,7 +362,7 @@ public class Page { ...@@ -350,7 +362,7 @@ public class Page {
if (parent == null) { if (parent == null) {
Object[] keys = { k }; Object[] keys = { k };
long[] children = { p.getPos(), split.getPos() }; long[] children = { p.getPos(), split.getPos() };
top = create(map, keys, null, children); top = create(map, writeVersion, keys, null, children);
p = top; p = top;
} else { } else {
parent.insert(parentIndex, k, null, split.getPos()); parent.insert(parentIndex, k, null, split.getPos());
...@@ -373,7 +385,7 @@ public class Page { ...@@ -373,7 +385,7 @@ public class Page {
if (parent == null) { if (parent == null) {
Object[] keys = { k }; Object[] keys = { k };
long[] children = { p.getPos(), split.getPos() }; long[] children = { p.getPos(), split.getPos() };
top = create(map, keys, null, children); top = create(map, writeVersion, keys, null, children);
} else { } else {
parent.insert(parentIndex, k, null, split.getPos()); parent.insert(parentIndex, k, null, split.getPos());
} }
...@@ -388,7 +400,7 @@ public class Page { ...@@ -388,7 +400,7 @@ public class Page {
parent = p; parent = p;
parentIndex = index; parentIndex = index;
p = map.readPage(p.children[index]); p = map.readPage(p.children[index]);
p = p.copyOnWrite(); p = p.copyOnWrite(writeVersion);
} }
return top; return top;
} }
...@@ -418,25 +430,26 @@ public class Page { ...@@ -418,25 +430,26 @@ public class Page {
map.readPage(c).removeAllRecursive(); map.readPage(c).removeAllRecursive();
} }
} }
map.removePage(pos); getStore().removePage(pos);
} }
/** /**
* Remove a key-value pair. * Remove a key-value pair.
* *
* @param p the root page * @param p the root page
* @param writeVersion the write version
* @param key the key * @param key the key
* @return the new root page * @return the new root page
*/ */
static Page remove(Page p, Object key) { static Page remove(Page p, long writeVersion, Object key) {
int index = p.findKey(key); int index = p.findKey(key);
if (p.isLeaf()) { if (p.isLeaf()) {
if (index >= 0) { if (index >= 0) {
if (p.keyCount() == 1) { if (p.keyCount() == 1) {
p.map.removePage(p.pos); p.getStore().removePage(p.pos);
return null; return null;
} }
p = p.copyOnWrite(); p = p.copyOnWrite(writeVersion);
p.remove(index); p.remove(index);
} else { } else {
// not found // not found
...@@ -450,19 +463,19 @@ public class Page { ...@@ -450,19 +463,19 @@ public class Page {
index++; index++;
} }
Page c = p.map.readPage(p.children[index]); Page c = p.map.readPage(p.children[index]);
Page c2 = remove(c, key); Page c2 = remove(c, writeVersion, key);
if (c2 == c) { if (c2 == c) {
// not found // not found
} else if (c2 == null) { } else if (c2 == null) {
// child was deleted // child was deleted
p = p.copyOnWrite(); p = p.copyOnWrite(writeVersion);
p.remove(index); p.remove(index);
if (p.keyCount() == 0) { if (p.keyCount() == 0) {
p.map.removePage(p.pos); p.getStore().removePage(p.pos);
p = p.map.readPage(p.children[0]); p = p.map.readPage(p.children[0]);
} }
} else { } else {
p = p.copyOnWrite(); p = p.copyOnWrite(writeVersion);
p.setChild(index, c2.pos); p.setChild(index, c2.pos);
} }
return p; return p;
...@@ -504,31 +517,36 @@ public class Page { ...@@ -504,31 +517,36 @@ public class Page {
} }
} }
private void read(ByteBuffer buff, int maxLength) { private void read(ByteBuffer buff, int chunkId, int offset, int maxLength) {
int start = buff.position(); int start = buff.position();
int len = buff.getInt(); int pageLength = buff.getInt();
if (len > maxLength) { if (pageLength > maxLength) {
throw new RuntimeException("Length too large, expected < " + maxLength + " got " + len); throw new RuntimeException("Length too large, expected =< " + maxLength + " got " + pageLength);
} }
int mapId = DataUtils.readVarInt(buff); short check = buff.getShort();
if (mapId != map.getId()) { int len = DataUtils.readVarInt(buff);
throw new RuntimeException("Page pos mismatch, expected " + map.getId() + " got " + mapId); int checkTest = DataUtils.getCheckValue(chunkId) ^
DataUtils.getCheckValue(map.getId()) ^
DataUtils.getCheckValue(offset) ^
DataUtils.getCheckValue(pageLength) ^
DataUtils.getCheckValue(len);
if (check != (short) checkTest) {
throw new RuntimeException("Error in check value, expected " + checkTest + " got " + check);
} }
keys = new Object[len];
int type = buff.get(); int type = buff.get();
boolean node = (type & 1) != 0; boolean node = (type & 1) != 0;
boolean compressed = (type & 2) != 0; boolean compressed = (type & 2) != 0;
if (compressed) { if (compressed) {
Compressor compressor = map.getStore().getCompressor(); Compressor compressor = map.getStore().getCompressor();
int lenAdd = DataUtils.readVarInt(buff); int lenAdd = DataUtils.readVarInt(buff);
int compLen = len + start - buff.position(); int compLen = pageLength + start - buff.position();
byte[] comp = new byte[compLen]; byte[] comp = new byte[compLen];
buff.get(comp); buff.get(comp);
byte[] exp = new byte[compLen + lenAdd]; byte[] exp = new byte[compLen + lenAdd];
compressor.expand(comp, 0, compLen, exp, 0, exp.length); compressor.expand(comp, 0, compLen, exp, 0, exp.length);
buff = ByteBuffer.wrap(exp); buff = ByteBuffer.wrap(exp);
} }
len = DataUtils.readVarInt(buff);
keys = new Object[len];
for (int i = 0; i < len; i++) { for (int i = 0; i < len; i++) {
keys[i] = map.getKeyType().read(buff); keys[i] = map.getKeyType().read(buff);
} }
...@@ -554,13 +572,13 @@ public class Page { ...@@ -554,13 +572,13 @@ public class Page {
private void write(ByteBuffer buff, int chunkId) { private void write(ByteBuffer buff, int chunkId) {
int start = buff.position(); int start = buff.position();
buff.putInt(0); buff.putInt(0);
DataUtils.writeVarInt(buff, map.getId()); buff.putShort((byte) 0);
int len = keys.length;
DataUtils.writeVarInt(buff, len);
Compressor compressor = map.getStore().getCompressor(); Compressor compressor = map.getStore().getCompressor();
int type = children != null ? 1 : 0; int type = children != null ? 1 : 0;
buff.put((byte) type); buff.put((byte) type);
int compressStart = buff.position(); int compressStart = buff.position();
int len = keys.length;
DataUtils.writeVarInt(buff, len);
for (int i = 0; i < len; i++) { for (int i = 0; i < len; i++) {
map.getKeyType().write(buff, keys[i]); map.getKeyType().write(buff, keys[i]);
} }
...@@ -574,22 +592,29 @@ public class Page { ...@@ -574,22 +592,29 @@ public class Page {
} }
} }
if (compressor != null) { if (compressor != null) {
len = buff.position() - compressStart; int expLen = buff.position() - compressStart;
byte[] exp = new byte[len]; byte[] exp = new byte[expLen];
buff.position(compressStart); buff.position(compressStart);
buff.get(exp); buff.get(exp);
byte[] comp = new byte[exp.length * 2]; byte[] comp = new byte[exp.length * 2];
int compLen = compressor.compress(exp, exp.length, comp, 0); int compLen = compressor.compress(exp, exp.length, comp, 0);
if (compLen + DataUtils.getVarIntLen(compLen - len) < len) { if (compLen + DataUtils.getVarIntLen(compLen - expLen) < expLen) {
buff.position(compressStart - 1); buff.position(compressStart - 1);
buff.put((byte) (type + 2)); buff.put((byte) (type + 2));
DataUtils.writeVarInt(buff, len - compLen); DataUtils.writeVarInt(buff, expLen - compLen);
buff.put(comp, 0, compLen); buff.put(comp, 0, compLen);
} }
} }
len = buff.position() - start; int pageLength = buff.position() - start;
buff.putInt(start, len); buff.putInt(start, pageLength);
this.pos = Page.getPos(chunkId, start, len); int check =
DataUtils.getCheckValue(chunkId) ^
DataUtils.getCheckValue(map.getId()) ^
DataUtils.getCheckValue(start) ^
DataUtils.getCheckValue(pageLength) ^
DataUtils.getCheckValue(len);
buff.putShort(start + 4, (short) check);
this.pos = DataUtils.getPos(chunkId, start, pageLength);
} }
/** /**
...@@ -598,9 +623,9 @@ public class Page { ...@@ -598,9 +623,9 @@ public class Page {
* @return the next page id * @return the next page id
*/ */
int getMaxLengthTempRecursive() { int getMaxLengthTempRecursive() {
int maxLength = 4 + DataUtils.MAX_VAR_INT_LEN + 1; // length, check, key length, type
int maxLength = 4 + 2 + DataUtils.MAX_VAR_INT_LEN + 1;
int len = keys.length; int len = keys.length;
maxLength += DataUtils.MAX_VAR_INT_LEN;
for (int i = 0; i < len; i++) { for (int i = 0; i < len; i++) {
maxLength += map.getKeyType().getMaxLength(keys[i]); maxLength += map.getKeyType().getMaxLength(keys[i]);
} }
...@@ -661,73 +686,12 @@ public class Page { ...@@ -661,73 +686,12 @@ public class Page {
return count; return count;
} }
/** BtreeMapStore getStore() {
* Get the chunk id from the position. return map.getStore();
*
* @param pos the position
* @return the chunk id
*/
static int getChunkId(long pos) {
return (int) (pos >>> 37);
}
/**
* Get the offset from the position.
*
* @param pos the position
* @return the offset
*/
public static long getOffset(long pos) {
return (int) (pos >> 5);
}
/**
* Get the position of this page. The following information is encoded in
* the position: the chunk id, the offset, and the maximum length.
*
* @param chunkId the chunk id
* @param offset the offset
* @param length the length
* @return the position
*/
static long getPos(int chunkId, int offset, int length) {
return ((long) chunkId << 37) | ((long) offset << 5) | encodeLength(length);
}
/**
* Convert the length to a length code 0..31. 31 means more than 1 MB.
*
* @param len the length
* @return the length code
*/
public static int encodeLength(int len) {
if (len <= 32) {
return 0;
}
int x = len;
int shift = 0;
while (x > 3) {
shift++;
x = (x >> 1) + (x & 1);
}
shift = Math.max(0, shift - 4);
int code = (shift << 1) + (x & 1);
return Math.min(31, code);
} }
/** long getVersion() {
* Get the maximum length for the given code. return version;
* For the code 31, Integer.MAX_VALUE is returned.
*
* @param pos the position
* @return the maximum length
*/
public static int getMaxLength(long pos) {
int code = (int) (pos & 31);
if (code == 31) {
return Integer.MAX_VALUE;
}
return (2 + (code & 1)) << ((code >> 1) + 4);
} }
} }
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论