提交 7ac42788 authored 作者: Thomas Mueller's avatar Thomas Mueller

A persistent tree map (work in progress) which might replace the storage of the…

A persistent tree map (work in progress) which might replace the storage of the database at some point in the far future.
上级 bb60844f
/*
* Copyright 2004-2011 H2 Group. Multiple-Licensed under the H2 License,
* Version 1.0, and under the Eclipse Public License, Version 1.0
* (http://h2database.com/html/license.html).
* Initial Developer: H2 Group
*/
package org.h2.dev.store;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.channels.FileLock;
import java.util.Map;
import org.h2.store.fs.FileBase;
import org.h2.store.fs.FilePathWrapper;
import org.h2.util.SmallLRUCache;
/**
* A file with a read cache.
*/
public class FilePathCache extends FilePathWrapper {
public static FileChannel wrap(FileChannel f) throws IOException {
return new FileCache(f);
}
public FileChannel open(String mode) throws IOException {
return new FileCache(getBase().open(mode));
}
public String getScheme() {
return "cache";
}
/**
* A file with a read cache.
*/
public static class FileCache extends FileBase {
private static final int CACHE_BLOCK_SIZE = 4 * 1024;
private final FileChannel base;
private long pos, posBase, size;
private final Map<Long, ByteBuffer> cache = SmallLRUCache.newInstance(16);
FileCache(FileChannel base) throws IOException {
this.base = base;
this.size = base.size();
}
public FileChannel position(long newPosition) throws IOException {
this.pos = newPosition;
return this;
}
public long position() throws IOException {
return pos;
}
private void positionBase(long pos) throws IOException {
if (posBase != pos) {
base.position(pos);
posBase = pos;
}
}
/**
 * Read from the current position into the given buffer, serving the data
 * from the block cache when possible. At most the rest of the current
 * cache block is returned per call.
 *
 * @param dst the target buffer
 * @return the number of bytes copied, or -1 if the current position is at
 *         or past the end of the data that could be read
 * @throws IOException if reading from the underlying channel fails
 */
public int read(ByteBuffer dst) throws IOException {
    long cachePos = getCachePos(pos);
    int off = (int) (pos - cachePos);
    // number of valid bytes in the block, counted from the block start
    int valid = CACHE_BLOCK_SIZE;
    ByteBuffer buff = cache.get(cachePos);
    if (buff == null) {
        buff = ByteBuffer.allocate(CACHE_BLOCK_SIZE);
        positionBase(cachePos);
        valid = 0;
        // a single read may return fewer bytes than requested
        while (buff.hasRemaining()) {
            int read = base.read(buff);
            if (read < 0) {
                // end of file: posBase must not be moved backwards by -1
                break;
            }
            valid += read;
            posBase += read;
        }
        if (valid == CACHE_BLOCK_SIZE) {
            // only complete blocks are cached
            cache.put(cachePos, buff);
        }
    }
    if (valid <= off) {
        // the requested position is not within the bytes that exist
        return -1;
    }
    // never hand out more than the bytes that follow the offset
    int len = Math.min(valid - off, dst.remaining());
    // bulk put honours the array offset of dst and works for direct buffers
    dst.put(buff.array(), off, len);
    pos += len;
    return len;
}
private long getCachePos(long pos) {
return (pos / CACHE_BLOCK_SIZE) * CACHE_BLOCK_SIZE;
}
public long size() throws IOException {
return size;
}
/**
 * Truncate the underlying file and drop all cached blocks.
 *
 * @param newSize the new size; if it is larger than the current size the
 *        tracked size is left unchanged
 * @return this channel
 * @throws IOException if truncating the underlying channel fails
 */
public FileChannel truncate(long newSize) throws IOException {
    cache.clear();
    base.truncate(newSize);
    size = Math.min(size, newSize);
    pos = Math.min(pos, newSize);
    // truncating may move the position of the base channel; ask the channel
    // instead of assuming it equals the logical position of this wrapper
    posBase = base.position();
    return this;
}
/**
 * Write the buffer at the current position. Every cached block that
 * overlaps the written range is removed from the cache first.
 *
 * @param src the data to write
 * @return the number of bytes written by the underlying channel
 * @throws IOException if writing to the underlying channel fails
 */
public int write(ByteBuffer src) throws IOException {
    int remaining = src.remaining();
    if (cache.size() > 0 && remaining > 0) {
        // invalidate from the block containing the first byte up to and
        // including the block containing the last byte; a write that
        // starts in the middle of a block may reach into the next one
        long last = getCachePos(pos + remaining - 1);
        for (long p = getCachePos(pos); p <= last; p += CACHE_BLOCK_SIZE) {
            cache.remove(p);
        }
    }
    positionBase(pos);
    int len = base.write(src);
    posBase += len;
    pos += len;
    size = Math.max(size, pos);
    return len;
}
public void force(boolean metaData) throws IOException {
base.force(metaData);
}
public FileLock tryLock(long position, long size, boolean shared) throws IOException {
return base.tryLock(position, size, shared);
}
public String toString() {
return "cache:" + base.toString();
}
}
}
/*
* Copyright 2004-2011 H2 Group. Multiple-Licensed under the H2 License,
* Version 1.0, and under the Eclipse Public License, Version 1.0
* (http://h2database.com/html/license.html).
* Initial Developer: H2 Group
*/
package org.h2.dev.store;
import java.nio.ByteBuffer;
/**
* A left-leaning red black tree implementation.
*/
class Node {
private static final int FLAG_BLACK = 1;
// private static final int FLAG_BACK_REFERENCES = 2;
private final TreeMapStore store;
private long id;
private long leftId, rightId;
private long transaction;
private Object key;
private Object data;
private Node left, right;
private int flags;
private Node(TreeMapStore store) {
this.store = store;
}
static Node create(TreeMapStore store, Object key, Object data) {
Node n = new Node(store);
n.key = key;
n.data = data;
n.transaction = store.getTransaction();
n.id = store.nextTempNodeId();
return n;
}
static Node load(TreeMapStore store, long id, ByteBuffer buff) {
Node n = new Node(store);
n.id = id;
n.load(buff);
return n;
}
Node getLeft() {
if (left == null && leftId != 0) {
left = store.loadNode(leftId);
}
return left;
}
Node getRight() {
if (right == null && rightId != 0) {
right = store.loadNode(rightId);
}
return right;
}
long getLeftId() {
return leftId;
}
void setLeftId(long leftId) {
this.leftId = leftId;
left = null;
}
long getRightId() {
return rightId;
}
/**
 * Set the id of the right child and drop the cached right child object,
 * so that it is resolved again from the new id on the next access.
 *
 * @param rightId the new id of the right child (0 means no child)
 */
void setRightId(long rightId) {
    this.rightId = rightId;
    // mirror of setLeftId: the stale reference is the right one
    right = null;
}
private void setLeft(Node l) {
this.left = l;
this.leftId = l == null ? 0 : l.getId();
}
private void setRight(Node r) {
this.right = r;
this.rightId = r == null ? 0 : r.getId();
}
/**
 * Get a version of this node that may be modified by the given write
 * transaction.
 *
 * @param writeTransaction the transaction that wants to change the node
 * @return this node if it already belongs to that transaction, otherwise
 *         a new node with the same key, data, children and flags
 */
private Node copyOnWrite(long writeTransaction) {
if (writeTransaction == transaction) {
return this;
}
// this version is being replaced: tell the store it is no longer used
store.removeNode(id);
// create() assigns a new temporary id and the store's current transaction
Node n2 = create(store, key, data);
n2.leftId = leftId;
n2.left = left;
n2.rightId = rightId;
n2.right = right;
n2.flags = flags;
return n2;
}
public String toString() {
StringBuilder buff = new StringBuilder();
buff.append(key);
if (left != null || right != null || leftId != 0 || rightId != 0) {
buff.append("{");
if (left != null) {
buff.append(left.toString());
} else if (leftId != 0) {
buff.append(leftId);
}
buff.append(",");
if (right != null) {
buff.append(right.toString());
} else if (rightId != 0) {
buff.append(rightId);
}
buff.append("}");
}
return buff.toString();
}
private void flipColor() {
flags = flags ^ FLAG_BLACK;
setLeft(getLeft().copyOnWrite(transaction));
getLeft().flags = getLeft().flags ^ FLAG_BLACK;
setRight(getRight().copyOnWrite(transaction));
getRight().flags = getRight().flags ^ FLAG_BLACK;
}
public long getId() {
return id;
}
public void setId(long id) {
this.id = id;
}
public Object getKey() {
return key;
}
public Object getData() {
return data;
}
private Node rotateLeft() {
Node x = getRight().copyOnWrite(store.getTransaction());
setRight(x.getLeft());
x.setLeft(this);
x.flags = flags;
// make red
flags = flags & ~FLAG_BLACK;
return x;
}
private Node rotateRight() {
Node x = getLeft().copyOnWrite(store.getTransaction());
setLeft(x.getRight());
x.setRight(this);
x.flags = flags;
// make red
flags = flags & ~FLAG_BLACK;
return x;
}
private Node moveRedLeft() {
flipColor();
if (isRed(getRight().getLeft())) {
setRight(getRight().rotateRight());
Node n = rotateLeft();
n.flipColor();
return n;
}
return this;
}
private Node moveRedRight() {
flipColor();
if (isRed(getLeft().getLeft())) {
Node n = rotateRight();
n.flipColor();
return n;
}
return this;
}
private Node min() {
Node n = this;
while (n.getLeft() != null) {
n = n.getLeft();
}
return n;
}
/**
 * Remove the node with the smallest key from the subtree rooted at this
 * node.
 *
 * @return the new root of the subtree, or null if the subtree is now empty
 */
private Node deleteMin() {
    if (getLeft() == null) {
        // this node is the minimum
        store.removeNode(id);
        return null;
    }
    // copy against the store's current transaction (as add and the
    // rotations do); comparing with the node's own transaction never copies
    Node n = copyOnWrite(store.getTransaction());
    if (!isRed(n.getLeft()) && !isRed(n.getLeft().getLeft())) {
        n = n.moveRedLeft();
    }
    n.setLeft(n.getLeft().deleteMin());
    return n.fixUp();
}
static Node remove(Node n, Object key) {
if (findNode(n, key) == null) {
return n;
}
return n.delete(key);
}
private int compare(Object key) {
return store.compare(key, this.key);
}
/**
 * Remove the given key from the subtree rooted at this node. The key must
 * exist in the subtree (the static remove method checks this first).
 *
 * @param key the key to remove
 * @return the new root of the subtree, or null if the subtree is now empty
 */
private Node delete(Object key) {
    // copy against the store's current transaction (as add and the
    // rotations do); comparing with the node's own transaction never copies
    Node n = copyOnWrite(store.getTransaction());
    // compare the key with the key of the node, not with the node itself
    if (n.compare(key) < 0) {
        if (!isRed(n.getLeft()) && !isRed(n.getLeft().getLeft())) {
            n = n.moveRedLeft();
        }
        n.setLeft(n.getLeft().delete(key));
    } else {
        if (isRed(n.getLeft())) {
            n = n.rotateRight();
        }
        if (n.compare(key) == 0 && n.getRight() == null) {
            // remove the node that is actually dropped; if it is a copy,
            // the original was already reported by copyOnWrite
            store.removeNode(n.id);
            return null;
        }
        if (!isRed(n.getRight()) && !isRed(n.getRight().getLeft())) {
            n = n.moveRedRight();
        }
        if (n.compare(key) == 0) {
            // replace this entry with its successor, then remove the
            // successor from the right subtree
            Node min = n.getRight().min();
            n.key = min.key;
            n.data = min.data;
            n.setRight(n.getRight().deleteMin());
        } else {
            n.setRight(n.getRight().delete(key));
        }
    }
    return n.fixUp();
}
/**
 * Search the subtree rooted at the given node for a key.
 *
 * @param n the root of the subtree (may be null)
 * @param key the key to look for
 * @return the node with this key, or null if there is none
 */
static Node findNode(Node n, Object key) {
    for (Node current = n; current != null;) {
        int result = current.compare(key);
        if (result == 0) {
            return current;
        }
        // a positive result continues in the right subtree
        current = result > 0 ? current.getRight() : current.getLeft();
    }
    return null;
}
static Node add(TreeMapStore store, Node n, Object key, Object data) {
if (n == null) {
n = Node.create(store, key, data);
return n;
}
n = n.copyOnWrite(store.getTransaction());
int compare = n.compare(key);
if (compare == 0) {
n.data = data;
} else if (compare < 0) {
n.setLeft(add(store, n.getLeft(), key, data));
} else {
n.setRight(add(store, n.getRight(), key, data));
}
return n.fixUp();
}
/**
 * Re-balance the subtree rooted at this node after a change. Applied in
 * this order: a left rotation if the right child is red, a right rotation
 * if the left child and its left child are both red, and a color flip if
 * both children are red.
 *
 * @return the node that is now the root of the subtree
 */
private Node fixUp() {
Node n = this;
// a red link must not lean to the right
if (isRed(getRight())) {
n = rotateLeft();
}
// two red links in a row on the left
if (isRed(n.getLeft()) && isRed(n.getLeft().getLeft())) {
n = n.rotateRight();
}
// both children red
if (isRed(n.getLeft()) && isRed(n.getRight())) {
n.flipColor();
}
return n;
}
private boolean isRed(Node n) {
return n != null && (n.flags & FLAG_BLACK) == 0;
}
private void load(ByteBuffer buff) {
flags = buff.get();
leftId = buff.getLong();
rightId = buff.getLong();
key = store.getKeyType().read(buff);
data = store.getValueType().read(buff);
}
void store(ByteBuffer buff) {
buff.put((byte) flags);
buff.putLong(leftId);
buff.putLong(rightId);
store.getKeyType().write(buff, key);
store.getValueType().write(buff, data);
}
int length() {
return store.getKeyType().length(key) +
store.getValueType().length(data) + 17;
}
}
/*
* Copyright 2004-2011 H2 Group. Multiple-Licensed under the H2 License,
* Version 1.0, and under the Eclipse Public License, Version 1.0
* (http://h2database.com/html/license.html).
* Initial Developer: H2 Group
*/
package org.h2.dev.store;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Properties;
import java.util.TreeSet;
import org.h2.store.fs.FilePath;
import org.h2.util.SmallLRUCache;
/*
file format:
header
header
[ transaction log | data ] *
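header (text; writeHeader() writes it at offset 0 and at offset pageSize):
# H2 1.5
read-version: 1
write-version: 1
root: [file position of the root node]
transaction: [transaction counter]
storePos: [file position where the next block is written]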
data:
'd' [length] root ...
transaction log:
't' [length] ...
todo:
- garbage collection
- use page checksums
- compress chunks
- encode length in pos (1=32, 2=128, 3=512,...)
- don't use any 't' blocks
- floating header (avoid duplicate header)
for each chunk, store chunk (a counter)
for each page, store chunk id and offset to root
for each chunk, store position of expected next chunks
*/
/**
* A persistent tree map.
*/
public class TreeMapStore {
private final KeyType keyType;
private final ValueType valueType;
private final String fileName;
private FileChannel file;
private int pageSize = 4 * 1024;
private long rootPos;
private HashMap<Long, Node> cache = SmallLRUCache.newInstance(50000);
private TreeSet<Block> blocks = new TreeSet<Block>();
// TODO use an int instead? (with rollover to 0)
private long transaction;
private int tempNodeId;
private long storePos;
private Node root;
private int loadCount;
/**
 * Create a store for the given file and select the key and value types.
 * Only Integer and String are supported for keys and values.
 *
 * @param fileName the name of the file
 * @param keyClass the key class
 * @param valueClass the value class
 * @throws RuntimeException if the key or value class is not supported
 */
private TreeMapStore(String fileName, Class<?> keyClass, Class<?> valueClass) {
    this.fileName = fileName;
    if (keyClass == Integer.class) {
        keyType = new IntegerType();
    } else if (keyClass == String.class) {
        keyType = new StringType();
    } else {
        throw new RuntimeException("Unsupported key class " + keyClass.toString());
    }
    if (valueClass == Integer.class) {
        valueType = new IntegerType();
    } else if (valueClass == String.class) {
        valueType = new StringType();
    } else {
        // report the value class (not the key class) in the message
        throw new RuntimeException("Unsupported value class " + valueClass.toString());
    }
}
static TreeMapStore open(String fileName, Class<?> keyClass, Class<?> valueClass) {
TreeMapStore s = new TreeMapStore(fileName, keyClass, valueClass);
s.open();
return s;
}
/**
 * Open the file. A new (empty) file gets a header; for an existing file
 * the header is read and the root node is loaded if there is one.
 *
 * @throws RuntimeException if the file cannot be opened, read or written
 */
void open() {
    // a plain file name without directory has no parent
    File parent = new File(fileName).getParentFile();
    if (parent != null) {
        parent.mkdirs();
    }
    try {
        file = FilePathCache.wrap(FilePath.get(fileName).open("rw"));
        long firstDataPos = pageSize * 2;
        if (file.size() == 0) {
            // set the position before writing the header, so that the
            // header does not record 0 as the position of the next block
            storePos = firstDataPos;
            writeHeader();
        } else {
            readHeader();
            if (storePos < firstDataPos) {
                // never write data over the two header pages
                storePos = firstDataPos;
            }
            if (rootPos > 0) {
                root = loadNode(rootPos);
            }
        }
    } catch (Exception e) {
        throw convert(e);
    }
}
/**
 * Write the text header (root position, transaction and store position)
 * at offset 0 and a second copy at offset pageSize.
 *
 * @throws RuntimeException if writing fails
 */
private void writeHeader() {
    try {
        String text =
                "# H2 1.5\n" +
                "read-version: 1\n" +
                "write-version: 1\n" +
                "root: " + rootPos + "\n" +
                "transaction: " + transaction + "\n" +
                "storePos: " + storePos + "\n";
        // explicit charset, so the bytes do not depend on the platform
        ByteBuffer header = ByteBuffer.wrap(text.getBytes("UTF-8"));
        file.position(0);
        file.write(header);
        // the first write consumed the buffer; without rewinding, the
        // second copy would be empty
        header.rewind();
        file.position(pageSize);
        file.write(header);
    } catch (Exception e) {
        throw convert(e);
    }
}
private void readHeader() {
try {
file.position(0);
byte[] header = new byte[pageSize];
// TODO read fully; read both headers
file.read(ByteBuffer.wrap(header));
Properties prop = new Properties();
prop.load(new ByteArrayInputStream(header));
rootPos = Long.parseLong(prop.get("root").toString());
storePos = Long.parseLong(prop.get("storePos").toString());
transaction = Long.parseLong(prop.get("transaction").toString());
} catch (Exception e) {
throw convert(e);
}
}
public String toString() {
return "cache size: " + cache.size() + " loadCount: " + this.loadCount + " " + blocks;
}
private static RuntimeException convert(Exception e) {
throw new RuntimeException("Exception: " + e, e);
}
/**
 * Store unsaved changes (if the root has a temporary, negative id) and
 * close the file. Calling this method again after the file was closed
 * does not close it a second time.
 *
 * @throws RuntimeException if closing the file fails
 */
public void close() {
    if (root != null && root.getId() < 0) {
        store();
    }
    if (file != null) {
        try {
            file.close();
        } catch (Exception e) {
            throw convert(e);
        } finally {
            // forget the channel whether or not closing succeeded
            file = null;
        }
    }
}
/**
 * Assign the given offset as the id of the node, register the node in the
 * cache under that id, and do the same recursively (left first, then
 * right) for children that still have a negative id.
 *
 * @param n the node
 * @param offset the offset to assign to this node
 * @return the offset after this node and all children that were updated
 */
private long updateId(Node n, long offset) {
n.setId(offset);
cache.put(offset, n);
// the next node starts after the serialized length of this node
offset += n.length();
if (n.getLeftId() < 0) {
offset = updateId(n.getLeft(), offset);
}
if (n.getRightId() < 0) {
offset = updateId(n.getRight(), offset);
}
return offset;
}
private int store(ByteBuffer buff, Node n) {
Node left = n.getLeftId() < 0 ? n.getLeft() : null;
if (left != null) {
n.setLeftId(left.getId());
}
Node right = n.getRightId() < 0 ? n.getRight() : null;
if (right != null) {
n.setRightId(right.getId());
}
int count = 1;
n.store(buff);
if (left != null) {
count += store(buff, left);
}
if (right != null) {
count += store(buff, right);
}
return count;
}
/**
 * Write all nodes that have a negative (temporary) id to the file as one
 * new block starting at storePos, then update and write the header.
 * Does nothing if there is no root or the root id is not negative.
 *
 * @throws RuntimeException if writing fails, or if the serialized nodes do
 *         not fill the buffer exactly
 */
void store() {
if (root == null || root.getId() >= 0) {
// TODO truncate file if empty
return;
}
// increments the transaction counter
commit();
Block b = new Block(storePos);
b.transaction = transaction;
// ids become offsets; the first node starts after the 1-byte 'd' marker
long end = updateId(root, storePos + 1);
ByteBuffer buff = ByteBuffer.allocate((int) (end - storePos));
buff.put((byte) 'd');
// store(buff, root) returns the number of nodes written
b.entryCount = store(buff, root);
b.liveCount = b.entryCount;
b.length = buff.limit();
blocks.add(b);
// the lengths used by updateId must match the bytes actually written
if (buff.hasRemaining()) {
throw new RuntimeException("remaining: " + buff.remaining());
}
buff.rewind();
try {
file.position(storePos);
file.write(buff);
} catch (IOException e) {
throw new RuntimeException(e);
}
// the next block starts where this one ends
storePos = end;
rootPos = root.getId();
writeHeader();
// restart the temporary id sequence
tempNodeId = 0;
}
public long getTransaction() {
return transaction;
}
public long nextTempNodeId() {
return -(++tempNodeId);
}
public long commit() {
return ++transaction;
}
public void add(Object key, Object data) {
root = Node.add(this, root, key, data);
}
public void remove(Object key) {
root = Node.remove(root, key);
}
public Object find(Object key) {
Node n = Node.findNode(root, key);
return n == null ? null : n.getData();
}
public Object getRoot() {
return root;
}
/**
 * Get the node with the given id, from the cache if possible, otherwise by
 * reading it from the file at the position given by the id. Up to 1024
 * bytes are read, so larger nodes are currently not supported.
 *
 * @param id the node id (the position in the file)
 * @return the node
 * @throws RuntimeException if reading or parsing fails
 */
public Node loadNode(long id) {
    Node n = cache.get(id);
    if (n == null) {
        try {
            file.position(id);
            ByteBuffer buff = ByteBuffer.wrap(new byte[1024]);
            // TODO read fully; read only required bytes
            do {
                int len = file.read(buff);
                if (len < 0) {
                    // end of file: parse what is available
                    break;
                }
            } while (buff.remaining() > 0);
            buff.rewind();
            n = Node.load(this, id, buff);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
        // count the loads from the file; the counter is shown by toString
        loadCount++;
        cache.put(id, n);
    }
    return n;
}
static int getVarIntLen(int x) {
if ((x & (-1 << 7)) == 0) {
return 1;
} else if ((x & (-1 << 14)) == 0) {
return 2;
} else if ((x & (-1 << 21)) == 0) {
return 3;
} else if ((x & (-1 << 28)) == 0) {
return 4;
}
return 5;
}
static void writeVarInt(ByteBuffer buff, int x) {
while ((x & ~0x7f) != 0) {
buff.put((byte) (0x80 | (x & 0x7f)));
x >>>= 7;
}
buff.put((byte) x);
}
static int readVarInt(ByteBuffer buff) {
int b = buff.get();
if (b >= 0) {
return b;
}
// a separate function so that this one can be inlined
return readVarIntRest(buff, b);
}
/**
 * Read the remaining bytes of a variable size int of which the first byte
 * was already read. Each byte contributes 7 bits, lowest bits first; a
 * byte that is not negative (highest bit not set) is the last one. At most
 * four more bytes are read.
 *
 * @param buff the source buffer, positioned after the first byte
 * @param b the first byte
 * @return the decoded value
 */
static int readVarIntRest(ByteBuffer buff, int b) {
// bits 0-6 come from the first byte
int x = b & 0x7f;
b = buff.get();
if (b >= 0) {
return x | (b << 7);
}
x |= (b & 0x7f) << 7;
b = buff.get();
if (b >= 0) {
return x | (b << 14);
}
x |= (b & 0x7f) << 14;
b = buff.get();
if (b >= 0) {
// the shift is evaluated before the bitwise or
return x | b << 21;
}
// the fifth byte is used as is, without checking its highest bit
x |= ((b & 0x7f) << 21) | (buff.get() << 28);
return x;
}
int compare(Object a, Object b) {
return keyType.compare(a, b);
}
/**
* A value type.
*/
static interface ValueType {
int length(Object obj);
void write(ByteBuffer buff, Object x);
Object read(ByteBuffer buff);
}
/**
* A key type.
*/
static interface KeyType extends ValueType {
int compare(Object a, Object b);
}
/**
* An integer type.
*/
static class IntegerType implements KeyType {
public int compare(Object a, Object b) {
return ((Integer) a).compareTo((Integer) b);
}
public int length(Object obj) {
return getVarIntLen((Integer) obj);
}
public Integer read(ByteBuffer buff) {
return readVarInt(buff);
}
public void write(ByteBuffer buff, Object x) {
writeVarInt(buff, (Integer) x);
}
}
/**
* A string type.
*/
static class StringType implements KeyType {
public int compare(Object a, Object b) {
return a.toString().compareTo(b.toString());
}
public int length(Object obj) {
try {
byte[] bytes = obj.toString().getBytes("UTF-8");
return getVarIntLen(bytes.length) + bytes.length;
} catch (Exception e) {
throw new RuntimeException(e);
}
}
public String read(ByteBuffer buff) {
int len = readVarInt(buff);
byte[] bytes = new byte[len];
buff.get(bytes);
try {
return new String(bytes, "UTF-8");
} catch (Exception e) {
throw new RuntimeException(e);
}
}
public void write(ByteBuffer buff, Object x) {
try {
byte[] bytes = x.toString().getBytes("UTF-8");
writeVarInt(buff, bytes.length);
buff.put(bytes);
} catch (Exception e) {
throw new RuntimeException(e);
}
}
}
KeyType getKeyType() {
return keyType;
}
ValueType getValueType() {
return valueType;
}
/**
 * Mark a stored node as no longer used by decrementing the live count of
 * the block that contains it. Ids that are not positive (temporary nodes)
 * are ignored, and so are positions for which no block is known.
 *
 * @param id the node id (the position in the file)
 */
void removeNode(long id) {
    if (id > 0) {
        Block b = getBlock(id);
        // the set of blocks only contains blocks written by this instance,
        // so there may be none for a node that was loaded from the file
        if (b != null) {
            b.liveCount--;
        }
    }
}
private Block getBlock(long pos) {
return blocks.lower(new Block(pos));
}
public Cursor cursor() {
return new Cursor(root);
}
/**
 * A cursor to iterate over all keys, starting with the leftmost node and
 * visiting each node after its left subtree and before its right subtree.
 */
public static class Cursor {

    /** The node whose key is returned by the next call to next(). */
    Node current;

    /** The ancestors of the current node that have not been visited yet. */
    ArrayList<Node> parents = new ArrayList<Node>();

    /**
     * Create a cursor positioned at the leftmost node.
     *
     * @param root the root node, or null for an empty map
     */
    Cursor(Node root) {
        // an empty map has no root; next() then returns null right away
        if (root != null) {
            min(root);
        }
    }

    /**
     * Move to the leftmost node of the given subtree, remembering the
     * nodes on the way down.
     *
     * @param n the root of the subtree (not null)
     */
    void min(Node n) {
        while (true) {
            Node x = n.getLeft();
            if (x == null) {
                break;
            }
            parents.add(n);
            n = x;
        }
        current = n;
    }

    /**
     * Get the key at the current position and advance the cursor.
     *
     * @return the key, or null if there are no more entries
     */
    Object next() {
        Node c = current;
        if (c != null) {
            fetchNext();
        }
        return c == null ? null : c.getKey();
    }

    /**
     * Advance to the next node: the leftmost node of the right subtree if
     * there is one, otherwise the most recently remembered ancestor.
     */
    private void fetchNext() {
        Node r = current.getRight();
        if (r != null) {
            min(r);
            return;
        }
        if (parents.size() == 0) {
            current = null;
            return;
        }
        current = parents.remove(parents.size() - 1);
    }
}
/**
* A cursor to iterate beginning from the root
* (not in ascending order).
*/
public static class RootCursor {
Node current;
ArrayList<Node> parents = new ArrayList<Node>();
RootCursor(Node root) {
current = root;
}
Object next() {
Node c = current;
if (c != null) {
fetchNext();
}
return c == null ? null : c.getKey();
}
private void fetchNext() {
Node l = current.getLeft();
if (l != null) {
parents.add(current);
current = l;
return;
}
while (true) {
Node r = current.getRight();
if (r != null) {
current = r;
return;
}
if (parents.size() == 0) {
current = null;
return;
}
current = parents.remove(parents.size() - 1);
}
}
}
/**
 * A block of data. Blocks are ordered by their start position.
 */
static class Block implements Comparable<Block> {

    /** The transaction that was current when the block was written. */
    long transaction;

    /** The position of the first byte of the block. */
    long start;

    /** The number of bytes in the block. */
    long length;

    /** The number of entries that were written to the block. */
    int entryCount;

    /** The number of entries that have not been removed since. */
    int liveCount;

    int referencesToOthers;

    /**
     * Create a block that starts at the given position.
     *
     * @param start the start position
     */
    Block(long start) {
        this.start = start;
    }

    public int compareTo(Block o) {
        return start == o.start ? 0 : start < o.start ? -1 : 1;
    }

    /**
     * Describe the block as
     * "[start-end c:entryCount l:liveCount percentLive%]".
     * An empty block (no entries) is reported as 0% live.
     */
    public String toString() {
        // a block without entries (for example one only used for lookup)
        // must not cause a division by zero
        long percent = entryCount == 0 ? 0 : 100L * liveCount / entryCount;
        return "[" + start + "-" + (start + length - 1) + " c:" + entryCount + " l:"
                + liveCount + " " + percent + "%]";
    }
}
}
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论