提交 484563de authored 作者: Thomas Mueller's avatar Thomas Mueller

A persistent multi-version map: split based on size (memory), not number of…

A persistent multi-version map: split based on size (memory), not number of entries; allow to estimate the memory used by unsaved changes
上级 443e21c9
......@@ -164,18 +164,20 @@ public class MVRTreeMap<K, V> extends MVMap<K, V> {
Page p = root.copyOnWrite(writeVersion);
Object result;
if (alwaysAdd || get(key) == null) {
if (p.getKeyCount() > store.getMaxPageSize()) {
if (p.getMemory() > store.getMaxPageSize() && p.getKeyCount() > 1) {
// only possible if this is the root, else we would have split earlier
// (this requires maxPageSize is fixed)
long totalCount = p.getTotalCount();
Page split = split(p, writeVersion);
Object[] keys = { getBounds(p), getBounds(split) };
Object k1 = getBounds(p);
Object k2 = getBounds(split);
Object[] keys = { k1, k2 };
long[] children = { p.getPos(), split.getPos(), 0 };
Page[] childrenPages = { p, split, null };
long[] counts = { p.getTotalCount(), split.getTotalCount(), 0 };
p = Page.create(this, writeVersion, 2,
keys, null, children, childrenPages, counts,
totalCount, 0);
totalCount, 0, 0);
// now p is a node; continues
}
add(p, writeVersion, key, value);
......@@ -244,7 +246,7 @@ public class MVRTreeMap<K, V> extends MVMap<K, V> {
}
}
Page c = p.getChildPage(index).copyOnWrite(writeVersion);
if (c.getKeyCount() >= store.getMaxPageSize()) {
if (c.getMemory() > store.getMaxPageSize() && c.getKeyCount() > 1) {
// split on the way down
Page split = split(c, writeVersion);
p = p.copyOnWrite(writeVersion);
......@@ -365,7 +367,7 @@ public class MVRTreeMap<K, V> extends MVMap<K, V> {
long[] c = leaf ? null : new long[1];
Page[] cp = leaf ? null : new Page[1];
return Page.create(this, writeVersion, 0,
new Object[4], values, c, cp, c, 0, 0);
new Object[4], values, c, cp, c, 0, 0, 0);
}
private static void move(Page source, Page target, int sourceIndex) {
......
......@@ -64,7 +64,7 @@ public class TestMVRTree extends TestMVStore {
SpatialKey k = new SpatialKey(i, x - p, x + p, y - p, y + p);
r.add(k, "" + i);
if (i > 0 && (i % len / 10) == 0) {
s.store();
s.save();
}
if (i > 0 && (i % 10000) == 0) {
render(r, getBaseDir() + "/test.png");
......@@ -72,7 +72,7 @@ public class TestMVRTree extends TestMVStore {
}
// System.out.println(prof.getTop(5));
// System.out.println("add: " + (System.currentTimeMillis() - t));
s.store();
s.save();
s.close();
s = openStore(fileName);
r = s.openMap("data", "r", "s2", "");
......
......@@ -19,6 +19,7 @@ import java.util.HashMap;
* 1 byte: 'c'
* 4 bytes: length
* 4 bytes: chunk id (an incrementing number)
* 4 bytes: pageCount
* 8 bytes: metaRootPos
* 8 bytes: maxLengthLive
* [ Page ] *
......@@ -40,6 +41,11 @@ public class Chunk {
*/
int length;
/**
* The number of pages.
*/
int pageCount;
/**
* The sum of the max length of all pages.
*/
......@@ -75,10 +81,12 @@ public class Chunk {
}
int length = buff.getInt();
int chunkId = buff.getInt();
int pageCount = buff.getInt();
long metaRootPos = buff.getLong();
long maxLengthLive = buff.getLong();
Chunk c = new Chunk(chunkId);
c.length = length;
c.pageCount = pageCount;
c.start = start;
c.metaRootPos = metaRootPos;
c.maxLengthLive = maxLengthLive;
......@@ -89,6 +97,7 @@ public class Chunk {
buff.put((byte) 'c');
buff.putInt(length);
buff.putInt(id);
buff.putInt(pageCount);
buff.putLong(metaRootPos);
buff.putLong(maxLengthLive);
}
......@@ -105,6 +114,7 @@ public class Chunk {
Chunk c = new Chunk(id);
c.start = Long.parseLong(map.get("start"));
c.length = Integer.parseInt(map.get("length"));
c.pageCount = Integer.parseInt(map.get("pageCount"));
c.maxLength = Long.parseLong(map.get("maxLength"));
c.maxLengthLive = Long.parseLong(map.get("maxLengthLive"));
c.metaRootPos = Long.parseLong(map.get("metaRoot"));
......@@ -129,6 +139,7 @@ public class Chunk {
"id:" + id + "," +
"start:" + start + "," +
"length:" + length + "," +
"pageCount:" + pageCount + "," +
"maxLength:" + maxLength + "," +
"maxLengthLive:" + maxLengthLive + "," +
"metaRoot:" + metaRootPos + "," +
......
......@@ -56,6 +56,16 @@ public class DataUtils {
*/
public static final long COMPRESSED_VAR_LONG_MAX = 0x1ffffffffffffL;
/**
* The estimated number of bytes used per page object.
*/
public static final int PAGE_MEMORY = 128;
/**
* The estimated number of bytes used per child entry.
*/
public static final int PAGE_MEMORY_CHILD = 16;
/**
* Get the length of the variable size int.
*
......
......@@ -79,11 +79,13 @@ public class Dump {
}
int chunkLength = block.getInt();
int chunkId = block.getInt();
int pageCount = block.getInt();
long metaRootPos = block.getLong();
long maxLengthLive = block.getLong();
writer.println(" chunk " + chunkId +
" at " + pos +
" length " + chunkLength +
" pageCount " + pageCount +
" root " + metaRootPos +
" maxLengthLive " + maxLengthLive);
ByteBuffer chunk = ByteBuffer.allocate(chunkLength);
......
......@@ -55,6 +55,7 @@ public class MVMap<K, V> extends AbstractMap<K, V>
this.valueType = valueType;
this.createVersion = createVersion;
this.root = Page.createEmpty(this, createVersion - 1);
store.registerUnsavedPage();
}
/**
......@@ -69,7 +70,7 @@ public class MVMap<K, V> extends AbstractMap<K, V>
checkWrite();
long writeVersion = store.getCurrentVersion();
Page p = root.copyOnWrite(writeVersion);
if (p.getKeyCount() > store.getMaxPageSize()) {
if (p.getMemory() > store.getMaxPageSize() && p.getKeyCount() > 1) {
int at = p.getKeyCount() / 2;
long totalCount = p.getTotalCount();
Object k = p.getKey(at);
......@@ -79,7 +80,8 @@ public class MVMap<K, V> extends AbstractMap<K, V>
Page[] childrenPages = { p, split };
long[] counts = { p.getTotalCount(), split.getTotalCount() };
p = Page.create(this, writeVersion, 1,
keys, null, children, childrenPages, counts, totalCount, 0);
keys, null, children, childrenPages, counts, totalCount, 0, 0);
store.registerUnsavedPage();
// now p is a node; insert continues
}
Object result = put(p, writeVersion, key, value);
......@@ -114,7 +116,7 @@ public class MVMap<K, V> extends AbstractMap<K, V>
index++;
}
Page c = p.getChildPage(index).copyOnWrite(writeVersion);
if (c.getKeyCount() >= store.getMaxPageSize()) {
if (c.getMemory() > store.getMaxPageSize() && c.getKeyCount() > 1) {
// split on the way down
int at = c.getKeyCount() / 2;
Object k = c.getKey(at);
......@@ -575,23 +577,6 @@ public class MVMap<K, V> extends AbstractMap<K, V>
}
Page cOld = p.getChildPage(index);
Page c = cOld.copyOnWrite(writeVersion);
int todoMerge;
// if (c.getKeyCount() < store.getMaxPageSize() / 2) {
// if (p.getChildPageCount() == 1) {
// int todo;
// // replace this node with the child
// } else if (index > 0) {
// int indexSibling = index - 1;
// Page sOld = p.getChildPage(indexSibling);
// merge(cOld, sOld);
// p.remove(indexSibling);
// } else {
// int indexSibling = index + 1;
// Page sOld = p.getChildPage(indexSibling);
// }
// }
long oldCount = c.getTotalCount();
result = remove(c, writeVersion, key);
if (oldCount == c.getTotalCount()) {
......@@ -611,29 +596,6 @@ public class MVMap<K, V> extends AbstractMap<K, V>
return result;
}
int todoMerge;
// private boolean merge(Page a, Page b, boolean left) {
// boolean leaf = a.isLeaf();
// if (leaf != b.isLeaf()) {
// return false;
// }
// if (left) {
// int moved = 0;
// while (a.getKeyCount() < b.getKeyCount() - 1) {
// if (leaf) {
// Object k = b.getKey(0);
// Object v = b.getValue(0);
// b.remove(0);
// a.insertLeaf(a.getKeyCount(), k, v);
// } else {
//
// }
// moved++;
// }
// }
// }
// }
protected void setRoot(Page newRoot) {
if (root != newRoot) {
removeUnusedOldVersions();
......
......@@ -35,13 +35,12 @@ header:
H:3,blockSize=4096,...
TODO:
- merge pages if small
- support custom fields in the file header (auto-server ip address,...)
- support stores that span multiple files (chunks stored in other files)
- triggers
- r-tree: add missing features (NN search for example)
- pluggable cache (specially for in-memory file systems)
- maybe store the factory class in the file header
- support custom fields in the header (auto-server ip address,...)
- auto-server: store port in the header
- recovery: keep some old chunks; don't overwritten for 1 minute
- pluggable caching (specially for in-memory file systems)
......@@ -57,11 +56,12 @@ TODO:
- support background writes (concurrent modification & store)
- limited support for writing to old versions (branches)
- support concurrent operations (including file I/O)
- maxPageSize should be size in bytes (not it is actually maxPageEntryCount)
- on insert, if the child page is already full, don't load and modify it - split directly
- performance test with encrypting file system
- possibly split chunk data into immutable and mutable
- compact: avoid processing pages using a counting bloom filter
- defragment (re-creating maps, specially those with small pages)
- write using ByteArrayOutputStream; remove DataType.getMaxLength
*/
......@@ -85,7 +85,7 @@ public class MVStore {
private final int readCacheSize = 2 * 1024 * 1024;
private int maxPageSize = 30;
private int maxPageSize = 4 * 1024;
private FileChannel file;
private long fileSize;
......@@ -123,6 +123,7 @@ public class MVStore {
private long currentVersion;
private int readCount;
private int writeCount;
private int unsavedPageCount;
private MVStore(String fileName, MapFactory mapFactory) {
this.fileName = fileName;
......@@ -502,7 +503,7 @@ public class MVStore {
*
* @return the new version (incremented if there were changes)
*/
public long store() {
public long save() {
if (!hasUnsavedChanges()) {
return currentVersion;
}
......@@ -560,7 +561,7 @@ public class MVStore {
}
Page p = m.getRoot();
if (p.getTotalCount() > 0) {
long root = p.writeTempRecursive(c, buff);
long root = p.writeUnsavedRecursive(c, buff);
meta.put("root." + m.getId(), "" + root);
}
}
......@@ -569,7 +570,7 @@ public class MVStore {
// this will modify maxLengthLive, but
// the correct value is written in the chunk header
meta.getRoot().writeTempRecursive(c, buff);
meta.getRoot().writeUnsavedRecursive(c, buff);
buff.flip();
int length = buff.limit();
......@@ -604,6 +605,7 @@ public class MVStore {
// write the new version (after the commit)
writeHeader();
shrinkFileIfPossible(1);
unsavedPageCount = 0;
return version;
}
......@@ -799,7 +801,7 @@ public class MVStore {
copyLive(c, old);
}
store();
save();
return true;
}
......@@ -885,24 +887,26 @@ public class MVStore {
void removePage(long pos) {
// we need to keep temporary pages,
// to support reading old versions and rollback
if (pos > 0) {
// this could result in a cache miss
// if the operation is rolled back,
// but we don't optimize for rollback
cache.remove(pos);
Chunk c = getChunk(pos);
HashMap<Integer, Chunk>freed = freedChunks.get(currentVersion);
if (freed == null) {
freed = New.hashMap();
freedChunks.put(currentVersion, freed);
}
Chunk f = freed.get(c.id);
if (f == null) {
f = new Chunk(c.id);
freed.put(c.id, f);
}
f.maxLengthLive -= DataUtils.getPageMaxLength(pos);
if (pos == 0) {
unsavedPageCount--;
return;
}
// this could result in a cache miss
// if the operation is rolled back,
// but we don't optimize for rollback
cache.remove(pos);
Chunk c = getChunk(pos);
HashMap<Integer, Chunk>freed = freedChunks.get(currentVersion);
if (freed == null) {
freed = New.hashMap();
freedChunks.put(currentVersion, freed);
}
Chunk f = freed.get(c.id);
if (f == null) {
f = new Chunk(c.id);
freed.put(c.id, f);
}
f.maxLengthLive -= DataUtils.getPageMaxLength(pos);
}
/**
......@@ -915,14 +919,22 @@ public class MVStore {
// System.out.println(string);
}
/**
* Set the maximum amount of memory a page should contain, in bytes. Larger
* pages are split. The default is 4 KB. This is not a limit in the page
* size, as pages with one entry can be larger. As a rule of thumb, pages
* should not be larger than 1 MB, for caching to work efficiently.
*
* @param maxPageSize the page size
*/
public void setMaxPageSize(int maxPageSize) {
this.maxPageSize = maxPageSize;
}
/**
* The maximum number of key-value pairs in a page.
* Get the maximum page size, in bytes.
*
* @return the maximum number of entries
* @return the maximum page size
*/
public int getMaxPageSize() {
return maxPageSize;
......@@ -1011,6 +1023,21 @@ public class MVStore {
return true;
}
/**
* Get the estimated number of unsaved pages. The returned value is not
* accurate, specially after rollbacks, but can be used to estimate the
* memory usage for unsaved data.
*
* @return the number of unsaved pages
*/
public int getUnsavedPageCount() {
return unsavedPageCount;
}
void registerUnsavedPage() {
unsavedPageCount++;
}
public int getStoreVersion() {
String x = getSetting("storeVersion");
return x == null ? 0 : Integer.parseInt(x);
......
......@@ -51,6 +51,11 @@ public class Page {
*/
private int sharedFlags;
/**
* The estimated memory used.
*/
private int memory;
private Object[] keys;
private Object[] values;
private long[] children;
......@@ -72,7 +77,7 @@ public class Page {
public static Page createEmpty(MVMap<?, ?> map, long version) {
return create(map, version, 0,
EMPTY_OBJECT_ARRAY, EMPTY_OBJECT_ARRAY,
null, null, null, 0, 0);
null, null, null, 0, 0, DataUtils.PAGE_MEMORY);
}
/**
......@@ -93,7 +98,7 @@ public class Page {
public static Page create(MVMap<?, ?> map, long version,
int keyCount, Object[] keys,
Object[] values, long[] children, Page[] childrenPages, long[] counts,
long totalCount, int sharedFlags) {
long totalCount, int sharedFlags, int memory) {
Page p = new Page(map, version);
// the position is 0
p.keys = keys;
......@@ -104,6 +109,7 @@ public class Page {
p.counts = counts;
p.totalCount = totalCount;
p.sharedFlags = sharedFlags;
p.memory = memory == 0 ? p.calculateMemory() : memory;
return p;
}
......@@ -246,7 +252,9 @@ public class Page {
Page newPage = create(map, writeVersion,
keyCount, keys, values, children, childrenPages,
counts, totalCount,
SHARED_KEYS | SHARED_VALUES | SHARED_CHILDREN | SHARED_COUNTS);
SHARED_KEYS | SHARED_VALUES | SHARED_CHILDREN | SHARED_COUNTS,
memory);
map.getStore().registerUnsavedPage();
newPage.cachedCompare = cachedCompare;
return newPage;
}
......@@ -328,7 +336,10 @@ public class Page {
totalCount = a;
Page newPage = create(map, version, b,
bKeys, bValues, null, null, null,
bKeys.length, 0);
bKeys.length, 0, 0);
map.getStore().registerUnsavedPage();
memory = calculateMemory();
newPage.memory = newPage.calculateMemory();
return newPage;
}
......@@ -372,7 +383,10 @@ public class Page {
}
Page newPage = create(map, version, b - 1,
bKeys, null, bChildren, bChildrenPages,
bCounts, t, 0);
bCounts, t, 0, 0);
map.getStore().registerUnsavedPage();
memory = calculateMemory();
newPage.memory = newPage.calculateMemory();
return newPage;
}
......@@ -441,6 +455,11 @@ public class Page {
keys = Arrays.copyOf(keys, keys.length);
sharedFlags &= ~SHARED_KEYS;
}
Object old = keys[index];
if (old != null) {
memory -= map.getKeyType().getMemory(old);
}
memory += map.getKeyType().getMemory(key);
keys[index] = key;
}
......@@ -457,6 +476,8 @@ public class Page {
values = Arrays.copyOf(values, values.length);
sharedFlags &= ~SHARED_VALUES;
}
memory -= map.getValueType().getMemory(old);
memory += map.getValueType().getMemory(value);
values[index] = value;
return old;
}
......@@ -511,6 +532,8 @@ public class Page {
keyCount++;
sharedFlags &= ~(SHARED_KEYS | SHARED_VALUES);
totalCount++;
memory += map.getKeyType().getMemory(key);
memory += map.getValueType().getMemory(value);
}
/**
......@@ -545,6 +568,8 @@ public class Page {
sharedFlags &= ~(SHARED_KEYS | SHARED_CHILDREN | SHARED_COUNTS);
totalCount += childPage.getTotalCount();
memory += map.getKeyType().getMemory(key);
memory += DataUtils.PAGE_MEMORY_CHILD;
}
/**
......@@ -554,6 +579,8 @@ public class Page {
*/
public void remove(int index) {
int keyIndex = index >= keyCount ? index - 1 : index;
Object old = keys[keyIndex];
memory -= map.getKeyType().getMemory(old);
if ((sharedFlags & SHARED_KEYS) == 0 && keys.length > keyCount - 4) {
if (keyIndex < keyCount - 1) {
System.arraycopy(keys, keyIndex + 1, keys, keyIndex, keyCount - keyIndex - 1);
......@@ -567,6 +594,8 @@ public class Page {
}
if (values != null) {
old = values[index];
memory -= map.getValueType().getMemory(old);
if ((sharedFlags & SHARED_VALUES) == 0 && values.length > keyCount - 4) {
if (index < keyCount - 1) {
System.arraycopy(values, index + 1, values, index, keyCount - index - 1);
......@@ -582,6 +611,7 @@ public class Page {
}
keyCount--;
if (children != null) {
memory -= DataUtils.PAGE_MEMORY_CHILD;
long countOffset = counts[index];
long[] newChildren = new long[children.length - 1];
......@@ -638,8 +668,10 @@ public class Page {
compressor.expand(comp, 0, compLen, exp, 0, exp.length);
buff = ByteBuffer.wrap(exp);
}
DataType keyType = map.getKeyType();
for (int i = 0; i < len; i++) {
keys[i] = map.getKeyType().read(buff);
Object k = keyType.read(buff);
keys[i] = k;
}
if (node) {
children = new long[len + 1];
......@@ -657,11 +689,14 @@ public class Page {
totalCount = total;
} else {
values = new Object[len];
DataType valueType = map.getValueType();
for (int i = 0; i < len; i++) {
values[i] = map.getValueType().read(buff);
Object v = valueType.read(buff);
values[i] = v;
}
totalCount = len;
}
memory = calculateMemory();
}
/**
......@@ -722,6 +757,7 @@ public class Page {
long max = DataUtils.getPageMaxLength(pos);
chunk.maxLength += max;
chunk.maxLengthLive += max;
chunk.pageCount++;
}
/**
......@@ -769,13 +805,13 @@ public class Page {
* @param buff the target buffer
* @return the page id
*/
long writeTempRecursive(Chunk chunk, ByteBuffer buff) {
long writeUnsavedRecursive(Chunk chunk, ByteBuffer buff) {
if (!isLeaf()) {
int len = children.length;
for (int i = 0; i < len; i++) {
Page p = childrenPages[i];
if (p != null) {
children[i] = p.writeTempRecursive(chunk, buff);
children[i] = p.writeUnsavedRecursive(chunk, buff);
childrenPages[i] = null;
}
}
......@@ -809,4 +845,28 @@ public class Page {
return pos != 0 ? (int) (pos | (pos >>> 32)) : super.hashCode();
}
public int getMemory() {
if (MVStore.ASSERT) {
if (memory != calculateMemory()) {
throw new RuntimeException("Memory calculation error");
}
}
return memory;
}
private int calculateMemory() {
int mem = DataUtils.PAGE_MEMORY;
for (int i = 0; i < keyCount; i++) {
mem += map.getKeyType().getMemory(keys[i]);
}
if (this.isLeaf()) {
for (int i = 0; i < keyCount; i++) {
mem += map.getValueType().getMemory(values[i]);
}
} else {
mem += this.getChildPageCount() * DataUtils.PAGE_MEMORY_CHILD;
}
return mem;
}
}
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论