Commit 16b41b06 authored by Thomas Mueller

MVStore: use a mark & sweep GC algorithm instead of reference counting, to…

MVStore: use a mark & sweep GC algorithm instead of reference counting, to ensure used chunks are never overwritten, even if the reference counting algorithm does not work properly.
Parent 75e7ff82
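In rough terms, the change works like this: rather than maintaining a live-reference counter per chunk (which can drift if an increment or decrement is missed), the store walks the tree from the current root pages, marks every chunk that is still reachable, and only chunks outside that set may be reused. Below is a minimal, self-contained sketch of such a mark phase, not the actual MVStore implementation; the PageRef class and collectReachableChunks method are illustrative names only.

    import java.util.ArrayDeque;
    import java.util.Deque;
    import java.util.HashSet;
    import java.util.List;
    import java.util.Set;

    // Illustrative model: a page knows which chunk it is stored in and which
    // pages it references. Real MVStore pages are identified by a packed position.
    class PageRef {
        final int chunkId;
        final List<PageRef> children;

        PageRef(int chunkId, List<PageRef> children) {
            this.chunkId = chunkId;
            this.children = children;
        }
    }

    class MarkSweepSketch {

        // Mark phase: collect the ids of all chunks reachable from the root pages.
        // Chunks whose id is not in the returned set hold no live data and can be
        // overwritten by the sweep / compaction step.
        static Set<Integer> collectReachableChunks(List<PageRef> roots) {
            Set<Integer> live = new HashSet<>();
            Deque<PageRef> stack = new ArrayDeque<>(roots);
            while (!stack.isEmpty()) {
                PageRef p = stack.pop();
                live.add(p.chunkId);
                // page trees are acyclic, so a plain depth-first walk terminates
                stack.addAll(p.children);
            }
            return live;
        }
    }

The point of the approach is that correctness only depends on the walk itself: even if some bookkeeping was wrong earlier, a chunk that is still reachable is marked and therefore never overwritten.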
@@ -17,7 +17,10 @@ Change Log
 <h1>Change Log</h1>
 <h2>Next Version (unreleased)</h2>
-<ul><li>In the multi-threaded mode, updating the column selectivity ("analyze")
+<ul><li>MVStore: use a mark & sweep GC algorithm instead of reference counting,
+to ensure used chunks are never overwritten, even if the reference counting
+algorithm does not work properly.
+</li><li>In the multi-threaded mode, updating the column selectivity ("analyze")
 in the background sometimes did not work.
 </li><li>In the multi-threaded mode, database metadata operations
 did sometimes not work if the schema was changed at the same time
...
@@ -123,6 +123,23 @@ public class MVMap<K, V> extends AbstractMap<K, V>
         return (V) result;
     }
 
+    /**
+     * Add or replace a key-value pair in a branch.
+     *
+     * @param root the root page
+     * @param key the key (may not be null)
+     * @param value the value (may not be null)
+     * @return the new root page
+     */
+    synchronized Page putBranch(Page root, K key, V value) {
+        DataUtils.checkArgument(value != null, "The value may not be null");
+        long v = writeVersion;
+        Page p = root.copy(v);
+        p = splitRootIfNeeded(p, v);
+        put(p, v, key, value);
+        return p;
+    }
+
     /**
      * Split the root page if necessary.
      *
...
@@ -153,13 +153,15 @@ public class MVStoreTool {
             boolean node = (type & 1) != 0;
             pw.printf(
                     "+%0" + len +
-                    "x %s, map %x, %d entries, %d bytes%n",
+                    "x %s, map %x, %d entries, %d bytes, maxLen %x%n",
                     p,
                     (node ? "node" : "leaf") +
                     (compressed ? " compressed" : ""),
                     mapId,
                     node ? entries + 1 : entries,
-                    pageSize);
+                    pageSize,
+                    DataUtils.getPageMaxLength(DataUtils.getPagePos(0, 0, pageSize, 0))
+                    );
             p += pageSize;
             Integer mapSize = mapSizes.get(mapId);
             if (mapSize == null) {
...
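For context on the new maxLen column: a page position packs the chunk id, the offset inside the chunk, a coarse length code and the page type into a single long, and DataUtils.getPageMaxLength only recovers the length bucket encoded by that code. The printed maxLen is therefore an upper bound rather than the exact byte count, and it is the same value that gets accumulated into chunk.maxLen. A rough sketch of such a bucketed length code follows; the smallest bucket and the growth factor are assumptions chosen for illustration, not the exact DataUtils encoding.

    // Sketch of a coarse length code: lengths are rounded up to a small set of
    // buckets so that only a few bits are needed inside the page position.
    // The smallest bucket (32) and the 1.5x growth are assumed values, not the
    // constants used by org.h2.mvstore.DataUtils.
    final class LengthCodeSketch {

        static int encodeLengthCode(int length) {
            int code = 0;
            int max = 32;
            while (max < length) {
                max += max / 2;
                code++;
            }
            return code;
        }

        static int decodeMaxLength(int code) {
            int max = 32;
            for (int i = 0; i < code; i++) {
                max += max / 2;
            }
            return max;
        }

        public static void main(String[] args) {
            int pageSize = 1000;
            // round-trip, analogous to getPageMaxLength(getPagePos(0, 0, pageSize, 0))
            int maxLen = decodeMaxLength(encodeLengthCode(pageSize));
            System.out.println(maxLen + " >= " + pageSize);   // prints "1228 >= 1000"
        }
    }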
@@ -7,8 +7,10 @@ package org.h2.mvstore;
 
 import java.nio.ByteBuffer;
 import java.util.Arrays;
+import java.util.HashSet;
 import org.h2.compress.Compressor;
 import org.h2.mvstore.type.DataType;
+import org.h2.util.New;
 
 /**
  * A page (a node or a leaf).
@@ -766,6 +768,11 @@ public class Page {
         }
         pos = DataUtils.getPagePos(chunkId, start, pageLength, type);
         store.cachePage(pos, this, getMemory());
+        if (type == DataUtils.PAGE_TYPE_NODE) {
+            // cache again - this will make sure nodes stay in the cache
+            // for a longer time
+            store.cachePage(pos, this, getMemory());
+        }
         long max = DataUtils.getPageMaxLength(pos);
         chunk.maxLen += max;
         chunk.maxLenLive += max;
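The duplicated cachePage call only pays off because the page cache is frequency-aware: storing a node page a second time counts as another access, so inner nodes outlive leaf pages that were stored once when the cache has to evict. The toy cache below illustrates that effect; it is a deliberately simplified stand-in, not the CacheLongKeyLIRS cache the MVStore actually uses.

    import java.util.HashMap;
    import java.util.Map;

    // Toy frequency-aware cache: evicts the entry with the fewest recorded puts.
    // Only meant to show why putting a node page twice keeps it cached longer.
    class TinyFreqCache<K, V> {
        private final int capacity;
        private final Map<K, V> values = new HashMap<>();
        private final Map<K, Integer> useCount = new HashMap<>();

        TinyFreqCache(int capacity) {
            this.capacity = capacity;
        }

        void put(K key, V value) {
            values.put(key, value);
            useCount.merge(key, 1, Integer::sum);
            if (values.size() > capacity) {
                K victim = null;
                for (K k : values.keySet()) {
                    if (victim == null || useCount.get(k) < useCount.get(victim)) {
                        victim = k;
                    }
                }
                values.remove(victim);   // the least-used entry goes first
            }
        }

        boolean contains(K key) {
            return values.containsKey(key);
        }

        public static void main(String[] args) {
            TinyFreqCache<String, String> cache = new TinyFreqCache<>(2);
            cache.put("node", "inner node");
            cache.put("node", "inner node");   // "cache again": use count is now 2
            cache.put("leaf1", "leaf");
            cache.put("leaf2", "leaf");        // one of the leaves is evicted
            System.out.println(cache.contains("node"));   // true: the node survives
        }
    }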
@@ -820,7 +827,9 @@ public class Page {
      * Unlink the children recursively after all data is written.
      */
     void writeEnd() {
-        if (!isLeaf()) {
+        if (isLeaf()) {
+            return;
+        }
         int len = children.length;
         for (int i = 0; i < len; i++) {
             PageReference ref = children[i];
@@ -834,7 +843,6 @@ public class Page {
                 }
             }
         }
-    }
 
     long getVersion() {
         return version;
@@ -939,4 +947,142 @@ public class Page {
     }
 
+    /**
+     * Contains information about which other pages are referenced (directly or
+     * indirectly) by the given page. This is a subset of the page data, for
+     * pages of type node. This information is used for garbage collection (to
+     * quickly find out which chunks are still in use).
+     */
+    public static class PageChildren {
+
+        /**
+         * An empty array of type long.
+         */
+        public static final long[] EMPTY_ARRAY = new long[0];
+
+        /**
+         * The position of the page.
+         */
+        final long pos;
+
+        /**
+         * The page positions of (direct or indirect) children. Depending on
+         * the use case, this can be the complete list, or only a subset of
+         * all children, for example only one reference to a child in another
+         * chunk.
+         */
+        long[] children;
+
+        private PageChildren(long pos, long[] children) {
+            this.pos = pos;
+            this.children = children;
+        }
+
+        PageChildren(Page p) {
+            this.pos = p.getPos();
+            int count = p.getRawChildPageCount();
+            this.children = new long[count];
+            for (int i = 0; i < count; i++) {
+                children[i] = p.getChildPagePos(i);
+            }
+        }
+
+        int getMemory() {
+            return 64 + 8 * children.length;
+        }
+
+        /**
+         * Read a page and return the child page positions it references,
+         * ignoring the keys and values.
+         *
+         * @param fileStore the file store
+         * @param filePos the position in the file
+         * @param mapId the expected map id
+         * @param pos the page position
+         * @return the page children object, or null for a leaf page
+         */
+        static PageChildren read(FileStore fileStore, long filePos, int mapId, long pos) {
+            ByteBuffer buff;
+            int maxLength = DataUtils.getPageMaxLength(pos);
+            if (maxLength == DataUtils.PAGE_LARGE) {
+                buff = fileStore.readFully(filePos, 128);
+                maxLength = buff.getInt();
+                // read the first bytes again
+            }
+            long fileSize = fileStore.fileSize;
+            maxLength = (int) Math.min(fileSize - filePos, maxLength);
+            int length = maxLength;
+            if (length < 0) {
+                throw DataUtils.newIllegalStateException(
+                        DataUtils.ERROR_FILE_CORRUPT,
+                        "Illegal page length {0} reading at {1}; file size {2}",
+                        length, filePos, fileSize);
+            }
+            buff = fileStore.readFully(filePos, length);
+            int chunkId = DataUtils.getPageChunkId(pos);
+            int offset = DataUtils.getPageOffset(pos);
+            int start = buff.position();
+            int pageLength = buff.getInt();
+            if (pageLength > maxLength) {
+                throw DataUtils.newIllegalStateException(
+                        DataUtils.ERROR_FILE_CORRUPT,
+                        "File corrupted in chunk {0}, expected page length =< {1}, got {2}",
+                        chunkId, maxLength, pageLength);
+            }
+            buff.limit(start + pageLength);
+            short check = buff.getShort();
+            int m = DataUtils.readVarInt(buff);
+            if (m != mapId) {
+                throw DataUtils.newIllegalStateException(
+                        DataUtils.ERROR_FILE_CORRUPT,
+                        "File corrupted in chunk {0}, expected map id {1}, got {2}",
+                        chunkId, mapId, m);
+            }
+            int checkTest = DataUtils.getCheckValue(chunkId)
+                    ^ DataUtils.getCheckValue(offset)
+                    ^ DataUtils.getCheckValue(pageLength);
+            if (check != (short) checkTest) {
+                throw DataUtils.newIllegalStateException(
+                        DataUtils.ERROR_FILE_CORRUPT,
+                        "File corrupted in chunk {0}, expected check value {1}, got {2}",
+                        chunkId, checkTest, check);
+            }
+            int len = DataUtils.readVarInt(buff);
+            int type = buff.get();
+            boolean node = (type & 1) == DataUtils.PAGE_TYPE_NODE;
+            if (!node) {
+                return null;
+            }
+            long[] children = new long[len + 1];
+            for (int i = 0; i <= len; i++) {
+                children[i] = buff.getLong();
+            }
+            return new PageChildren(pos, children);
+        }
+
+        void removeDuplicateChunkReferences() {
+            HashSet<Integer> chunks = New.hashSet();
+            // we don't need references to leaves in the same chunk
+            chunks.add(DataUtils.getPageChunkId(pos));
+            for (int i = 0; i < children.length; i++) {
+                long p = children[i];
+                if (DataUtils.getPageType(p) == DataUtils.PAGE_TYPE_NODE) {
+                    continue;
+                }
+                int chunkId = DataUtils.getPageChunkId(p);
+                if (chunks.add(chunkId)) {
+                    continue;
+                }
+                long[] c2 = new long[children.length - 1];
+                DataUtils.copyExcept(children, c2, children.length, i);
+                children = c2;
+                i--;
+            }
+            if (children.length == 0) {
+                children = EMPTY_ARRAY;
+            }
+        }
+    }
+
 }
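PageChildren is the raw material for the mark phase: starting from the root page of every map (and every retained version), the store follows these child positions, extracts the chunk id from each position, and the union of those ids is the set of chunks that must not be overwritten. A sketch of that collection step is shown below; readChildren stands in for PageChildren.read, and the bit layout assumed in getPageChunkId is for illustration only, not taken from DataUtils.

    import java.util.ArrayDeque;
    import java.util.Deque;
    import java.util.HashSet;
    import java.util.Set;
    import java.util.function.LongFunction;

    class ChunkMarker {

        // Collect the ids of all chunks reachable from the given root positions.
        // readChildren plays the role of PageChildren.read: for a node page it
        // returns the child page positions, for a leaf page it returns null.
        static Set<Integer> markLiveChunks(long[] rootPositions,
                LongFunction<long[]> readChildren) {
            Set<Integer> liveChunks = new HashSet<>();
            Set<Long> visited = new HashSet<>();
            Deque<Long> todo = new ArrayDeque<>();
            for (long pos : rootPositions) {
                todo.push(pos);
            }
            while (!todo.isEmpty()) {
                long pos = todo.pop();
                if (!visited.add(pos)) {
                    continue;   // several maps or versions may share subtrees
                }
                liveChunks.add(getPageChunkId(pos));
                long[] children = readChildren.apply(pos);
                if (children != null) {
                    for (long childPos : children) {
                        todo.push(childPos);
                    }
                }
            }
            return liveChunks;
        }

        // Stand-in for DataUtils.getPageChunkId; the exact bit layout of a page
        // position is assumed here, not copied from the real implementation.
        private static int getPageChunkId(long pos) {
            return (int) (pos >>> 38);
        }
    }

Dropping duplicate references to leaves in the same chunk (removeDuplicateChunkReferences above) keeps this walk cheap: one reference per foreign chunk is enough to mark it live.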
@@ -221,6 +221,7 @@ public class TestConcurrent extends TestMVStore {
         }
         FileUtils.deleteRecursive("memFS:", false);
     }
+
     private void testConcurrentFree() throws InterruptedException {
         String fileName = "memFS:testConcurrentFree.h3";
         for (int test = 0; test < 10; test++) {
@@ -276,6 +277,13 @@ public class TestConcurrent extends TestMVStore {
                 }
             }
             task.get();
+            // this will mark old chunks as unused,
+            // but not remove (and overwrite) them yet
+            s.commit();
+            // this will remove them, so we end up with
+            // one unused one, and one active one
+            MVMap<Integer, Integer> m = s.openMap("dummy");
+            m.put(1, 1);
             s.commit();
             MVMap<String, String> meta = s.getMetaMap();
@@ -285,8 +293,7 @@ public class TestConcurrent extends TestMVStore {
                     chunkCount++;
                 }
             }
-            // the chunk metadata is not yet written
-            assertEquals(0, chunkCount);
+            assertTrue("" + chunkCount, chunkCount < 3);
             s.close();
         }
         FileUtils.deleteRecursive("memFS:", false);
...
@@ -398,19 +398,20 @@ public class TestMVStore extends TestBase {
         MVStore s = new MVStore.Builder().
                 fileStore(offHeap).
                 open();
+        int count = 1000;
         Map<Integer, String> map = s.openMap("data");
-        for (int i = 0; i < 1000; i++) {
+        for (int i = 0; i < count; i++) {
             map.put(i, "Hello " + i);
             s.commit();
         }
-        assertTrue(offHeap.getWriteCount() > 1000);
+        assertTrue(offHeap.getWriteCount() > count);
         s.close();
         s = new MVStore.Builder().
                 fileStore(offHeap).
                 open();
         map = s.openMap("data");
-        for (int i = 0; i < 1000; i++) {
+        for (int i = 0; i < count; i++) {
             assertEquals("Hello " + i, map.get(i));
         }
         s.close();
@@ -1734,8 +1735,13 @@ public class TestMVStore extends TestBase {
         assertTrue(chunkCount2 >= chunkCount1);
 
         m = s.openMap("data");
-        assertTrue(s.compact(80, 50 * 1024));
-        assertFalse(s.compact(80, 1024));
+        for (int i = 0; i < 10; i++) {
+            boolean result = s.compact(50, 50 * 1024);
+            if (!result) {
+                break;
+            }
+        }
+        assertFalse(s.compact(50, 1024));
         int chunkCount3 = 0;
         for (String k : meta.keySet()) {
@@ -1744,7 +1750,8 @@ public class TestMVStore extends TestBase {
             }
         }
-        assertTrue(chunkCount3 < chunkCount1);
+        assertTrue(chunkCount1 + ">" + chunkCount2 + ">" + chunkCount3,
+                chunkCount3 < chunkCount1);
         for (int i = 0; i < 10 * factor; i++) {
             assertEquals("x" + i, "Hello" + (i / factor), m.get(i));
...