Commit 16b41b06 authored by Thomas Mueller

MVStore: use a mark & sweep GC algorithm instead of reference counting, to ensure used chunks are never overwritten, even if the reference counting algorithm does not work properly.
Parent 75e7ff82
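In rough terms, the change works like this: instead of keeping a reference count per chunk and freeing a chunk when its count drops to zero, the store periodically walks the pages that are still reachable from the current root pages, marks every chunk that contains such a page, and treats only unmarked chunks as free space that may be overwritten. A minimal, self-contained sketch of that idea in Java (the PageRef interface and all names in it are illustrative, not the MVStore API):

import java.util.ArrayDeque;
import java.util.Deque;
import java.util.HashSet;
import java.util.Set;

// Illustrative mark & sweep pass over chunks; not part of the patch.
class ChunkMarkSweepSketch {

    // Stand-in for a page position plus its child page positions.
    interface PageRef {
        int chunkId();                  // chunk that contains this page
        Iterable<PageRef> children();   // child pages (empty for a leaf)
    }

    // Mark phase: collect the ids of all chunks reachable from the roots.
    static Set<Integer> markLiveChunks(Iterable<PageRef> roots) {
        Set<Integer> live = new HashSet<>();
        Deque<PageRef> stack = new ArrayDeque<>();
        for (PageRef r : roots) {
            stack.push(r);
        }
        while (!stack.isEmpty()) {
            PageRef p = stack.pop();
            live.add(p.chunkId());
            for (PageRef c : p.children()) {
                stack.push(c);
            }
        }
        return live;
    }

    // Sweep phase: every chunk that was never marked may be overwritten.
    static Set<Integer> sweepDeadChunks(Set<Integer> allChunks, Set<Integer> live) {
        Set<Integer> dead = new HashSet<>(allChunks);
        dead.removeAll(live);
        return dead;
    }
}

The advantage over reference counting is robustness: a single missed increment can let a chunk that is still in use be freed and overwritten, whereas a mark pass recomputes chunk liveness from scratch on every run.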
......@@ -17,7 +17,10 @@ Change Log
<h1>Change Log</h1>
<h2>Next Version (unreleased)</h2>
<ul><li>In the multi-threaded mode, updating the column selectivity ("analyze")
<ul><li>MVStore: use a mark & sweep GC algorithm instead of reference counting,
to ensure used chunks are never overwritten, even if the reference counting
algorithm does not work properly.
</li><li>In the multi-threaded mode, updating the column selectivity ("analyze")
in the background sometimes did not work.
</li><li>In the multi-threaded mode, database metadata operations
did sometimes not work if the schema was changed at the same time
......
......@@ -123,6 +123,23 @@ public class MVMap<K, V> extends AbstractMap<K, V>
return (V) result;
}
/**
* Add or replace a key-value pair in a branch.
*
* @param root the root page
* @param key the key (may not be null)
* @param value the value (may not be null)
* @return the new root page
*/
synchronized Page putBranch(Page root, K key, V value) {
DataUtils.checkArgument(value != null, "The value may not be null");
long v = writeVersion;
Page p = root.copy(v);
p = splitRootIfNeeded(p, v);
put(p, v, key, value);
return p;
}
/**
* Split the root page if necessary.
*
......
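Unlike put(), the new putBranch() operates on an explicitly supplied root page and returns the resulting root instead of installing it, so a caller can stack several updates onto a private branch before the branch is published. A rough usage sketch (it has to live in the org.h2.mvstore package because putBranch is package-private; how the store actually publishes the returned root is not shown in this diff):

package org.h2.mvstore;

// Illustrative only: applies two updates to a private branch of a map.
class PutBranchUsageSketch {

    static Page addTwoEntries(MVMap<String, String> map) {
        Page root = map.getRoot();
        // each call copies the root for the current write version,
        // splits it if needed, and returns the new root page
        root = map.putBranch(root, "chunk.1", "first value");
        root = map.putBranch(root, "chunk.2", "second value");
        return root;
    }
}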
......@@ -153,13 +153,15 @@ public class MVStoreTool {
boolean node = (type & 1) != 0;
pw.printf(
"+%0" + len +
"x %s, map %x, %d entries, %d bytes%n",
"x %s, map %x, %d entries, %d bytes, maxLen %x%n",
p,
(node ? "node" : "leaf") +
(compressed ? " compressed" : ""),
mapId,
node ? entries + 1 : entries,
pageSize);
pageSize,
DataUtils.getPageMaxLength(DataUtils.getPagePos(0, 0, pageSize, 0))
);
p += pageSize;
Integer mapSize = mapSizes.get(mapId);
if (mapSize == null) {
......
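The new maxLen column is obtained by round-tripping the exact page size through the position encoding: DataUtils.getPagePos stores the length as a small code (a coarse size bucket), and DataUtils.getPageMaxLength maps that code back to the bucket's upper bound, so the printed maxLen is the smallest encodable size that covers the page. A simplified illustration of the idea (the bucket sizes below are assumptions for the example, not the exact values used by DataUtils):

// Simplified model of a coded page length; bucket sizes are made up.
final class LengthCodeSketch {

    // smallest power-of-two bucket (starting at 32 bytes) that covers the length
    static int encodeLength(int length) {
        int code = 0;
        int max = 32;
        while (max < length) {
            max *= 2;
            code++;
        }
        return code;
    }

    static int maxLengthForCode(int code) {
        return 32 << code;
    }

    public static void main(String[] args) {
        int pageSize = 1500;
        int code = encodeLength(pageSize);
        // prints: page of 1500 bytes -> maxLen 2048
        System.out.println("page of " + pageSize
                + " bytes -> maxLen " + maxLengthForCode(code));
    }
}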
......@@ -7,8 +7,10 @@ package org.h2.mvstore;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.HashSet;
import org.h2.compress.Compressor;
import org.h2.mvstore.type.DataType;
import org.h2.util.New;
/**
* A page (a node or a leaf).
......@@ -766,6 +768,11 @@ public class Page {
}
pos = DataUtils.getPagePos(chunkId, start, pageLength, type);
store.cachePage(pos, this, getMemory());
if (type == DataUtils.PAGE_TYPE_NODE) {
// cache again - this will make sure nodes stay in the cache
// for a longer time
store.cachePage(pos, this, getMemory());
}
long max = DataUtils.getPageMaxLength(pos);
chunk.maxLen += max;
chunk.maxLenLive += max;
......@@ -820,7 +827,9 @@ public class Page {
* Unlink the children recursively after all data is written.
*/
void writeEnd() {
if (!isLeaf()) {
if (isLeaf()) {
return;
}
int len = children.length;
for (int i = 0; i < len; i++) {
PageReference ref = children[i];
......@@ -834,7 +843,6 @@ public class Page {
}
}
}
}
long getVersion() {
return version;
......@@ -939,4 +947,142 @@ public class Page {
}
/**
* Contains information about which other pages are referenced (directly or
* indirectly) by the given page. This is a subset of the page data, for
* pages of type node. This information is used for garbage collection (to
* quickly find out which chunks are still in use).
*/
public static class PageChildren {
/**
* An empty array of type long.
*/
public static final long[] EMPTY_ARRAY = new long[0];
/**
* The position of the page.
*/
final long pos;
/**
* The page positions of (direct or indirect) children. Depending on the
* use case, this can be the complete list, or only a subset of all
* children, for example only one reference to a child in another
* chunk.
*/
long[] children;
private PageChildren(long pos, long[] children) {
this.pos = pos;
this.children = children;
}
PageChildren(Page p) {
this.pos = p.getPos();
int count = p.getRawChildPageCount();
this.children = new long[count];
for (int i = 0; i < count; i++) {
children[i] = p.getChildPagePos(i);
}
}
int getMemory() {
return 64 + 8 * children.length;
}
/**
* Read the children information of a node page from the file, without
* deserializing the keys or values.
*
* @param fileStore the file store
* @param filePos the position of the page within the file
* @param mapId the expected map id
* @param pos the page position
* @return the page children object, or null if the page is a leaf
*/
static PageChildren read(FileStore fileStore, long filePos, int mapId, long pos) {
ByteBuffer buff;
int maxLength = DataUtils.getPageMaxLength(pos);
if (maxLength == DataUtils.PAGE_LARGE) {
buff = fileStore.readFully(filePos, 128);
maxLength = buff.getInt();
// read the first bytes again
}
long fileSize = fileStore.fileSize;
maxLength = (int) Math.min(fileSize - filePos, maxLength);
int length = maxLength;
if (length < 0) {
throw DataUtils.newIllegalStateException(
DataUtils.ERROR_FILE_CORRUPT,
"Illegal page length {0} reading at {1}; file size {2} ",
length, filePos, fileSize);
}
buff = fileStore.readFully(filePos, length);
int chunkId = DataUtils.getPageChunkId(pos);
int offset = DataUtils.getPageOffset(pos);
int start = buff.position();
int pageLength = buff.getInt();
if (pageLength > maxLength) {
throw DataUtils.newIllegalStateException(
DataUtils.ERROR_FILE_CORRUPT,
"File corrupted in chunk {0}, expected page length =< {1}, got {2}",
chunkId, maxLength, pageLength);
}
buff.limit(start + pageLength);
short check = buff.getShort();
int m = DataUtils.readVarInt(buff);
if (m != mapId) {
throw DataUtils.newIllegalStateException(
DataUtils.ERROR_FILE_CORRUPT,
"File corrupted in chunk {0}, expected map id {1}, got {2}",
chunkId, mapId, m);
}
int checkTest = DataUtils.getCheckValue(chunkId)
^ DataUtils.getCheckValue(offset)
^ DataUtils.getCheckValue(pageLength);
if (check != (short) checkTest) {
throw DataUtils.newIllegalStateException(
DataUtils.ERROR_FILE_CORRUPT,
"File corrupted in chunk {0}, expected check value {1}, got {2}",
chunkId, checkTest, check);
}
int len = DataUtils.readVarInt(buff);
int type = buff.get();
boolean node = (type & 1) == DataUtils.PAGE_TYPE_NODE;
if (!node) {
return null;
}
long[] children = new long[len + 1];
for (int i = 0; i <= len; i++) {
children[i] = buff.getLong();
}
return new PageChildren(pos, children);
}
void removeDuplicateChunkReferences() {
HashSet<Integer> chunks = New.hashSet();
// we don't need references to leaves in the same chunk
chunks.add(DataUtils.getPageChunkId(pos));
for (int i = 0; i < children.length; i++) {
long p = children[i];
if (DataUtils.getPageType(p) == DataUtils.PAGE_TYPE_NODE) {
continue;
}
int chunkId = DataUtils.getPageChunkId(p);
if (chunks.add(chunkId)) {
continue;
}
long[] c2 = new long[children.length - 1];
DataUtils.copyExcept(children, c2, children.length, i);
children = c2;
i--;
}
if (children.length == 0) {
children = EMPTY_ARRAY;
}
}
}
}
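The PageChildren class above is the building block for the mark phase: starting from a root page position, node pages can be followed on disk without deserializing keys or values, recording along the way which chunks are touched. A rough sketch of such a walk using the read method from this patch (the traversal class and the filePosOf helper are hypothetical; translating a page position into an absolute file position depends on the chunk layout and is not shown here):

package org.h2.mvstore;

import java.util.ArrayDeque;
import java.util.Deque;
import java.util.HashSet;
import java.util.Set;

// Illustrative sketch (same package, since PageChildren.read and the
// children field are package-private); not part of the patch.
class MarkPhaseSketch {

    static Set<Integer> collectReferencedChunks(FileStore fileStore,
            int mapId, long rootPos) {
        Set<Integer> chunks = new HashSet<>();
        Deque<Long> todo = new ArrayDeque<>();
        todo.push(rootPos);
        while (!todo.isEmpty()) {
            long pos = todo.pop();
            chunks.add(DataUtils.getPageChunkId(pos));
            if (DataUtils.getPageType(pos) != DataUtils.PAGE_TYPE_NODE) {
                // leaf pages have no children to follow
                continue;
            }
            Page.PageChildren pc = Page.PageChildren.read(
                    fileStore, filePosOf(fileStore, pos), mapId, pos);
            if (pc == null) {
                continue;
            }
            for (long childPos : pc.children) {
                todo.push(childPos);
            }
        }
        return chunks;
    }

    // Placeholder: the real translation needs the start block of the
    // chunk that contains the page, which this sketch does not model.
    private static long filePosOf(FileStore fileStore, long pos) {
        throw new UnsupportedOperationException("not implemented in this sketch");
    }
}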
......@@ -221,6 +221,7 @@ public class TestConcurrent extends TestMVStore {
}
FileUtils.deleteRecursive("memFS:", false);
}
private void testConcurrentFree() throws InterruptedException {
String fileName = "memFS:testConcurrentFree.h3";
for (int test = 0; test < 10; test++) {
......@@ -276,6 +277,13 @@ public class TestConcurrent extends TestMVStore {
}
}
task.get();
// this will mark old chunks as unused,
// but not remove (and overwrite) them yet
s.commit();
// this will remove them, so we end up with
// one unused one, and one active one
MVMap<Integer, Integer> m = s.openMap("dummy");
m.put(1, 1);
s.commit();
MVMap<String, String> meta = s.getMetaMap();
......@@ -285,8 +293,7 @@ public class TestConcurrent extends TestMVStore {
chunkCount++;
}
}
// the chunk metadata is not yet written
assertEquals(0, chunkCount);
assertTrue("" + chunkCount, chunkCount < 3);
s.close();
}
FileUtils.deleteRecursive("memFS:", false);
......
......@@ -398,19 +398,20 @@ public class TestMVStore extends TestBase {
MVStore s = new MVStore.Builder().
fileStore(offHeap).
open();
int count = 1000;
Map<Integer, String> map = s.openMap("data");
for (int i = 0; i < 1000; i++) {
for (int i = 0; i < count; i++) {
map.put(i, "Hello " + i);
s.commit();
}
assertTrue(offHeap.getWriteCount() > 1000);
assertTrue(offHeap.getWriteCount() > count);
s.close();
s = new MVStore.Builder().
fileStore(offHeap).
open();
map = s.openMap("data");
for (int i = 0; i < 1000; i++) {
for (int i = 0; i < count; i++) {
assertEquals("Hello " + i, map.get(i));
}
s.close();
......@@ -1734,8 +1735,13 @@ public class TestMVStore extends TestBase {
assertTrue(chunkCount2 >= chunkCount1);
m = s.openMap("data");
assertTrue(s.compact(80, 50 * 1024));
assertFalse(s.compact(80, 1024));
for (int i = 0; i < 10; i++) {
boolean result = s.compact(50, 50 * 1024);
if (!result) {
break;
}
}
assertFalse(s.compact(50, 1024));
int chunkCount3 = 0;
for (String k : meta.keySet()) {
......@@ -1744,7 +1750,8 @@ public class TestMVStore extends TestBase {
}
}
assertTrue(chunkCount3 < chunkCount1);
assertTrue(chunkCount1 + ">" + chunkCount2 + ">" + chunkCount3,
chunkCount3 < chunkCount1);
for (int i = 0; i < 10 * factor; i++) {
assertEquals("x" + i, "Hello" + (i / factor), m.get(i));
......