Commit e64b0d36 authored by Thomas Mueller

Off-heap storage

Parent 2a280505
@@ -32,6 +32,7 @@ MVStore
 <a href="#transactions">- Transactions</a><br />
 <a href="#inMemory">- In-Memory Performance and Usage</a><br />
 <a href="#dataTypes">- Pluggable Data Types</a><br />
+<a href="#offHeap">- Off-Heap and Pluggable Storage</a><br />
 <a href="#blob">- BLOB Support</a><br />
 <a href="#pluggableMap">- R-Tree and Pluggable Map Implementations</a><br />
 <a href="#caching">- Concurrent Operations and Caching</a><br />
@@ -60,8 +61,11 @@ But it can be also directly within an application, without using JDBC or SQL.
 </li><li>It is intended to be fast, simple to use, and small.
 </li><li>Old versions of the data can be read concurrently with all other operations.
 </li><li>Transactions are supported (including concurrent transactions and 2-phase commit).
-</li><li>The tool is very modular. It supports pluggable data types / serialization,
-pluggable map implementations (B-tree, R-tree, concurrent B-tree currently), BLOB storage,
+</li><li>The tool is very modular.
+It supports pluggable data types / serialization,
+pluggable storage (to a file, to off-heap memory),
+pluggable map implementations (B-tree, R-tree, concurrent B-tree currently),
+BLOB storage,
 and a file system abstraction to support encrypted files and zip files.
 </li></ul>
@@ -102,6 +106,7 @@ MVStore s = new MVStore.Builder().
     compressData().
     encryptionKey("007".toCharArray()).
     fileName(fileName).
+    fileStore(new FileStore()).
     pageSplitSize(6 * 1024).
     readOnly().
     writeBufferSize(8).
@@ -114,6 +119,7 @@ MVStore s = new MVStore.Builder().
 </li><li>compressData: compress the data when storing.
 </li><li>encryptionKey: the encryption key for file encryption.
 </li><li>fileName: the name of the file, for file based stores.
+</li><li>fileStore: the storage implementation to use.
 </li><li>pageSplitSize: the point where pages are split.
 </li><li>readOnly: open the file in read-only mode.
 </li><li>writeBufferSize: the size of the write buffer in MB.
@@ -277,6 +283,19 @@ Also, there is no inherent limit to the number of maps and chunks.
 Due to using a log structured storage, there is no special case handling for large keys or pages.
 </p>
+
+<h3 id="offHeap">Off-Heap and Pluggable Storage</h3>
+<p>
+Storage is pluggable. The default storage is to a single file (unless pure in-memory operation is used).
+</p>
+<p>
+An off-heap storage implementation is available. This storage keeps the data in off-heap memory,
+that is, outside of the regular garbage-collected heap. This allows using very large in-memory
+stores without having to increase the JVM heap (which would greatly increase garbage collection
+cost). Memory is allocated using <code>ByteBuffer.allocateDirect</code>.
+One chunk is allocated at a time (each chunk is usually a few MB large), so that
+allocation cost is low.
+</p>
+
 <h3 id="blob">BLOB Support</h3>
 <p>
 There is a mechanism that stores large binary objects by splitting them into smaller blocks.
......
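As a usage illustration (not part of this commit), here is a minimal sketch of how the off-heap backend described above is wired in through the builder. It follows the fileStore(...) option and the test added at the end of this commit; the map name and contents are purely illustrative.

OffHeapStore offHeap = new OffHeapStore();
MVStore s = new MVStore.Builder().
        fileStore(offHeap).
        open();
MVMap<Integer, String> map = s.openMap("data");
for (int i = 0; i < 100; i++) {
    // the stored chunks end up in direct ByteBuffers, not on the Java heap
    map.put(i, "Hello " + i);
}
s.store();   // write the current version into off-heap chunks
s.close();   // truncates the off-heap store, releasing its buffers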
@@ -18,17 +18,28 @@ import org.h2.store.fs.FilePathCrypt;
 import org.h2.store.fs.FilePathNio;

 /**
- * The storage mechanism of the MVStore.
+ * The default storage mechanism of the MVStore. This implementation persists
+ * data to a file. The file store is responsible for persisting data and for
+ * free space management.
  */
 public class FileStore {

-    private String fileName;
-    private boolean readOnly;
-    private FileChannel file;
-    private FileLock fileLock;
-    private long fileSize;
-    private long readCount;
-    private long writeCount;
+    protected long readCount;
+    protected long writeCount;
+
+    /**
+     * The free spaces between the chunks. The first block to use is block 2
+     * (the first two blocks are the store header).
+     */
+    protected final FreeSpaceBitSet freeSpace = new FreeSpaceBitSet(2, MVStore.BLOCK_SIZE);
+
+    protected String fileName;
+    protected boolean readOnly;
+    protected long fileSize;
+
+    protected FileChannel file;
+    protected FileLock fileLock;

     @Override
     public String toString() {
@@ -46,16 +57,6 @@ public class FileStore {
         DataUtils.writeFully(file, pos, src);
     }

-    /**
-     * Mark the space within the file as unused.
-     *
-     * @param pos
-     * @param length
-     */
-    public void free(long pos, int length) {
-    }
-
     public void open(String fileName, boolean readOnly, char[] encryptionKey) {
         if (fileName != null && fileName.indexOf(':') < 0) {
             // NIO is used, unless a different file system is specified
@@ -109,6 +110,7 @@ public class FileStore {
                 fileLock = null;
             }
             file.close();
+            freeSpace.clear();
         } catch (Exception e) {
             throw DataUtils.newIllegalStateException(
                     DataUtils.ERROR_WRITING_FAILED,
@@ -134,6 +136,7 @@ public class FileStore {
     public void truncate(long size) {
         try {
+            writeCount++;
             file.truncate(size);
             fileSize = Math.min(fileSize, size);
         } catch (IOException e) {
@@ -178,4 +181,38 @@ public class FileStore {
         return readOnly;
     }

+    public int getDefaultRetentionTime() {
+        return 45000;
+    }
+
+    public void markUsed(long start, int len) {
+        freeSpace.markUsed(start, len);
+    }
+
+    public long allocate(int length) {
+        return freeSpace.allocate(length);
+    }
+
+    /**
+     * Mark the space as free.
+     *
+     * @param pos the position in bytes
+     * @param length the number of bytes
+     */
+    public void free(long pos, int length) {
+        freeSpace.free(pos, length);
+    }
+
+    public int getFillRate() {
+        return freeSpace.getFillRate();
+    }
+
+    public long getFirstFree() {
+        return freeSpace.getFirstFree();
+    }
+
+    public void clear() {
+        freeSpace.clear();
+    }
+
 }
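Making the fields and the read/write counters protected turns FileStore into an extension point; the OffHeapStore below is the first subclass to use it. As a rough sketch (not part of this commit, and only to illustrate the idea), a delegating subclass could override the two I/O methods visible in this diff and inherit everything else:

import java.nio.ByteBuffer;

import org.h2.mvstore.FileStore;

/**
 * A hypothetical store that logs each read and write before delegating to
 * the default file-backed implementation.
 */
public class LoggingFileStore extends FileStore {

    @Override
    public void readFully(long pos, ByteBuffer dst) {
        System.out.println("read  " + dst.remaining() + " bytes at " + pos);
        super.readFully(pos, dst);
    }

    @Override
    public void writeFully(long pos, ByteBuffer src) {
        System.out.println("write " + src.remaining() + " bytes at " + pos);
        super.writeFully(pos, src);
    }
}

Such a store would presumably be passed to MVStore.Builder via fileStore(new LoggingFileStore()) together with fileName(...), as in the builder example in the documentation above.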
/*
 * Copyright 2004-2013 H2 Group. Multiple-Licensed under the H2 License,
 * Version 1.0, and under the Eclipse Public License, Version 1.0
 * (http://h2database.com/html/license.html).
 * Initial Developer: H2 Group
 */
package org.h2.mvstore;

import java.nio.ByteBuffer;
import java.util.Iterator;
import java.util.Map.Entry;
import java.util.TreeMap;

/**
 * A storage mechanism that "persists" data in the off-heap area of the main
 * memory.
 */
public class OffHeapStore extends FileStore {

    private final TreeMap<Long, ByteBuffer> memory = new TreeMap<Long, ByteBuffer>();

    @Override
    public void open(String fileName, boolean readOnly, char[] encryptionKey) {
        // nothing to do
    }

    @Override
    public String toString() {
        return memory.toString();
    }

    @Override
    public void readFully(long pos, ByteBuffer dst) {
        Entry<Long, ByteBuffer> mem = memory.floorEntry(pos);
        if (mem == null) {
            throw DataUtils.newIllegalStateException(DataUtils.ERROR_READING_FAILED,
                    "Could not read from position {0}", pos);
        }
        readCount++;
        ByteBuffer buff = mem.getValue();
        ByteBuffer read = buff.duplicate();
        int offset = (int) (pos - mem.getKey());
        read.position(offset);
        read.limit(dst.remaining() + offset);
        dst.put(read);
        dst.rewind();
    }

    @Override
    public void free(long pos, int length) {
        freeSpace.free(pos, length);
        ByteBuffer buff = memory.remove(pos);
        if (buff == null) {
            throw DataUtils.newIllegalStateException(DataUtils.ERROR_READING_FAILED,
                    "Could not find entry at position {0}", pos);
        } else if (buff.remaining() != length) {
            throw DataUtils.newIllegalStateException(DataUtils.ERROR_READING_FAILED,
                    "Partial remove is not supported at position {0}", pos);
        }
    }

    @Override
    public void writeFully(long pos, ByteBuffer src) {
        fileSize = Math.max(fileSize, pos + src.remaining());
        Entry<Long, ByteBuffer> mem = memory.floorEntry(pos);
        if (mem == null) {
            // not found: create a new entry
            writeNewEntry(pos, src);
            return;
        }
        long prevPos = mem.getKey();
        ByteBuffer buff = mem.getValue();
        int prevLength = buff.capacity();
        int length = src.remaining();
        if (prevPos == pos) {
            if (prevLength != length) {
                throw DataUtils.newIllegalStateException(DataUtils.ERROR_READING_FAILED,
                        "Could not write to position {0}; partial overwrite is not supported", pos);
            }
            writeCount++;
            buff.rewind();
            buff.put(src);
            return;
        }
        if (prevPos + prevLength > pos) {
            throw DataUtils.newIllegalStateException(DataUtils.ERROR_READING_FAILED,
                    "Could not write to position {0}; partial overwrite is not supported", pos);
        }
        writeNewEntry(pos, src);
    }

    private void writeNewEntry(long pos, ByteBuffer src) {
        writeCount++;
        int length = src.remaining();
        ByteBuffer buff = ByteBuffer.allocateDirect(length);
        buff.put(src);
        buff.rewind();
        memory.put(pos, buff);
    }

    @Override
    public void truncate(long size) {
        writeCount++;
        if (size == 0) {
            fileSize = 0;
            memory.clear();
            return;
        }
        for (Iterator<Long> it = memory.keySet().iterator(); it.hasNext();) {
            long pos = it.next();
            if (pos < size) {
                break;
            }
            ByteBuffer buff = memory.get(pos);
            if (buff.capacity() > size) {
                throw DataUtils.newIllegalStateException(DataUtils.ERROR_READING_FAILED,
                        "Could not truncate to {0}; partial truncate is not supported", pos);
            }
            it.remove();
        }
    }

    @Override
    public void close() {
        truncate(0);
        freeSpace.clear();
    }

    @Override
    public void sync() {
        // nothing to do
    }

    @Override
    public int getDefaultRetentionTime() {
        return 0;
    }

}
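The contract of this store is deliberately narrow: writeFully creates one direct buffer per block written, a block can only be overwritten, freed, or truncated as a whole, and readFully copies data out of the buffer that covers the requested position. A small sketch (not part of the commit) that exercises this byte-level API directly; the position and payload are arbitrary:

import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;

import org.h2.mvstore.OffHeapStore;

OffHeapStore store = new OffHeapStore();
byte[] data = "hello off-heap".getBytes(StandardCharsets.UTF_8);

// one direct buffer of exactly data.length bytes is created at position 0
store.writeFully(0, ByteBuffer.wrap(data));

// read the same block back; readFully rewinds the destination buffer
ByteBuffer dst = ByteBuffer.allocate(data.length);
store.readFully(0, dst);
System.out.println(new String(dst.array(), StandardCharsets.UTF_8));

// close() truncates to size 0, dropping all off-heap buffers
store.close();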
@@ -172,7 +172,7 @@ public class Page {
            buff = ByteBuffer.allocate(128);
            fileStore.readFully(filePos, buff);
            maxLength = buff.getInt();
-           //read the first bytes again
+           // read the first bytes again
        }
        buff = ByteBuffer.allocate(length);
        fileStore.readFully(filePos, buff);
......
@@ -18,6 +18,7 @@ import org.h2.mvstore.Cursor;
 import org.h2.mvstore.DataUtils;
 import org.h2.mvstore.MVMap;
 import org.h2.mvstore.MVStore;
+import org.h2.mvstore.OffHeapStore;
 import org.h2.mvstore.type.DataType;
 import org.h2.mvstore.type.ObjectDataType;
 import org.h2.mvstore.type.StringDataType;
@@ -46,6 +47,7 @@ public class TestMVStore extends TestBase {
     public void test() throws Exception {
         FileUtils.deleteRecursive(getBaseDir(), true);
         FileUtils.createDirectories(getBaseDir());
+        testOffHeapStorage();
         testNewerWriteVersion();
         testCompactFully();
         testBackgroundExceptionListener();
@@ -92,6 +94,29 @@ public class TestMVStore extends TestBase {
         testLargerThan2G();
     }

+    private void testOffHeapStorage() throws Exception {
+        OffHeapStore offHeap = new OffHeapStore();
+        MVStore s = new MVStore.Builder().
+                fileStore(offHeap).
+                open();
+        Map<Integer, String> map = s.openMap("data");
+        for (int i = 0; i < 1000; i++) {
+            map.put(i, "Hello " + i);
+            s.store();
+        }
+        assertTrue(1000 < offHeap.getWriteCount());
+        // s.close();
+        s = new MVStore.Builder().
+                fileStore(offHeap).
+                open();
+        map = s.openMap("data");
+        for (int i = 0; i < 1000; i++) {
+            assertEquals("Hello " + i, map.get(i));
+        }
+        s.close();
+    }
+
     private void testNewerWriteVersion() throws Exception {
         String fileName = getBaseDir() + "/testNewerWriteVersion.h3";
         FileUtils.delete(fileName);
......