提交 e64b0d36 authored 作者: Thomas Mueller's avatar Thomas Mueller

Off-heap storage

上级 2a280505
...@@ -32,6 +32,7 @@ MVStore ...@@ -32,6 +32,7 @@ MVStore
<a href="#transactions">- Transactions</a><br /> <a href="#transactions">- Transactions</a><br />
<a href="#inMemory">- In-Memory Performance and Usage</a><br /> <a href="#inMemory">- In-Memory Performance and Usage</a><br />
<a href="#dataTypes">- Pluggable Data Types</a><br /> <a href="#dataTypes">- Pluggable Data Types</a><br />
<a href="#offHeap">- Off-Heap and Pluggable Storage</a><br />
<a href="#blob">- BLOB Support</a><br /> <a href="#blob">- BLOB Support</a><br />
<a href="#pluggableMap">- R-Tree and Pluggable Map Implementations</a><br /> <a href="#pluggableMap">- R-Tree and Pluggable Map Implementations</a><br />
<a href="#caching">- Concurrent Operations and Caching</a><br /> <a href="#caching">- Concurrent Operations and Caching</a><br />
...@@ -60,8 +61,11 @@ But it can be also directly within an application, without using JDBC or SQL. ...@@ -60,8 +61,11 @@ But it can be also directly within an application, without using JDBC or SQL.
</li><li>It is intended to be fast, simple to use, and small. </li><li>It is intended to be fast, simple to use, and small.
</li><li>Old versions of the data can be read concurrently with all other operations. </li><li>Old versions of the data can be read concurrently with all other operations.
</li><li>Transaction are supported (including concurrent transactions and 2-phase commit). </li><li>Transaction are supported (including concurrent transactions and 2-phase commit).
</li><li>The tool is very modular. It supports pluggable data types / serialization, </li><li>The tool is very modular.
pluggable map implementations (B-tree, R-tree, concurrent B-tree currently), BLOB storage, It supports pluggable data types / serialization,
pluggable storage (to a file, to off-heap memory),
pluggable map implementations (B-tree, R-tree, concurrent B-tree currently),
BLOB storage,
and a file system abstraction to support encrypted files and zip files. and a file system abstraction to support encrypted files and zip files.
</li></ul> </li></ul>
...@@ -102,6 +106,7 @@ MVStore s = new MVStore.Builder(). ...@@ -102,6 +106,7 @@ MVStore s = new MVStore.Builder().
compressData(). compressData().
encryptionKey("007".toCharArray()). encryptionKey("007".toCharArray()).
fileName(fileName). fileName(fileName).
fileStore(new FileStore()).
pageSplitSize(6 * 1024). pageSplitSize(6 * 1024).
readOnly(). readOnly().
writeBufferSize(8). writeBufferSize(8).
...@@ -114,6 +119,7 @@ MVStore s = new MVStore.Builder(). ...@@ -114,6 +119,7 @@ MVStore s = new MVStore.Builder().
</li><li>compressData: compress the data when storing. </li><li>compressData: compress the data when storing.
</li><li>encryptionKey: the encryption key for file encryption. </li><li>encryptionKey: the encryption key for file encryption.
</li><li>fileName: the name of the file, for file based stores. </li><li>fileName: the name of the file, for file based stores.
</li><li>fileStore: the storage implementation to use.
</li><li>pageSplitSize: the point where pages are split. </li><li>pageSplitSize: the point where pages are split.
</li><li>readOnly: open the file in read-only mode. </li><li>readOnly: open the file in read-only mode.
</li><li>writeBufferSize: the size of the write buffer in MB. </li><li>writeBufferSize: the size of the write buffer in MB.
...@@ -277,6 +283,19 @@ Also, there is no inherent limit to the number of maps and chunks. ...@@ -277,6 +283,19 @@ Also, there is no inherent limit to the number of maps and chunks.
Due to using a log structured storage, there is no special case handling for large keys or pages. Due to using a log structured storage, there is no special case handling for large keys or pages.
</p> </p>
<h3 id="offHeap">Off-Heap and Pluggable Storage</h3>
<p>
The storage backend is pluggable. By default, data is stored in a single file (unless pure in-memory operation is used).
</p>
<p>
An off-heap storage implementation is available. This storage keeps the data in off-heap memory,
that is, outside of the regular garbage-collected heap. This makes it possible to use very large
in-memory stores without having to increase the JVM heap size (which would significantly increase
Java garbage collection cost). Memory is allocated using <code>ByteBuffer.allocateDirect</code>.
One chunk is allocated at a time (each chunk is usually a few MB in size), so that
the allocation cost is low.
</p>
<h3 id="blob">BLOB Support</h3> <h3 id="blob">BLOB Support</h3>
<p> <p>
There is a mechanism that stores large binary objects by splitting them into smaller blocks. There is a mechanism that stores large binary objects by splitting them into smaller blocks.
......
...@@ -18,17 +18,28 @@ import org.h2.store.fs.FilePathCrypt; ...@@ -18,17 +18,28 @@ import org.h2.store.fs.FilePathCrypt;
import org.h2.store.fs.FilePathNio; import org.h2.store.fs.FilePathNio;
/** /**
* The storage mechanism of the MVStore. * The default storage mechanism of the MVStore. This implementation persists
 * data to a file. The file store is responsible for persisting data and for
 * free space management.
*/ */
public class FileStore { public class FileStore {
private String fileName; protected long readCount;
private boolean readOnly; protected long writeCount;
private FileChannel file;
private FileLock fileLock; /**
private long fileSize; * The free spaces between the chunks. The first block to use is block 2
private long readCount; * (the first two blocks are the store header).
private long writeCount; */
protected final FreeSpaceBitSet freeSpace = new FreeSpaceBitSet(2, MVStore.BLOCK_SIZE);
protected String fileName;
protected boolean readOnly;
protected long fileSize;
protected FileChannel file;
protected FileLock fileLock;
@Override @Override
public String toString() { public String toString() {
...@@ -46,16 +57,6 @@ public class FileStore { ...@@ -46,16 +57,6 @@ public class FileStore {
DataUtils.writeFully(file, pos, src); DataUtils.writeFully(file, pos, src);
} }
/**
* Mark the space within the file as unused.
*
* @param pos
* @param length
*/
public void free(long pos, int length) {
}
public void open(String fileName, boolean readOnly, char[] encryptionKey) { public void open(String fileName, boolean readOnly, char[] encryptionKey) {
if (fileName != null && fileName.indexOf(':') < 0) { if (fileName != null && fileName.indexOf(':') < 0) {
// NIO is used, unless a different file system is specified // NIO is used, unless a different file system is specified
...@@ -109,6 +110,7 @@ public class FileStore { ...@@ -109,6 +110,7 @@ public class FileStore {
fileLock = null; fileLock = null;
} }
file.close(); file.close();
freeSpace.clear();
} catch (Exception e) { } catch (Exception e) {
throw DataUtils.newIllegalStateException( throw DataUtils.newIllegalStateException(
DataUtils.ERROR_WRITING_FAILED, DataUtils.ERROR_WRITING_FAILED,
...@@ -134,6 +136,7 @@ public class FileStore { ...@@ -134,6 +136,7 @@ public class FileStore {
public void truncate(long size) { public void truncate(long size) {
try { try {
writeCount++;
file.truncate(size); file.truncate(size);
fileSize = Math.min(fileSize, size); fileSize = Math.min(fileSize, size);
} catch (IOException e) { } catch (IOException e) {
...@@ -178,4 +181,38 @@ public class FileStore { ...@@ -178,4 +181,38 @@ public class FileStore {
return readOnly; return readOnly;
} }
public int getDefaultRetentionTime() {
return 45000;
}
public void markUsed(long start, int len) {
freeSpace.markUsed(start, len);
}
public long allocate(int length) {
return freeSpace.allocate(length);
}
/**
* Mark the space as free.
*
* @param pos the position in bytes
* @param length the number of bytes
*/
public void free(long pos, int length) {
freeSpace.free(pos, length);
}
public int getFillRate() {
return freeSpace.getFillRate();
}
public long getFirstFree() {
return freeSpace.getFirstFree();
}
public void clear() {
freeSpace.clear();
}
} }
...@@ -41,16 +41,18 @@ Documentation ...@@ -41,16 +41,18 @@ Documentation
- rolling docs review: at "Transactions" - rolling docs review: at "Transactions"
- better document that writes are in background thread - better document that writes are in background thread
- better document how to do non-unique indexes - better document how to do non-unique indexes
- document pluggable store and OffHeapStore
TestMVStoreDataLoss TestMVStoreDataLoss
MVTableEngine:
- use StreamStore
TransactionStore: TransactionStore:
MVStore: MVStore:
- automated 'kill process' and 'power failure' test - automated 'kill process' and 'power failure' test
- update checkstyle - update checkstyle
- maybe split database into multiple files, to speed up compact
and allow using trim (by truncating / deleting empty files)
- auto-compact from time to time and on close - auto-compact from time to time and on close
- test and possibly improve compact operation (for large dbs) - test and possibly improve compact operation (for large dbs)
- possibly split chunk metadata into immutable and mutable - possibly split chunk metadata into immutable and mutable
...@@ -62,7 +64,6 @@ MVStore: ...@@ -62,7 +64,6 @@ MVStore:
split directly (specially for leaves with one large entry) split directly (specially for leaves with one large entry)
- maybe let a chunk point to a list of potential next chunks - maybe let a chunk point to a list of potential next chunks
(so no fixed location header is needed), similar to a skip list (so no fixed location header is needed), similar to a skip list
- support stores that span multiple files (chunks stored in other files)
- triggers (can be implemented with a custom map); - triggers (can be implemented with a custom map);
maybe implement database indexing with triggers maybe implement database indexing with triggers
- store number of write operations per page (maybe defragment - store number of write operations per page (maybe defragment
...@@ -78,13 +79,13 @@ MVStore: ...@@ -78,13 +79,13 @@ MVStore:
- StreamStore optimization: avoid copying bytes in memory - StreamStore optimization: avoid copying bytes in memory
- Feature shrink a store (create, copy, rename, delete) - Feature shrink a store (create, copy, rename, delete)
and for MVStore on Windows, auto-detect renamed file and for MVStore on Windows, auto-detect renamed file
- ensure data is overwritten eventually if the system doesn't have a timer - ensure data is overwritten eventually if the system doesn't have a
real-time clock (Raspberry Pi) and if there are few writes per startup
- SSD-friendly write (always in blocks of 4 MB / 1 second?) - SSD-friendly write (always in blocks of 4 MB / 1 second?)
- close the file on out of memory or disk write error (out of disk space or so) - close the file on out of memory or disk write error (out of disk space or so)
- implement a sharded map (in one store, multiple stores) - implement a sharded map (in one store, multiple stores)
to support concurrent updates and writes, and very large maps to support concurrent updates and writes, and very large maps
- implement an off-heap file system - maybe support for writing to branches
- add support for writing to branches
- maybe add an optional finalizer and exit hook - maybe add an optional finalizer and exit hook
to store committed changes to store committed changes
- to save space when persisting very small transactions, - to save space when persisting very small transactions,
...@@ -107,15 +108,15 @@ MVStore: ...@@ -107,15 +108,15 @@ MVStore:
- rename setStoreVersion to setDataVersion or similar - rename setStoreVersion to setDataVersion or similar
- to save space for small chunks, combine the last partial - to save space for small chunks, combine the last partial
block with the header block with the header
- off-heap storage (with lower default retention time)
- temporary file storage - temporary file storage
- simple rollback method (rollback to last committed version) - simple rollback method (rollback to last committed version)
- MVMap to implement SortedMap, then NavigableMap - MVMap to implement SortedMap, then NavigableMap
- add abstraction ChunkStore,
with free space handling, retention time / flush,
possibly one file per chunk to support SSD trim on file system level,
and free up memory for off-heap storage)
- Test with OSGi - Test with OSGi
- avoid copying data from ByteBuffer to another ByteBuffer if possible,
specially for the OffHeapStore
- storage that splits database into multiple files,
to speed up compact and allow using trim
(by truncating / deleting empty files)
*/ */
...@@ -143,12 +144,6 @@ public class MVStore { ...@@ -143,12 +144,6 @@ public class MVStore {
*/ */
volatile Thread backgroundThread; volatile Thread backgroundThread;
/**
* The free spaces between the chunks. The first block to use is block 2
* (the first two blocks are the store header).
*/
private final FreeSpaceBitSet freeSpace = new FreeSpaceBitSet(2, BLOCK_SIZE);
private volatile boolean reuseSpace = true; private volatile boolean reuseSpace = true;
private boolean closed; private boolean closed;
...@@ -160,8 +155,8 @@ public class MVStore { ...@@ -160,8 +155,8 @@ public class MVStore {
private long rootChunkStart; private long rootChunkStart;
/** /**
* The cache. The default size is 16 MB, and the average size is 2 KB. It is * The page cache. The default size is 16 MB, and the average size is 2 KB.
* split in 16 segments. The stack move distance is 2% of the expected * It is split in 16 segments. The stack move distance is 2% of the expected
* number of entries. * number of entries.
*/ */
private final CacheLongKeyLIRS<Page> cache; private final CacheLongKeyLIRS<Page> cache;
...@@ -175,12 +170,15 @@ public class MVStore { ...@@ -175,12 +170,15 @@ public class MVStore {
new ConcurrentHashMap<Integer, Chunk>(); new ConcurrentHashMap<Integer, Chunk>();
/** /**
* The map of temporarily removed pages. The key is the unsaved version, the * The map of temporarily freed storage space caused by freed pages. The key
* value is the map of chunks. The maps of chunks contains the number of * is the unsaved version, the value is the map of chunks. The maps contains
* freed entries per chunk. Access is synchronized. * the number of freed entries per chunk. Access is synchronized.
*/ */
private final HashMap<Long, HashMap<Integer, Chunk>> freedPages = New.hashMap(); private final HashMap<Long, HashMap<Integer, Chunk>> freedPageSpace = New.hashMap();
/**
* The metadata map.
*/
private MVMapConcurrent<String, String> meta; private MVMapConcurrent<String, String> meta;
private final ConcurrentHashMap<Integer, MVMap<?, ?>> maps = private final ConcurrentHashMap<Integer, MVMap<?, ?>> maps =
...@@ -218,7 +216,7 @@ public class MVStore { ...@@ -218,7 +216,7 @@ public class MVStore {
* The time the store was created, in milliseconds since 1970. * The time the store was created, in milliseconds since 1970.
*/ */
private long creationTime; private long creationTime;
private int retentionTime = 45000; private int retentionTime;
private long lastStoreTime; private long lastStoreTime;
...@@ -262,12 +260,16 @@ public class MVStore { ...@@ -262,12 +260,16 @@ public class MVStore {
c.put("id", "0"); c.put("id", "0");
c.put("createVersion", Long.toString(currentVersion)); c.put("createVersion", Long.toString(currentVersion));
meta.init(this, c); meta.init(this, c);
String f = (String) config.get("fileName"); fileStore = (FileStore) config.get("fileStore");
if (f == null) { String fileName = (String) config.get("fileName");
if (fileName == null && fileStore == null) {
cache = null; cache = null;
return; return;
} }
fileStore = new FileStore(); if (fileStore == null) {
fileStore = new FileStore();
}
retentionTime = fileStore.getDefaultRetentionTime();
boolean readOnly = config.containsKey("readOnly"); boolean readOnly = config.containsKey("readOnly");
o = config.get("cacheSize"); o = config.get("cacheSize");
int mb = o == null ? 16 : (Integer) o; int mb = o == null ? 16 : (Integer) o;
...@@ -284,7 +286,7 @@ public class MVStore { ...@@ -284,7 +286,7 @@ public class MVStore {
unsavedPageCountMax = writeBufferSize / (div == 0 ? 1 : div); unsavedPageCountMax = writeBufferSize / (div == 0 ? 1 : div);
char[] encryptionKey = (char[]) config.get("encryptionKey"); char[] encryptionKey = (char[]) config.get("encryptionKey");
try { try {
fileStore.open(f, readOnly, encryptionKey); fileStore.open(fileName, readOnly, encryptionKey);
if (fileStore.size() == 0) { if (fileStore.size() == 0) {
creationTime = 0; creationTime = 0;
creationTime = getTime(); creationTime = getTime();
...@@ -537,14 +539,13 @@ public class MVStore { ...@@ -537,14 +539,13 @@ public class MVStore {
registerFreePage(currentVersion, c.id, 0, 0); registerFreePage(currentVersion, c.id, 0, 0);
} }
} }
// rebuild the free space list // build the free space list
freeSpace.clear();
for (Chunk c : chunks.values()) { for (Chunk c : chunks.values()) {
if (c.start == Long.MAX_VALUE) { if (c.start == Long.MAX_VALUE) {
continue; continue;
} }
int len = MathUtils.roundUpInt(c.length, BLOCK_SIZE) + BLOCK_SIZE; int len = MathUtils.roundUpInt(c.length, BLOCK_SIZE) + BLOCK_SIZE;
freeSpace.markUsed(c.start, len); fileStore.markUsed(c.start, len);
} }
} }
...@@ -687,7 +688,6 @@ public class MVStore { ...@@ -687,7 +688,6 @@ public class MVStore {
} }
meta = null; meta = null;
chunks.clear(); chunks.clear();
freeSpace.clear();
cache.clear(); cache.clear();
maps.clear(); maps.clear();
try { try {
...@@ -849,7 +849,7 @@ public class MVStore { ...@@ -849,7 +849,7 @@ public class MVStore {
meta.put("root." + m.getId(), String.valueOf(Integer.MAX_VALUE)); meta.put("root." + m.getId(), String.valueOf(Integer.MAX_VALUE));
} }
} }
Set<Chunk> removedChunks = applyFreedPages(storeVersion, time); Set<Chunk> removedChunks = applyFreedSpace(storeVersion, time);
ByteBuffer buff = getWriteBuffer(); ByteBuffer buff = getWriteBuffer();
// need to patch the header later // need to patch the header later
c.writeHeader(buff); c.writeHeader(buff);
...@@ -884,17 +884,17 @@ public class MVStore { ...@@ -884,17 +884,17 @@ public class MVStore {
long end = getEndPosition(); long end = getEndPosition();
long filePos; long filePos;
if (reuseSpace) { if (reuseSpace) {
filePos = freeSpace.allocate(length); filePos = fileStore.allocate(length);
} else { } else {
filePos = end; filePos = end;
freeSpace.markUsed(end, length); fileStore.markUsed(end, length);
} }
boolean storeAtEndOfFile = filePos + length >= end; boolean storeAtEndOfFile = filePos + length >= end;
// free up the space of unused chunks now // free up the space of unused chunks now
for (Chunk x : removedChunks) { for (Chunk x : removedChunks) {
int len = MathUtils.roundUpInt(x.length, BLOCK_SIZE) + BLOCK_SIZE; int len = MathUtils.roundUpInt(x.length, BLOCK_SIZE) + BLOCK_SIZE;
freeSpace.free(x.start, len); fileStore.free(x.start, len);
} }
c.start = filePos; c.start = filePos;
...@@ -985,23 +985,23 @@ public class MVStore { ...@@ -985,23 +985,23 @@ public class MVStore {
} }
/** /**
* Apply the freed pages to the chunk metadata. The metadata is updated, but * Apply the freed space to the chunk metadata. The metadata is updated, but
* freed chunks are not removed yet. * freed chunks are not removed yet.
* *
* @param storeVersion apply up to the given version * @param storeVersion apply up to the given version
* @return the set of freed chunks (might be empty) * @return the set of completely freed chunks (might be empty)
*/ */
private Set<Chunk> applyFreedPages(long storeVersion, long time) { private Set<Chunk> applyFreedSpace(long storeVersion, long time) {
Set<Chunk> removedChunks = New.hashSet(); Set<Chunk> removedChunks = New.hashSet();
synchronized (freedPages) { synchronized (freedPageSpace) {
while (true) { while (true) {
ArrayList<Chunk> modified = New.arrayList(); ArrayList<Chunk> modified = New.arrayList();
for (Iterator<Long> it = freedPages.keySet().iterator(); it.hasNext();) { for (Iterator<Long> it = freedPageSpace.keySet().iterator(); it.hasNext();) {
long v = it.next(); long v = it.next();
if (v > storeVersion) { if (v > storeVersion) {
continue; continue;
} }
Map<Integer, Chunk> freed = freedPages.get(v); Map<Integer, Chunk> freed = freedPageSpace.get(v);
for (Chunk f : freed.values()) { for (Chunk f : freed.values()) {
Chunk c = chunks.get(f.id); Chunk c = chunks.get(f.id);
if (c == null) { if (c == null) {
...@@ -1147,12 +1147,12 @@ public class MVStore { ...@@ -1147,12 +1147,12 @@ public class MVStore {
chunks.remove(c.id); chunks.remove(c.id);
meta.remove("chunk." + c.id); meta.remove("chunk." + c.id);
int length = MathUtils.roundUpInt(c.length, BLOCK_SIZE) + BLOCK_SIZE; int length = MathUtils.roundUpInt(c.length, BLOCK_SIZE) + BLOCK_SIZE;
freeSpace.free(c.start, length); fileStore.free(c.start, length);
} }
if (freeSpace.getFillRate() == 100) { if (fileStore.getFillRate() == 100) {
return false; return false;
} }
long firstFree = freeSpace.getFirstFree(); long firstFree = fileStore.getFirstFree();
ArrayList<Chunk> move = New.arrayList(); ArrayList<Chunk> move = New.arrayList();
for (Chunk c : chunks.values()) { for (Chunk c : chunks.values()) {
if (c.start > firstFree) { if (c.start > firstFree) {
...@@ -1166,8 +1166,8 @@ public class MVStore { ...@@ -1166,8 +1166,8 @@ public class MVStore {
buff.limit(length); buff.limit(length);
fileStore.readFully(c.start, buff); fileStore.readFully(c.start, buff);
long end = getEndPosition(); long end = getEndPosition();
freeSpace.markUsed(end, length); fileStore.markUsed(end, length);
freeSpace.free(c.start, length); fileStore.free(c.start, length);
c.start = end; c.start = end;
buff.position(0); buff.position(0);
c.writeHeader(buff); c.writeHeader(buff);
...@@ -1197,8 +1197,8 @@ public class MVStore { ...@@ -1197,8 +1197,8 @@ public class MVStore {
buff = DataUtils.ensureCapacity(buff, length); buff = DataUtils.ensureCapacity(buff, length);
buff.limit(length); buff.limit(length);
fileStore.readFully(c.start, buff); fileStore.readFully(c.start, buff);
long pos = freeSpace.allocate(length); long pos = fileStore.allocate(length);
freeSpace.free(c.start, length); fileStore.free(c.start, length);
buff.position(0); buff.position(0);
c.start = pos; c.start = pos;
c.writeHeader(buff); c.writeHeader(buff);
...@@ -1433,11 +1433,11 @@ public class MVStore { ...@@ -1433,11 +1433,11 @@ public class MVStore {
} }
private void registerFreePage(long version, int chunkId, long maxLengthLive, int pageCount) { private void registerFreePage(long version, int chunkId, long maxLengthLive, int pageCount) {
synchronized (freedPages) { synchronized (freedPageSpace) {
HashMap<Integer, Chunk>freed = freedPages.get(version); HashMap<Integer, Chunk>freed = freedPageSpace.get(version);
if (freed == null) { if (freed == null) {
freed = New.hashMap(); freed = New.hashMap();
freedPages.put(version, freed); freedPageSpace.put(version, freed);
} }
Chunk f = freed.get(chunkId); Chunk f = freed.get(chunkId);
if (f == null) { if (f == null) {
...@@ -1488,19 +1488,21 @@ public class MVStore { ...@@ -1488,19 +1488,21 @@ public class MVStore {
/** /**
* How long to retain old, persisted chunks, in milliseconds. Chunks that * How long to retain old, persisted chunks, in milliseconds. Chunks that
* are older may be overwritten once they contain no live data. The default * are older may be overwritten once they contain no live data.
* is 45000 (45 seconds). It is assumed that a file system and hard disk * <p>
* will flush all write buffers within this time. Using a lower value might * The default value is 45000 (45 seconds) when using the default file
* be dangerous, unless the file system and hard disk flush the buffers * store. It is assumed that a file system and hard disk will flush all
* earlier. To manually flush the buffers, use * write buffers within this time. Using a lower value might be dangerous,
* unless the file system and hard disk flush the buffers earlier. To
* manually flush the buffers, use
* <code>MVStore.getFile().force(true)</code>, however please note that * <code>MVStore.getFile().force(true)</code>, however please note that
* according to various tests this does not always work as expected * according to various tests this does not always work as expected
* depending on the operating system and hardware. * depending on the operating system and hardware.
* <p> * <p>
* This setting is not persisted. * This setting is not persisted.
* *
* @param ms how many milliseconds to retain old chunks (0 to overwrite them * @param ms how many milliseconds to retain old chunks (0 to overwrite them
* as early as possible) * as early as possible)
*/ */
public void setRetentionTime(int ms) { public void setRetentionTime(int ms) {
this.retentionTime = ms; this.retentionTime = ms;
...@@ -1652,10 +1654,12 @@ public class MVStore { ...@@ -1652,10 +1654,12 @@ public class MVStore {
} }
meta.clear(); meta.clear();
chunks.clear(); chunks.clear();
freeSpace.clear(); if (fileStore != null) {
fileStore.clear();
}
maps.clear(); maps.clear();
synchronized (freedPages) { synchronized (freedPageSpace) {
freedPages.clear(); freedPageSpace.clear();
} }
currentVersion = version; currentVersion = version;
setWriteVersion(version); setWriteVersion(version);
...@@ -1669,12 +1673,12 @@ public class MVStore { ...@@ -1669,12 +1673,12 @@ public class MVStore {
for (MVMap<?, ?> m : maps.values()) { for (MVMap<?, ?> m : maps.values()) {
m.rollbackTo(version); m.rollbackTo(version);
} }
synchronized (freedPages) { synchronized (freedPageSpace) {
for (long v = currentVersion; v >= version; v--) { for (long v = currentVersion; v >= version; v--) {
if (freedPages.size() == 0) { if (freedPageSpace.size() == 0) {
break; break;
} }
freedPages.remove(v); freedPageSpace.remove(v);
} }
} }
meta.rollbackTo(version); meta.rollbackTo(version);
...@@ -1704,7 +1708,7 @@ public class MVStore { ...@@ -1704,7 +1708,7 @@ public class MVStore {
} }
chunks.remove(lastChunkId); chunks.remove(lastChunkId);
int len = MathUtils.roundUpInt(last.length, BLOCK_SIZE) + BLOCK_SIZE; int len = MathUtils.roundUpInt(last.length, BLOCK_SIZE) + BLOCK_SIZE;
freeSpace.free(last.start, len); fileStore.free(last.start, len);
lastChunkId--; lastChunkId--;
} }
rootChunkStart = last.start; rootChunkStart = last.start;
...@@ -1739,8 +1743,8 @@ public class MVStore { ...@@ -1739,8 +1743,8 @@ public class MVStore {
} }
private void revertTemp(long storeVersion) { private void revertTemp(long storeVersion) {
synchronized (freedPages) { synchronized (freedPageSpace) {
for (Iterator<Long> it = freedPages.keySet().iterator(); it.hasNext();) { for (Iterator<Long> it = freedPageSpace.keySet().iterator(); it.hasNext();) {
long v = it.next(); long v = it.next();
if (v > storeVersion) { if (v > storeVersion) {
continue; continue;
...@@ -2082,6 +2086,16 @@ public class MVStore { ...@@ -2082,6 +2086,16 @@ public class MVStore {
Thread.UncaughtExceptionHandler exceptionHandler) { Thread.UncaughtExceptionHandler exceptionHandler) {
return set("backgroundExceptionHandler", exceptionHandler); return set("backgroundExceptionHandler", exceptionHandler);
} }
/**
* Use the provided file store instead of the default one.
*
* @param store the file store
* @return this
*/
public Builder fileStore(FileStore store) {
return set("fileStore", store);
}
/** /**
* Open the store. * Open the store.
......
/*
* Copyright 2004-2013 H2 Group. Multiple-Licensed under the H2 License,
* Version 1.0, and under the Eclipse Public License, Version 1.0
* (http://h2database.com/html/license.html).
* Initial Developer: H2 Group
*/
package org.h2.mvstore;
import java.nio.ByteBuffer;
import java.util.Iterator;
import java.util.Map.Entry;
import java.util.TreeMap;
/**
* A storage mechanism that "persists" data in the off-heap area of the main
* memory.
*/
public class OffHeapStore extends FileStore {
private final TreeMap<Long, ByteBuffer> memory = new TreeMap<Long, ByteBuffer>();
@Override
public void open(String fileName, boolean readOnly, char[] encryptionKey) {
// nothing to do
}
@Override
public String toString() {
return memory.toString();
}
@Override
public void readFully(long pos, ByteBuffer dst) {
Entry<Long, ByteBuffer> mem = memory.floorEntry(pos);
if (mem == null) {
throw DataUtils.newIllegalStateException(DataUtils.ERROR_READING_FAILED,
"Could not read from position {0}", pos);
}
readCount++;
ByteBuffer buff = mem.getValue();
ByteBuffer read = buff.duplicate();
int offset = (int) (pos - mem.getKey());
read.position(offset);
read.limit(dst.remaining() + offset);
dst.put(read);
dst.rewind();
}
@Override
public void free(long pos, int length) {
freeSpace.free(pos, length);
ByteBuffer buff = memory.remove(pos);
if (buff == null) {
throw DataUtils.newIllegalStateException(DataUtils.ERROR_READING_FAILED,
"Could not find entry at position {0}", pos);
} else if (buff.remaining() != length) {
throw DataUtils.newIllegalStateException(DataUtils.ERROR_READING_FAILED,
"Partial remove is not supported at position {0}", pos);
}
}
@Override
public void writeFully(long pos, ByteBuffer src) {
fileSize = Math.max(fileSize, pos + src.remaining());
Entry<Long, ByteBuffer> mem = memory.floorEntry(pos);
if (mem == null) {
// not found: create a new entry
writeNewEntry(pos, src);
return;
}
long prevPos = mem.getKey();
ByteBuffer buff = mem.getValue();
int prevLength = buff.capacity();
int length = src.remaining();
if (prevPos == pos) {
if (prevLength != length) {
throw DataUtils.newIllegalStateException(DataUtils.ERROR_READING_FAILED,
"Could not write to position {0}; partial overwrite is not supported", pos);
}
writeCount++;
buff.rewind();
buff.put(src);
return;
}
if (prevPos + prevLength > pos) {
throw DataUtils.newIllegalStateException(DataUtils.ERROR_READING_FAILED,
"Could not write to position {0}; partial overwrite is not supported", pos);
}
writeNewEntry(pos, src);
}
private void writeNewEntry(long pos, ByteBuffer src) {
writeCount++;
int length = src.remaining();
ByteBuffer buff = ByteBuffer.allocateDirect(length);
buff.put(src);
buff.rewind();
memory.put(pos, buff);
}
@Override
public void truncate(long size) {
writeCount++;
if (size == 0) {
fileSize = 0;
memory.clear();
return;
}
for (Iterator<Long> it = memory.keySet().iterator(); it.hasNext();) {
long pos = it.next();
if (pos < size) {
break;
}
ByteBuffer buff = memory.get(pos);
if (buff.capacity() > size) {
throw DataUtils.newIllegalStateException(DataUtils.ERROR_READING_FAILED,
"Could not truncate to {0}; partial truncate is not supported", pos);
}
it.remove();
}
}
@Override
public void close() {
truncate(0);
freeSpace.clear();
}
@Override
public void sync() {
// nothing to do
}
@Override
public int getDefaultRetentionTime() {
return 0;
}
}
...@@ -172,7 +172,7 @@ public class Page { ...@@ -172,7 +172,7 @@ public class Page {
buff = ByteBuffer.allocate(128); buff = ByteBuffer.allocate(128);
fileStore.readFully(filePos, buff); fileStore.readFully(filePos, buff);
maxLength = buff.getInt(); maxLength = buff.getInt();
//read the first bytes again // read the first bytes again
} }
buff = ByteBuffer.allocate(length); buff = ByteBuffer.allocate(length);
fileStore.readFully(filePos, buff); fileStore.readFully(filePos, buff);
......
...@@ -18,6 +18,7 @@ import org.h2.mvstore.Cursor; ...@@ -18,6 +18,7 @@ import org.h2.mvstore.Cursor;
import org.h2.mvstore.DataUtils; import org.h2.mvstore.DataUtils;
import org.h2.mvstore.MVMap; import org.h2.mvstore.MVMap;
import org.h2.mvstore.MVStore; import org.h2.mvstore.MVStore;
import org.h2.mvstore.OffHeapStore;
import org.h2.mvstore.type.DataType; import org.h2.mvstore.type.DataType;
import org.h2.mvstore.type.ObjectDataType; import org.h2.mvstore.type.ObjectDataType;
import org.h2.mvstore.type.StringDataType; import org.h2.mvstore.type.StringDataType;
...@@ -46,6 +47,7 @@ public class TestMVStore extends TestBase { ...@@ -46,6 +47,7 @@ public class TestMVStore extends TestBase {
public void test() throws Exception { public void test() throws Exception {
FileUtils.deleteRecursive(getBaseDir(), true); FileUtils.deleteRecursive(getBaseDir(), true);
FileUtils.createDirectories(getBaseDir()); FileUtils.createDirectories(getBaseDir());
testOffHeapStorage();
testNewerWriteVersion(); testNewerWriteVersion();
testCompactFully(); testCompactFully();
testBackgroundExceptionListener(); testBackgroundExceptionListener();
...@@ -92,6 +94,29 @@ public class TestMVStore extends TestBase { ...@@ -92,6 +94,29 @@ public class TestMVStore extends TestBase {
testLargerThan2G(); testLargerThan2G();
} }
private void testOffHeapStorage() throws Exception {
OffHeapStore offHeap = new OffHeapStore();
MVStore s = new MVStore.Builder().
fileStore(offHeap).
open();
Map<Integer, String> map = s.openMap("data");
for (int i = 0; i < 1000; i++) {
map.put(i, "Hello " + i);
s.store();
}
assertTrue(1000 < offHeap.getWriteCount());
// s.close();
s = new MVStore.Builder().
fileStore(offHeap).
open();
map = s.openMap("data");
for (int i = 0; i < 1000; i++) {
assertEquals("Hello " + i, map.get(i));
}
s.close();
}
private void testNewerWriteVersion() throws Exception { private void testNewerWriteVersion() throws Exception {
String fileName = getBaseDir() + "/testNewerWriteVersion.h3"; String fileName = getBaseDir() + "/testNewerWriteVersion.h3";
FileUtils.delete(fileName); FileUtils.delete(fileName);
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论