提交 9d12de92 authored 作者: Thomas Mueller's avatar Thomas Mueller

MVStore table engine: new setting "retention_time" to configure the time to…

MVStore table engine: new setting "retention_time" to configure the time to retain old data. The default is 45 seconds.
上级 d7b1bf4e
......@@ -1455,6 +1455,26 @@ Admin rights are required to execute this command, as it affects all connections
SET REFERENTIAL_INTEGRITY FALSE
"
"Commands (Other)","SET RETENTION_TIME","
SET RETENTION_TIME int
","
This property is only used when using the MVStore storage engine.
How long to retain old, persisted data, in milliseconds.
The default is 45000 (45 seconds), 0 means overwrite data as early as possible.
It is assumed that a file system and hard disk will flush all write buffers within this time.
Using a lower value might be dangerous, unless the file system and hard disk flush the buffers earlier.
To manually flush the buffers, use CHECKPOINT SYNC,
however please note that according to various tests this does not always work as expected
depending on the operating system and hardware.
Admin rights are required to execute this command, as it affects all connections.
This command commits an open transaction.
This setting is persistent.
This setting can be appended to the database URL: ""jdbc:h2:test;RETENTION_TIME=0""
","
SET RETENTION_TIME 0
"
"Commands (Other)","SET SALT HASH","
SET SALT bytes HASH bytes
","
......
......@@ -18,7 +18,11 @@ Change Log
<h1>Change Log</h1>
<h2>Next Version (unreleased)</h2>
<ul><li>The method TableEngine.createTable() now returns a Table object.
<ul><li>A checkpoint is now done every MAX_LOG_SIZE / 2 instead of every
MAX_LOG_SIZE, so that the transaction log doesn't grow as large.
</li><li>MVStore table engine: new setting "retention_time" to configure the
time to retain old data. The default is 45 seconds.
</li><li>The method TableEngine.createTable() now returns a Table object.
</li><li>For read-only databases, for the trace level "debug",
the trace info is written to the temp directory.
</li><li>Closing the file lock will now wait until the background thread is stopped.
......
......@@ -59,6 +59,7 @@ public class Set extends Prepared {
case SetTypes.THROTTLE:
case SetTypes.SCHEMA:
case SetTypes.SCHEMA_SEARCH_PATH:
case SetTypes.RETENTION_TIME:
return true;
default:
}
......@@ -412,6 +413,15 @@ public class Set extends Prepared {
addOrUpdateSetting(name, null, getIntValue());
break;
}
case SetTypes.RETENTION_TIME: {
if (getIntValue() < 0) {
throw DbException.getInvalidValueException("RETENTION_TIME", getIntValue());
}
session.getUser().checkAdmin();
database.setRetentionTime(getIntValue());
addOrUpdateSetting(name, null, getIntValue());
break;
}
default:
DbException.throwInternalError("type="+type);
}
......
......@@ -209,6 +209,11 @@ public class SetTypes {
*/
public static final int JAVA_OBJECT_SERIALIZER = 39;
/**
* The type of a SET RETENTION_TIME statement.
*/
public static final int RETENTION_TIME = 40;
private static final ArrayList<String> TYPES = New.arrayList();
private SetTypes() {
......@@ -257,6 +262,7 @@ public class SetTypes {
list.add(REDO_LOG_BINARY, "REDO_LOG_BINARY");
list.add(BINARY_COLLATION, "BINARY_COLLATION");
list.add(JAVA_OBJECT_SERIALIZER, "JAVA_OBJECT_SERIALIZER");
list.add(RETENTION_TIME, "RETENTION_TIME");
}
/**
......
......@@ -17,6 +17,7 @@ import java.util.Set;
import java.util.StringTokenizer;
import org.h2.api.DatabaseEventListener;
import org.h2.api.JavaObjectSerializer;
import org.h2.command.CommandInterface;
import org.h2.command.ddl.CreateTableData;
import org.h2.command.dml.SetTypes;
import org.h2.constant.DbSettings;
......@@ -176,6 +177,7 @@ public class Database implements DataHandler {
private final int reconnectCheckDelay;
private int logMode;
private MVTableEngine.Store mvStore;
private int retentionTime;
private DbException backgroundException;
private JavaObjectSerializer javaObjectSerializer;
......@@ -279,6 +281,7 @@ public class Database implements DataHandler {
public void setMvStore(MVTableEngine.Store mvStore) {
this.mvStore = mvStore;
this.retentionTime = mvStore.getStore().getRetentionTime();
}
/**
......@@ -1249,6 +1252,11 @@ public class Database implements DataHandler {
}
reconnectModified(false);
if (mvStore != null) {
if (!readOnly) {
if (compactMode != 0) {
mvStore.compact(compactMode == CommandInterface.SHUTDOWN_DEFRAG);
}
}
mvStore.close();
}
closeFiles();
......@@ -1766,6 +1774,17 @@ public class Database implements DataHandler {
}
}
public int getRetentionTime() {
return retentionTime;
}
public void setRetentionTime(int value) {
retentionTime = value;
if (mvStore != null) {
mvStore.getStore().setRetentionTime(value);
}
}
/**
* Check if flush-on-each-commit is enabled.
*
......@@ -1926,11 +1945,16 @@ public class Database implements DataHandler {
* executing the SQL statement CHECKPOINT SYNC.
*/
public synchronized void sync() {
if (readOnly || pageStore == null) {
if (readOnly) {
return;
}
if (mvStore != null) {
mvStore.sync();
}
if (pageStore != null) {
pageStore.sync();
}
}
public int getMaxMemoryRows() {
return maxMemoryRows;
......@@ -2544,4 +2568,5 @@ public class Database implements DataHandler {
javaObjectSerializerName = serializerName;
}
}
}
......@@ -160,6 +160,15 @@ public class FreeSpaceBitSet {
return Math.min(1, (int) (100L * count / total));
}
/**
* Get the position of the first free space.
*
* @return the position.
*/
public long getFirstFree() {
return getPos(set.nextClearBit(0));
}
@Override
public String toString() {
StringBuilder buff = new StringBuilder("[");
......
......@@ -46,14 +46,15 @@ H:3,...
TODO:
Documentation
- rolling docs review: at "Transactions"
TestMVStoreDataLoss
TransactionStore:
MVStore:
- rolling docs review: at "Transactions"
- move setters to the builder, except for setRetainVersion, setReuseSpace,
and settings that are persistent (setStoreVersion)
- in-place compact (first, move chunks to end of file, then move to start)
- automated 'kill process' and 'power failure' test
- update checkstyle
- maybe split database into multiple files, to speed up compact
......@@ -67,6 +68,7 @@ MVStore:
- chunk checksum (header, last page, 2 bytes per page?)
- is there a better name for the file header,
if it's no longer always at the beginning of a file? store header?
root pointer?
- on insert, if the child page is already full, don't load and modify it
split directly (specially for leaves with one large entry)
- maybe let a chunk point to a list of potential next chunks
......@@ -79,11 +81,9 @@ MVStore:
- support maps without values (just existence of the key)
- support maps without keys (counted b-tree features)
- use a small object cache (StringCache), test on Android
- dump values
- dump values (using a callback)
- map split / merge (fast if no overlap)
- auto-save if there are too many changes (required for StreamStore)
- StreamStore optimization: avoid copying bytes
- unlimited transaction size
- MVStoreTool.shrink to shrink a store (create, copy, rename, delete)
and for MVStore on Windows, auto-detect renamed file
- ensure data is overwritten eventually if the system doesn't have a timer
......@@ -113,6 +113,9 @@ MVStore:
possibly using a callback for serialization
- optional pluggable checksum mechanism (per page), which
requires that everything is a page (including headers)
- rename setStoreVersion to setDataVersion or similar
- to save space for small chunks, combine the last partial
block with the header
*/
......@@ -935,13 +938,7 @@ public class MVStore {
}
}
Set<Chunk> removedChunks = applyFreedPages(storeVersion, time);
ByteBuffer buff;
if (writeBuffer != null) {
buff = writeBuffer;
buff.clear();
} else {
buff = ByteBuffer.allocate(1024 * 1024);
}
ByteBuffer buff = getWriteBuffer();
// need to patch the header later
c.writeHeader(buff);
c.maxLength = 0;
......@@ -1035,6 +1032,23 @@ public class MVStore {
return version;
}
/**
* Get a buffer for writing. This caller must synchronize on the store
* before calling the method and until after using the buffer.
*
* @return the buffer
*/
private ByteBuffer getWriteBuffer() {
ByteBuffer buff;
if (writeBuffer != null) {
buff = writeBuffer;
buff.clear();
} else {
buff = ByteBuffer.allocate(1024 * 1024);
}
return buff;
}
private boolean canOverwriteChunk(Chunk c, long time) {
if (c.time + retentionTime > time) {
return false;
......@@ -1184,17 +1198,75 @@ public class MVStore {
}
/**
* Try to reduce the file size. Chunks with a low number of live items will
* be re-written. If the current fill rate is higher than the target fill
* rate, no optimization is done.
* Compact the store by moving all chunks.
*
* @return if anything was written
*/
public synchronized boolean compactFully() {
checkOpen();
if (chunks.size() == 0) {
// nothing to do
return false;
}
if (freeSpace.getFillRate() == 100) {
boolean allFull = true;
for (Chunk c : chunks.values()) {
if (c.maxLength == c.maxLengthLive) {
allFull = false;
break;
}
}
if (allFull) {
return false;
}
}
long firstFree = freeSpace.getFirstFree();
ArrayList<Chunk> move = New.arrayList();
for (Chunk c : chunks.values()) {
if (c.start < firstFree) {
move.add(c);
}
}
ByteBuffer buff = getWriteBuffer();
for (Chunk c : move) {
int length = MathUtils.roundUpInt(c.length, BLOCK_SIZE) + BLOCK_SIZE;
buff = DataUtils.ensureCapacity(buff, length);
DataUtils.readFully(file, c.start, buff);
long pos = getFileSizeUsed();
freeSpace.markUsed(pos, length);
buff.position(0);
c.start = pos;
c.writeHeader(buff);
buff.position(buff.limit() - BLOCK_SIZE);
byte[] header = getFileHeaderBytes();
buff.put(header);
// fill the header with zeroes
buff.put(new byte[BLOCK_SIZE - header.length]);
buff.position(0);
buff.limit(length);
// file.write(buff);
}
int todo;
return false;
}
/**
* Try to reduce the file size by re-writing partially full chunks. Chunks
* with a low number of live items are re-written. If the current fill rate
* is higher than the target fill rate, nothing is done.
* <p>
* Only data of open maps can be moved. For maps that are not open, the old
* chunk is still referenced. Therefore, it is recommended to open all maps
* before calling this method.
*
* @param fillRate the minimum percentage of live entries
* @return if anything was written
*/
public boolean compact(int fillRate) {
public synchronized boolean compact(int fillRate) {
checkOpen();
if (chunks.size() == 0) {
// avoid division by 0
// nothing to do
return false;
}
long maxLengthSum = 0, maxLengthLiveSum = 0;
......@@ -1206,8 +1278,10 @@ public class MVStore {
// avoid division by 0
maxLengthSum = 1;
}
int percentTotal = (int) (100 * maxLengthLiveSum / maxLengthSum);
if (percentTotal > fillRate) {
// the fill rate of all chunks combined
int totalChunkFillRate = (int) (100 * maxLengthLiveSum / maxLengthSum);
if (totalChunkFillRate > fillRate) {
return false;
}
......@@ -1284,6 +1358,9 @@ public class MVStore {
@SuppressWarnings("unchecked")
MVMap<Object, Object> map = (MVMap<Object, Object>) getMap(mapId);
if (map == null) {
// pages of maps that are not open or that have been removed
// later on are not moved (for maps that are not open, the live
// counter is not decremented, so the chunk is not removed)
buff.position(start + pageLength);
continue;
}
......@@ -1449,7 +1526,8 @@ public class MVStore {
* be dangerous, unless the file system and hard disk flush the buffers
* earlier. To manually flush the buffers, use
* <code>MVStore.getFile().force(true)</code>, however please note that
* according to various tests this does not always work as expected.
* according to various tests this does not always work as expected
* depending on the operating system and hardware.
* <p>
* This setting is not persisted.
*
......
......@@ -7,11 +7,13 @@
package org.h2.mvstore.db;
import java.beans.ExceptionListener;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import org.h2.api.TableEngine;
import org.h2.command.CommandInterface;
import org.h2.command.ddl.CreateTableData;
import org.h2.constant.ErrorCode;
import org.h2.engine.Constants;
......@@ -247,6 +249,28 @@ public class MVTableEngine implements TableEngine {
return new FileChannelInputStream(store.getFile(), false);
}
/**
* Force the changes to disk.
*/
public void sync() {
store();
try {
store.getFile().force(true);
} catch (IOException e) {
throw DbException.convertIOException(e, "Could not sync");
}
}
public void compact(boolean defrag) {
sync();
store.setRetentionTime(0);
while (store.compact(90)) {
System.out.println("compact");
sync();
}
System.out.println("compact done");
}
}
/**
......
......@@ -883,6 +883,7 @@ public class MetaTable extends Table {
add(rows, "MULTI_THREADED", database.isMultiThreaded() ? "1" : "0");
add(rows, "MVCC", database.isMultiVersion() ? "TRUE" : "FALSE");
add(rows, "QUERY_TIMEOUT", "" + session.getQueryTimeout());
add(rows, "RETENTION_TIME", "" + database.getRetentionTime());
add(rows, "LOG", "" + database.getLogMode());
// database settings
ArrayList<String> settingNames = New.arrayList();
......
......@@ -47,6 +47,7 @@ public class TestMVStore extends TestBase {
public void test() throws Exception {
FileUtils.deleteRecursive(getBaseDir(), true);
FileUtils.createDirectories(getBaseDir());
testBackgroundExceptionListener();
testOldVersion();
testAtomicOperations();
......@@ -78,7 +79,8 @@ public class TestMVStore extends TestBase {
testInMemory();
testLargeImport();
testBtreeStore();
testDefragment();
testCompact();
testCompactMapNotOpen();
testReuseSpace();
testRandom();
testKeyValueClasses();
......@@ -87,7 +89,6 @@ public class TestMVStore extends TestBase {
testSimple();
// longer running tests
testLargerThan2G();
}
......@@ -1221,8 +1222,63 @@ public class TestMVStore extends TestBase {
s.close();
}
private void testDefragment() {
String fileName = getBaseDir() + "/testDefragment.h3";
private void testCompactMapNotOpen() {
String fileName = getBaseDir() + "/testCompactNotOpen.h3";
FileUtils.delete(fileName);
MVStore s = openStore(fileName, 1000);
MVMap<Integer, String> m = s.openMap("data");
int factor = 100;
for (int j = 0; j < 10; j++) {
for (int i = j * factor; i < 10 * factor; i++) {
m.put(i, "Hello" + j);
}
s.store();
}
s.close();
s = openStore(fileName);
s.setRetentionTime(0);
Map<String, String> meta = s.getMetaMap();
int chunkCount1 = 0;
for (String k : meta.keySet()) {
if (k.startsWith("chunk.")) {
chunkCount1++;
}
}
assertTrue(s.compact(80));
assertTrue(s.compact(80));
int chunkCount2 = 0;
for (String k : meta.keySet()) {
if (k.startsWith("chunk.")) {
chunkCount2++;
}
}
assertTrue(chunkCount2 >= chunkCount1);
m = s.openMap("data");
assertTrue(s.compact(80));
assertTrue(s.compact(80));
int chunkCount3 = 0;
for (String k : meta.keySet()) {
if (k.startsWith("chunk.")) {
chunkCount3++;
}
}
assertTrue(chunkCount3 < chunkCount1);
for (int i = 0; i < 10 * factor; i++) {
assertEquals("x" + i, "Hello" + (i / factor), m.get(i));
}
s.close();
}
private void testCompact() {
String fileName = getBaseDir() + "/testCompact.h3";
FileUtils.delete(fileName);
long initialLength = 0;
for (int j = 0; j < 20; j++) {
......
......@@ -8,6 +8,7 @@ package org.h2.test.store;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintWriter;
import java.math.BigDecimal;
import java.sql.Connection;
import java.sql.DriverManager;
......@@ -21,6 +22,7 @@ import org.h2.constant.ErrorCode;
import org.h2.engine.Constants;
import org.h2.engine.Database;
import org.h2.jdbc.JdbcConnection;
import org.h2.mvstore.MVStoreTool;
import org.h2.store.fs.FileUtils;
import org.h2.test.TestBase;
import org.h2.tools.DeleteDbFiles;
......@@ -45,7 +47,7 @@ public class TestMVTableEngine extends TestBase {
@Override
public void test() throws Exception {
// testShrinkDatabaseFile();
testShrinkDatabaseFile();
testTransactionLogUsuallyNotStored();
testTwoPhaseCommit();
testRecover();
......@@ -73,11 +75,23 @@ public class TestMVTableEngine extends TestBase {
Connection conn;
Statement stat;
long maxSize = 0;
// TODO does not shrink for 45 seconds;
// need an option to configure that
// by default, the database does not shrink for 45 seconds
int retentionTime = 45000;
for (int i = 0; i < 20; i++) {
// the first 10 times, keep the default retention time
// then switch to 0, at which point the database file
// should stop to grow
conn = getConnection(dbName);
stat = conn.createStatement();
if (i == 10) {
stat.execute("set retention_time 0");
retentionTime = 0;
}
ResultSet rs = stat.executeQuery(
"select value from information_schema.settings " +
"where name='RETENTION_TIME'");
assertTrue(rs.next());
assertEquals(retentionTime, rs.getInt(1));
stat.execute("create table test(id int primary key, data varchar)");
stat.execute("insert into test select x, space(1000) from system_range(1, 1000)");
stat.execute("drop table test");
......@@ -90,6 +104,17 @@ public class TestMVTableEngine extends TestBase {
fail(i + " size: " + size + " max: " + maxSize);
}
}
int todo;
// conn = getConnection(dbName);
// stat = conn.createStatement();
// stat.execute("shutdown compact");
// conn.close();
//
// MVStoreTool.dump(getBaseDir() + "/mvstore.mv.db", new PrintWriter(System.out));
//
// long size = FileUtils.size(getBaseDir() + "/mvstore"
// + Constants.SUFFIX_MV_FILE);
// assertTrue(size < 16 * 1024);
}
private void testTransactionLogUsuallyNotStored() {
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论