提交 6857dfdd authored 作者: Thomas Mueller's avatar Thomas Mueller

The compression algorithm "LZF" is now about 33% faster.

上级 53c890d9
......@@ -18,7 +18,11 @@ Change Log
<h1>Change Log</h1>
<h2>Next Version (unreleased)</h2>
<ul><li>The test cases don't access the file system directly; this simplifies GAE for Java testing.
<ul><li>The compression algorithm "LZF" is now about 33% faster than before when compressing small blocks
(around 2 KB). It is much faster than Deflate, but the compression ratio is lower.
</li><li>Compressing large blocks of data didn't work when using the "Deflate" compression algorithm.
Compressing a lot of data could run out of heap memory.
</li><li>The test cases don't access the file system directly; this simplifies GAE for Java testing.
Thanks to Vince Bonfanti.
</li><li>More bugs in the server-less multi-connection mode have been fixed.
</li><li>When running against an old database, the SCRIPT statement could generate a
......
......@@ -40,7 +40,6 @@ package org.h2.compress;
public class CompressLZF implements Compressor {
private static final int HASH_SIZE = 1 << 14;
private static final int[] EMPTY = new int[HASH_SIZE];
private static final int MAX_LITERAL = 1 << 5;
private static final int MAX_OFF = 1 << 13;
private static final int MAX_REF = (1 << 8) + (1 << 3);
......@@ -72,68 +71,76 @@ public class CompressLZF implements Compressor {
int inPos = 0;
if (cachedHashTable == null) {
cachedHashTable = new int[HASH_SIZE];
} else {
System.arraycopy(EMPTY, 0, cachedHashTable, 0, HASH_SIZE);
}
int[] hashTab = cachedHashTable;
int literals = 0;
int hash = first(in, inPos);
while (true) {
if (inPos < inLen - 4) {
outPos++;
int hash = first(in, 0);
while (inPos < inLen - 4) {
byte p2 = in[inPos + 2];
// next
hash = (hash << 8) + (p2 & 255);
int off = hash(hash);
int ref = hashTab[off];
hashTab[off] = inPos;
if (ref < inPos
&& ref > 0
&& (off = inPos - ref - 1) < MAX_OFF
&& in[ref + 2] == p2
&& in[ref + 1] == (byte) (hash >> 8)
&& in[ref] == (byte) (hash >> 16)) {
// match
int maxLen = inLen - inPos - 2;
if (maxLen > MAX_REF) {
maxLen = MAX_REF;
}
if (literals == 0) {
outPos--;
} else {
out[outPos - literals - 1] = (byte) (literals - 1);
literals = 0;
}
int len = 3;
while (len < maxLen && in[ref + len] == in[inPos + len]) {
len++;
}
len -= 2;
if (len < 7) {
out[outPos++] = (byte) ((off >> 8) + (len << 5));
} else {
out[outPos++] = (byte) ((off >> 8) + (7 << 5));
out[outPos++] = (byte) (len - 7);
}
out[outPos++] = (byte) off;
outPos++;
inPos += len;
hash = first(in, inPos);
hash = next(hash, in, inPos);
int off = hash(hash);
int ref = hashTab[off];
hashTab[off] = inPos;
off = inPos - ref - 1;
if (off < MAX_OFF && ref > 0 && in[ref + 2] == in[inPos + 2] && in[ref + 1] == in[inPos + 1] && in[ref] == in[inPos]) {
int maxLen = inLen - inPos - 2;
maxLen = maxLen > MAX_REF ? MAX_REF : maxLen;
int len = 3;
while (len < maxLen && in[ref + len] == in[inPos + len]) {
len++;
}
len -= 2;
if (literals != 0) {
out[outPos++] = (byte) (literals - 1);
literals = -literals;
do {
out[outPos++] = in[inPos + literals++];
} while (literals != 0);
}
if (len < 7) {
out[outPos++] = (byte) ((off >> 8) + (len << 5));
} else {
out[outPos++] = (byte) ((off >> 8) + (7 << 5));
out[outPos++] = (byte) (len - 7);
}
out[outPos++] = (byte) off;
inPos += len;
hash = first(in, inPos);
hash = next(hash, in, inPos);
hashTab[hash(hash)] = inPos++;
hash = next(hash, in, inPos);
hashTab[hash(hash)] = inPos++;
continue;
hashTab[hash(hash)] = inPos++;
hash = next(hash, in, inPos);
hashTab[hash(hash)] = inPos++;
} else {
out[outPos++] = in[inPos++];
literals++;
if (literals == MAX_LITERAL) {
out[outPos - literals - 1] = (byte) (literals - 1);
literals = 0;
outPos++;
}
} else if (inPos == inLen) {
break;
}
inPos++;
}
while (inPos < inLen) {
out[outPos++] = in[inPos++];
literals++;
if (literals == MAX_LITERAL) {
out[outPos++] = (byte) (literals - 1);
literals = -literals;
do {
out[outPos++] = in[inPos + literals++];
} while (literals != 0);
out[outPos - literals - 1] = (byte) (literals - 1);
literals = 0;
outPos++;
}
}
if (literals != 0) {
out[outPos++] = (byte) (literals - 1);
literals = -literals;
do {
out[outPos++] = in[inPos + literals++];
} while (literals != 0);
out[outPos - literals - 1] = (byte) (literals - 1);
if (literals == 0) {
outPos--;
}
return outPos;
}
......
......@@ -6,9 +6,16 @@
*/
package org.h2.test.unit;
import java.io.InputStream;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.Random;
import org.h2.compress.CompressLZF;
import org.h2.compress.Compressor;
import org.h2.constant.SysProperties;
import org.h2.store.fs.FileSystem;
import org.h2.test.TestBase;
import org.h2.tools.CompressTool;
......@@ -17,6 +24,8 @@ import org.h2.tools.CompressTool;
*/
public class TestCompress extends TestBase {
private boolean testPerformance;
/**
* Run just this test.
*
......@@ -26,7 +35,10 @@ public class TestCompress extends TestBase {
TestBase.createCaller().init().test();
}
public void test() throws SQLException {
public void test() throws Exception {
if (testPerformance) {
testDatabase();
}
if (config.big) {
for (int i = 0; i < 100; i++) {
test(i);
......@@ -41,6 +53,62 @@ public class TestCompress extends TestBase {
test(50);
test(200);
}
test(4000000);
testVariableEnd();
}
/**
 * Round-trips 100 inputs through the "LZF" algorithm and checks that
 * expanding the compressed bytes yields the original bytes.
 * <p>
 * Every input starts with 90 '0' characters. The n-th input (n = 0..99)
 * is followed by n consecutive characters starting at '1', so the inputs
 * differ only in the length and content of their tail. Presumably this
 * targets the compressor's handling of the last few input bytes.
 *
 * @throws Exception if compressing or expanding fails
 */
private void testVariableEnd() throws Exception {
    CompressTool tool = CompressTool.getInstance();
    // Start with the shared prefix: 90 times the character '0'.
    StringBuilder data = new StringBuilder();
    while (data.length() < 90) {
        data.append('0');
    }
    for (int tailLen = 0; tailLen < 100; tailLen++) {
        // At this point the buffer holds the prefix plus tailLen tail characters.
        byte[] expected = data.toString().getBytes();
        byte[] compressed = tool.compress(expected, "LZF");
        byte[] roundTrip = tool.expand(compressed);
        assertEquals(expected, roundTrip);
        // Grow the tail by one character for the next round.
        data.append((char) ('1' + tailLen));
    }
}
/**
 * Measures how long it takes to compress a database file with LZF.
 * <p>
 * The method first creates the in-memory database "memFS:compress" and,
 * for every table name returned by information_schema.tables except
 * COLLATIONS, creates a table of the same name as a copy of the
 * information_schema table. It then runs four timed rounds; in each round
 * the file "memFS:compress.h2.db" is read 100 times in page-sized chunks
 * and every chunk is compressed. The elapsed milliseconds of each round
 * are printed to standard output.
 * <p>
 * The connection and every input stream are closed in a finally block so
 * they are released even if a statement or a read fails.
 *
 * @throws Exception if a database or file operation fails
 */
private void testDatabase() throws Exception {
    deleteDb("memFS:compress");
    Connection conn = getConnection("memFS:compress");
    try {
        Statement stat = conn.createStatement();
        ResultSet rs = stat.executeQuery("select table_name from information_schema.tables");
        // A second statement is used so that the open result set is not affected.
        Statement stat2 = conn.createStatement();
        while (rs.next()) {
            String table = rs.getString(1);
            if (!"COLLATIONS".equals(table)) {
                stat2.execute("create table " + table + " as select * from information_schema." + table);
            }
        }
    } finally {
        // Closing the connection also releases its statements and result sets.
        conn.close();
    }
    Compressor compress = new CompressLZF();
    int pageSize = SysProperties.PAGE_SIZE;
    byte[] buff = new byte[pageSize];
    byte[] test = new byte[2 * pageSize];
    // One untimed call before measuring (presumably a warm-up).
    compress.compress(buff, pageSize, test, 0);
    for (int j = 0; j < 4; j++) {
        long start = System.currentTimeMillis();
        for (int i = 0; i < 100; i++) {
            InputStream in = FileSystem.getInstance("memFS:").openFileInputStream("memFS:compress.h2.db");
            try {
                while (true) {
                    int len = in.read(buff);
                    if (len < 0) {
                        break;
                    }
                    // NOTE(review): always compresses pageSize bytes, even if
                    // fewer bytes were read - confirm this is intended.
                    compress.compress(buff, pageSize, test, 0);
                }
            } finally {
                in.close();
            }
        }
        System.out.println(System.currentTimeMillis() - start);
    }
}
private void test(int len) throws SQLException {
......@@ -52,13 +120,13 @@ public class TestCompress extends TestBase {
// leave empty
break;
case 1: {
for (int x = 0; x < len; x++) {
buff[x] = (byte) (x & 10);
}
r.nextBytes(buff);
break;
}
case 2: {
r.nextBytes(buff);
for (int x = 0; x < len; x++) {
buff[x] = (byte) (x & 10);
}
break;
}
case 3: {
......@@ -77,9 +145,13 @@ public class TestCompress extends TestBase {
}
}
CompressTool utils = CompressTool.getInstance();
for (String a : new String[] { "LZF", "Deflate", "No" }) {
for (String a : new String[] { "LZF", "No", "Deflate" }) {
long start = System.currentTimeMillis();
byte[] out = utils.compress(buff, a);
byte[] test = utils.expand(out);
if (testPerformance) {
System.out.println("p:" + pattern + " len: " + out.length + " time: " + (System.currentTimeMillis() - start) + " " + a);
}
assertEquals(buff.length, test.length);
assertEquals(buff, test);
}
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论