提交 c364baf4 authored 作者: Thomas Mueller's avatar Thomas Mueller

A minimal perfect hash function tool

上级 4300469b
......@@ -9,6 +9,7 @@ import java.util.HashSet;
import java.util.Random;
import java.util.Set;
import org.h2.dev.hash.MinimalPerfectHash;
import org.h2.dev.hash.PerfectHash;
import org.h2.test.TestBase;
......@@ -23,22 +24,45 @@ public class TestPerfectHash extends TestBase {
* @param a ignored
*/
public static void main(String... a) throws Exception {
TestBase.createCaller().init().test();
// Keep a typed reference to the initialized test instance so that the
// measurement can be run on it after the checks.
TestPerfectHash test = (TestPerfectHash) TestBase.createCaller().init();
test.test();
test.measure();
}
/**
 * Print the space usage, in bits per key, of each hash function variant
 * for a set of one million keys.
 */
public void measure() {
    final int keyCount = 1000000;

    int minimalBits = testMinimal(keyCount);
    double minimalPerKey = (double) minimalBits / keyCount;
    System.out.println(minimalPerKey + " bits/key (minimal)");

    int minimalOldBits = test(keyCount, true);
    double minimalOldPerKey = (double) minimalOldBits / keyCount;
    System.out.println(minimalOldPerKey + " bits/key (minimal old)");

    int notMinimalBits = test(keyCount, false);
    double notMinimalPerKey = (double) notMinimalBits / keyCount;
    System.out.println(notMinimalPerKey + " bits/key (not minimal)");
}
/**
 * Run the checks for the minimal perfect hash (testMinimal) and for both
 * modes of test(size, minimal) over a range of set sizes.
 */
@Override
public void test() {
for (int i = 0; i < 1000; i++) {
// testMinimal: every set size below 100
for (int i = 0; i < 100; i++) {
testMinimal(i);
}
// testMinimal: larger sets, growing by a factor of 10 up to 100000 keys
for (int i = 100; i <= 100000; i *= 10) {
testMinimal(i);
}
// test(size, minimal) in both modes: every set size below 100
for (int i = 0; i < 100; i++) {
test(i, true);
test(i, false);
}
// test(size, minimal) in both modes: larger sets, growing by a factor
// of 10 up to 100000 keys
for (int i = 1000; i <= 100000; i *= 10) {
for (int i = 100; i <= 100000; i *= 10) {
test(i, true);
test(i, false);
}
}
void test(int size, boolean minimal) {
private int test(int size, boolean minimal) {
Random r = new Random(size);
HashSet<Integer> set = new HashSet<Integer>();
while (set.size() < size) {
......@@ -53,9 +77,10 @@ public class TestPerfectHash extends TestBase {
assertTrue(max < 1.5 * size);
}
}
return desc.length * 8;
}
int test(byte[] desc, Set<Integer> set) {
private int test(byte[] desc, Set<Integer> set) {
int max = -1;
HashSet<Integer> test = new HashSet<Integer>();
PerfectHash hash = new PerfectHash(desc);
......@@ -69,4 +94,32 @@ public class TestPerfectHash extends TestBase {
}
return max;
}
/**
 * Generate a minimal perfect hash description for a set of random integers
 * and verify that the largest hash value is the set size minus one.
 *
 * @param size the number of distinct keys to generate
 * @return the length of the generated description, in bits
 */
private int testMinimal(int size) {
    // the size doubles as the seed of the random number generator
    Random random = new Random(size);
    HashSet<Integer> keys = new HashSet<Integer>();
    while (size > keys.size()) {
        keys.add(random.nextInt());
    }
    byte[] description = MinimalPerfectHash.generate(keys);
    assertEquals(size - 1, testMinimal(description, keys));
    return 8 * description.length;
}
/**
 * Check that the hash function built from the given description maps every
 * key of the set to a distinct value in the range 0 (inclusive) to the set
 * size (exclusive).
 *
 * @param desc the hash function description passed to the
 *            MinimalPerfectHash constructor
 * @param set the keys to look up
 * @return the largest hash value seen, or -1 if the set is empty
 */
private int testMinimal(byte[] desc, Set<Integer> set) {
    int size = set.size();
    int max = -1;
    Set<Integer> used = new HashSet<Integer>();
    MinimalPerfectHash hash = new MinimalPerfectHash(desc);
    for (int x : set) {
        int h = hash.get(x);
        assertTrue(h >= 0);
        // a minimal perfect hash has exactly one slot per key, so every
        // value must be below the number of keys
        assertTrue(h < size);
        max = Math.max(max, h);
        // Set.add returns false if the value is already present, which
        // would mean two keys share a slot
        assertTrue(used.add(h));
    }
    return max;
}
}
......@@ -18,12 +18,12 @@ import java.util.zip.Inflater;
* resulting hash table is about 79% full. The minimal perfect hash function
* needs about 2.3 bits per key.
* <p>
* Generating the hash function takes about 1 second per million keys (linear)
* Generating the hash function takes about 1 second per million keys
* for both perfect hash and minimal perfect hash.
* <p>
* The algorithm is recursive: sets that contain no or only one entry are not
* processed as no conflicts are possible. For sets that contain between 2 and 16
* buckets, up to 16 hash functions are tested to check if they can store the
* entries, up to 16 hash functions are tested to check if they can store the
* data without conflict. If no function was found, the same is tested on a
* larger bucket (except for the minimal perfect hash). If no hash function was
* found, and for larger buckets, the bucket is split into a number of smaller
......@@ -32,7 +32,8 @@ import java.util.zip.Inflater;
* At the end of the generation process, the data is compressed using a general
* purpose compression tool (Deflate / Huffman coding). The uncompressed data is
* around 1.52 bits per key (perfect hash) and 3.72 (minimal perfect hash).
*
* <p>
* Please also note the MinimalPerfectHash class, which uses less space per key.
*/
public class PerfectHash {
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论