提交 5dafb69a 作者: Thomas Mueller

A minimal perfect hash function tool: use universal hashing callback (more protection against hash flooding)

A minimal perfect hash function tool: use universal hashing callback (more protection against hash flooding)
上级 1b19a776
...@@ -96,7 +96,7 @@ public class TestPerfectHash extends TestBase { ...@@ -96,7 +96,7 @@ public class TestPerfectHash extends TestBase {
UniversalHash<String> badHash = new UniversalHash<String>() { UniversalHash<String> badHash = new UniversalHash<String>() {
@Override @Override
public int hashCode(String o, int index) { public int hashCode(String o, int index, int seed) {
if (index < badUntilLevel) { if (index < badUntilLevel) {
return 0; return 0;
} }
......
...@@ -8,6 +8,7 @@ package org.h2.dev.hash; ...@@ -8,6 +8,7 @@ package org.h2.dev.hash;
import java.io.ByteArrayOutputStream; import java.io.ByteArrayOutputStream;
import java.io.IOException; import java.io.IOException;
import java.nio.charset.Charset; import java.nio.charset.Charset;
import java.security.SecureRandom;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Set; import java.util.Set;
import java.util.zip.Deflater; import java.util.zip.Deflater;
...@@ -45,15 +46,18 @@ import java.util.zip.Inflater; ...@@ -45,15 +46,18 @@ import java.util.zip.Inflater;
* good avalanche effect, or generate random looking data; it just should * good avalanche effect, or generate random looking data; it just should
* produce few conflicts if possible). * produce few conflicts if possible).
* <p> * <p>
* To protect against hash flooding and similar attacks, cryptographically * To protect against hash flooding and similar attacks, a secure random seed
* secure functions such as SipHash or SHA-256 can be used. However, such * per hash table is used. For further protection, cryptographically secure
* (slower) functions only need to be used if regular hash functions produce too * functions such as SipHash or SHA-256 can be used. However, such (slower)
* many conflicts. This case is detected when generating the perfect hash * functions only need to be used if regular hash functions produce too many
* function, by checking if there are too many conflicts (more than 2160 entries * conflicts. This case is detected when generating the perfect hash function,
* in one top-level bucket). In this case, the next hash function is used. That * by checking if there are too many conflicts (more than 2160 entries in one
* way, in the normal case, where no attack is happening, only fast, but less * top-level bucket). In this case, the next hash function is used. That way, in
* secure, hash functions are called. It is fine to use the regular hashCode * the normal case, where no attack is happening, only fast, but less secure,
* method as the level 0 hash function. * hash functions are called. It is fine to use the regular hashCode method as
* the level 0 hash function. However, just relying on the regular hashCode
* method does not work if the key has more than 32 bits, because the risk of
* collisions is too high.
* <p> * <p>
* In-place updating of the hash table is not implemented but possible in * In-place updating of the hash table is not implemented but possible in
* theory, by patching the hash function description. With a small change, * theory, by patching the hash function description. With a small change,
...@@ -92,6 +96,11 @@ public class MinimalPerfectHash<K> { ...@@ -92,6 +96,11 @@ public class MinimalPerfectHash<K> {
* The minimum output value for a small bucket of a given size. * The minimum output value for a small bucket of a given size.
*/ */
private static final int[] SIZE_OFFSETS = new int[MAX_OFFSETS.length + 1]; private static final int[] SIZE_OFFSETS = new int[MAX_OFFSETS.length + 1];
/**
* A secure random generator.
*/
private static final SecureRandom RANDOM = new SecureRandom();
static { static {
int last = SPLIT_MANY + 1; int last = SPLIT_MANY + 1;
...@@ -112,6 +121,11 @@ public class MinimalPerfectHash<K> { ...@@ -112,6 +121,11 @@ public class MinimalPerfectHash<K> {
* key. * key.
*/ */
private final byte[] data; private final byte[] data;
/**
* The random seed.
*/
private final int seed;
/** /**
* The size up to the given root-level bucket in the data array. Used to * The size up to the given root-level bucket in the data array. Used to
...@@ -140,12 +154,16 @@ public class MinimalPerfectHash<K> { ...@@ -140,12 +154,16 @@ public class MinimalPerfectHash<K> {
public MinimalPerfectHash(byte[] desc, UniversalHash<K> hash) { public MinimalPerfectHash(byte[] desc, UniversalHash<K> hash) {
this.hash = hash; this.hash = hash;
byte[] b = data = expand(desc); byte[] b = data = expand(desc);
if (b[0] == SPLIT_MANY) { seed = ((b[0] & 255) << 24) |
((b[1] & 255) << 16) |
((b[2] & 255) << 8) |
(b[3] & 255);
if (b[4] == SPLIT_MANY) {
rootLevel = b[b.length - 1] & 255; rootLevel = b[b.length - 1] & 255;
int split = readVarInt(b, 1); int split = readVarInt(b, 5);
rootSize = new int[split]; rootSize = new int[split];
rootPos = new int[split]; rootPos = new int[split];
int pos = 1 + getVarIntLength(b, 1); int pos = 5 + getVarIntLength(b, 5);
int sizeSum = 0; int sizeSum = 0;
for (int i = 0; i < split; i++) { for (int i = 0; i < split; i++) {
rootSize[i] = sizeSum; rootSize[i] = sizeSum;
...@@ -168,7 +186,7 @@ public class MinimalPerfectHash<K> { ...@@ -168,7 +186,7 @@ public class MinimalPerfectHash<K> {
* @return the hash value * @return the hash value
*/ */
public int get(K x) { public int get(K x) {
return get(0, x, true, rootLevel); return get(4, x, true, rootLevel);
} }
/** /**
...@@ -187,7 +205,7 @@ public class MinimalPerfectHash<K> { ...@@ -187,7 +205,7 @@ public class MinimalPerfectHash<K> {
} else if (n > SPLIT_MANY) { } else if (n > SPLIT_MANY) {
int size = getSize(n); int size = getSize(n);
int offset = getOffset(n, size); int offset = getOffset(n, size);
return hash(x, hash, level, offset, size); return hash(x, hash, level, seed, offset, size);
} }
pos++; pos++;
int split; int split;
...@@ -197,7 +215,7 @@ public class MinimalPerfectHash<K> { ...@@ -197,7 +215,7 @@ public class MinimalPerfectHash<K> {
} else { } else {
split = n; split = n;
} }
int h = hash(x, hash, level, 0, split); int h = hash(x, hash, level, seed, 0, split);
int s; int s;
if (isRoot && rootPos != null) { if (isRoot && rootPos != null) {
s = rootSize[h]; s = rootSize[h];
...@@ -289,7 +307,12 @@ public class MinimalPerfectHash<K> { ...@@ -289,7 +307,12 @@ public class MinimalPerfectHash<K> {
ArrayList<K> list = new ArrayList<K>(); ArrayList<K> list = new ArrayList<K>();
list.addAll(set); list.addAll(set);
ByteArrayOutputStream out = new ByteArrayOutputStream(); ByteArrayOutputStream out = new ByteArrayOutputStream();
generate(list, hash, 0, out); int seed = RANDOM.nextInt();
out.write(seed >>> 24);
out.write(seed >>> 16);
out.write(seed >>> 8);
out.write(seed);
generate(list, hash, 0, seed, out);
return compress(out.toByteArray()); return compress(out.toByteArray());
} }
...@@ -301,7 +324,7 @@ public class MinimalPerfectHash<K> { ...@@ -301,7 +324,7 @@ public class MinimalPerfectHash<K> {
* @param out the output stream * @param out the output stream
*/ */
static <K> void generate(ArrayList<K> list, UniversalHash<K> hash, static <K> void generate(ArrayList<K> list, UniversalHash<K> hash,
int level, ByteArrayOutputStream out) { int level, int seed, ByteArrayOutputStream out) {
int size = list.size(); int size = list.size();
if (size <= 1) { if (size <= 1) {
out.write(size); out.write(size);
...@@ -311,7 +334,7 @@ public class MinimalPerfectHash<K> { ...@@ -311,7 +334,7 @@ public class MinimalPerfectHash<K> {
int maxOffset = MAX_OFFSETS[size]; int maxOffset = MAX_OFFSETS[size];
int[] hashes = new int[size]; int[] hashes = new int[size];
for (int i = 0; i < size; i++) { for (int i = 0; i < size; i++) {
hashes[i] = hash.hashCode(list.get(i), level); hashes[i] = hash.hashCode(list.get(i), level, seed);
} }
nextOffset: nextOffset:
for (int offset = 0; offset < maxOffset; offset++) { for (int offset = 0; offset < maxOffset; offset++) {
...@@ -344,7 +367,7 @@ public class MinimalPerfectHash<K> { ...@@ -344,7 +367,7 @@ public class MinimalPerfectHash<K> {
} }
for (int i = 0; i < size; i++) { for (int i = 0; i < size; i++) {
K x = list.get(i); K x = list.get(i);
ArrayList<K> l = lists.get(hash(x, hash, level, 0, split)); ArrayList<K> l = lists.get(hash(x, hash, level, seed, 0, split));
l.add(x); l.add(x);
if (isRoot && split >= SPLIT_MANY && if (isRoot && split >= SPLIT_MANY &&
l.size() > 36 * DIVIDE * 10) { l.size() > 36 * DIVIDE * 10) {
...@@ -363,10 +386,10 @@ public class MinimalPerfectHash<K> { ...@@ -363,10 +386,10 @@ public class MinimalPerfectHash<K> {
list.clear(); list.clear();
list.trimToSize(); list.trimToSize();
if (multiThreaded) { if (multiThreaded) {
generateMultiThreaded(lists, hash, level, out); generateMultiThreaded(lists, hash, level, seed, out);
} else { } else {
for (ArrayList<K> s2 : lists) { for (ArrayList<K> s2 : lists) {
generate(s2, hash, level + 1, out); generate(s2, hash, level + 1, seed, out);
} }
} }
if (isRoot && split >= SPLIT_MANY) { if (isRoot && split >= SPLIT_MANY) {
...@@ -378,6 +401,7 @@ public class MinimalPerfectHash<K> { ...@@ -378,6 +401,7 @@ public class MinimalPerfectHash<K> {
final ArrayList<ArrayList<K>> lists, final ArrayList<ArrayList<K>> lists,
final UniversalHash<K> hash, final UniversalHash<K> hash,
final int level, final int level,
final int seed,
ByteArrayOutputStream out) { ByteArrayOutputStream out) {
final ArrayList<ByteArrayOutputStream> outList = final ArrayList<ByteArrayOutputStream> outList =
new ArrayList<ByteArrayOutputStream>(); new ArrayList<ByteArrayOutputStream>();
...@@ -398,7 +422,7 @@ public class MinimalPerfectHash<K> { ...@@ -398,7 +422,7 @@ public class MinimalPerfectHash<K> {
list = lists.remove(0); list = lists.remove(0);
outList.add(temp); outList.add(temp);
} }
generate(list, hash, level + 1, temp); generate(list, hash, level + 1, seed, temp);
} }
} }
}; };
...@@ -426,12 +450,13 @@ public class MinimalPerfectHash<K> { ...@@ -426,12 +450,13 @@ public class MinimalPerfectHash<K> {
* *
* @param o the key * @param o the key
* @param level the recursion level * @param level the recursion level
* @param seed the random seed
* @param offset the index of the hash function * @param offset the index of the hash function
* @param size the size of the bucket * @param size the size of the bucket
* @return the hash (a value between 0, including, and the size, excluding) * @return the hash (a value between 0, including, and the size, excluding)
*/ */
private static <K> int hash(K o, UniversalHash<K> hash, int level, int offset, int size) { private static <K> int hash(K o, UniversalHash<K> hash, int level, int seed, int offset, int size) {
int x = hash.hashCode(o, level); int x = hash.hashCode(o, level, seed);
x += level + offset * 16; x += level + offset * 16;
x = ((x >>> 16) ^ x) * 0x45d9f3b; x = ((x >>> 16) ^ x) * 0x45d9f3b;
x = ((x >>> 16) ^ x) * 0x45d9f3b; x = ((x >>> 16) ^ x) * 0x45d9f3b;
...@@ -553,9 +578,10 @@ public class MinimalPerfectHash<K> { ...@@ -553,9 +578,10 @@ public class MinimalPerfectHash<K> {
* @param index the hash function index (index 0 is used first, so the * @param index the hash function index (index 0 is used first, so the
* method should be very fast with index 0; index 1 and so on * method should be very fast with index 0; index 1 and so on
* are only called when really needed) * are only called when really needed)
* @param seed the random seed (always the same for a hash table)
* @return the hash value * @return the hash value
*/ */
int hashCode(T o, int index); int hashCode(T o, int index, int seed);
} }
...@@ -565,7 +591,7 @@ public class MinimalPerfectHash<K> { ...@@ -565,7 +591,7 @@ public class MinimalPerfectHash<K> {
public static class LongHash implements UniversalHash<Long> { public static class LongHash implements UniversalHash<Long> {
@Override @Override
public int hashCode(Long o, int index) { public int hashCode(Long o, int index, int seed) {
if (index == 0) { if (index == 0) {
return o.hashCode(); return o.hashCode();
} else if (index < 8) { } else if (index < 8) {
...@@ -590,7 +616,7 @@ public class MinimalPerfectHash<K> { ...@@ -590,7 +616,7 @@ public class MinimalPerfectHash<K> {
private static final Charset UTF8 = Charset.forName("UTF-8"); private static final Charset UTF8 = Charset.forName("UTF-8");
@Override @Override
public int hashCode(String o, int index) { public int hashCode(String o, int index, int seed) {
if (index == 0) { if (index == 0) {
// use the default hash of a string, which might already be // use the default hash of a string, which might already be
// available // available
...@@ -598,11 +624,11 @@ public class MinimalPerfectHash<K> { ...@@ -598,11 +624,11 @@ public class MinimalPerfectHash<K> {
} else if (index < 8) { } else if (index < 8) {
// use a different hash function, which is fast but not // use a different hash function, which is fast but not
// cryptographically secure // cryptographically secure
return getFastHash(o, index); return getFastHash(o, index ^ seed);
} }
// this method is supposed to be cryptographically secure; // this method is supposed to be cryptographically secure;
// we could use SHA-256 for higher indexes // we could use SHA-256 for higher indexes
return getSipHash24(o, index, 0); return getSipHash24(o, index, seed);
} }
/** /**
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论