DRILL-1525: Modify hash functions to use XXHash algorithm
Project: http://git-wip-us.apache.org/repos/asf/incubator-drill/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-drill/commit/2e07c62f Tree: http://git-wip-us.apache.org/repos/asf/incubator-drill/tree/2e07c62f Diff: http://git-wip-us.apache.org/repos/asf/incubator-drill/diff/2e07c62f Branch: refs/heads/master Commit: 2e07c62f6bd9db240f745b18716b17fecd88d6ef Parents: 3b8dd3b Author: Mehant <meha...@gmail.com> Authored: Thu Aug 28 12:31:32 2014 -0700 Committer: Jinfeng Ni <j...@maprtech.com> Committed: Fri Nov 7 10:50:55 2014 -0800 ---------------------------------------------------------------------- .../drill/exec/expr/fn/impl/HashFunctions.java | 69 ++++---- .../apache/drill/exec/expr/fn/impl/XXHash.java | 173 +++++++++++++++++++ 2 files changed, 207 insertions(+), 35 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/2e07c62f/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/HashFunctions.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/HashFunctions.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/HashFunctions.java index 7f6d8a5..dd8ac88 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/HashFunctions.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/HashFunctions.java @@ -69,7 +69,7 @@ public class HashFunctions { if (in.isSet == 0) { out.value = 0; } else { - out.value = com.google.common.hash.Hashing.murmur3_128().hashInt(Float.floatToIntBits(in.value)).asInt(); + out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.value, 0); } } } @@ -84,7 +84,7 @@ public class HashFunctions { } public void eval() { - out.value = com.google.common.hash.Hashing.murmur3_128().hashInt(Float.floatToIntBits(in.value)).asInt(); + out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.value, 0); } } @@ -101,7 +101,7 @@ public class HashFunctions { if (in.isSet == 0) { out.value = 0; } else { - out.value = com.google.common.hash.Hashing.murmur3_128().hashLong(Double.doubleToLongBits(in.value)).asInt(); + out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.value, 0); } } } @@ -116,7 +116,7 @@ public class HashFunctions { } public void eval() { - out.value = com.google.common.hash.Hashing.murmur3_128().hashLong(Double.doubleToLongBits(in.value)).asInt(); + out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.value, 0); } } @@ -133,7 +133,7 @@ public class HashFunctions { if (in.isSet == 0) { out.value = 0; } else { - out.value = org.apache.drill.exec.expr.fn.impl.HashHelper.hash(in.buffer.nioBuffer(in.start, in.end - in.start), 0); + out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.start, in.end, in.buffer); } } } @@ -151,7 +151,7 @@ public class HashFunctions { if (in.isSet == 0) { out.value = 0; } else { - out.value = org.apache.drill.exec.expr.fn.impl.HashHelper.hash(in.buffer.nioBuffer(in.start, in.end - in.start), 0); + out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.start, in.end, in.buffer); } } } @@ -169,7 +169,7 @@ public class HashFunctions { if (in.isSet == 0) { out.value = 0; } else { - out.value = org.apache.drill.exec.expr.fn.impl.HashHelper.hash(in.buffer.nioBuffer(in.start, in.end - in.start), 0); + out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.start, in.end, in.buffer); } } } @@ -184,11 +184,11 @@ public class HashFunctions { } public void eval() { - // TODO: implement hash function for other types if (in.isSet == 0) { out.value = 0; - } else { - out.value = com.google.common.hash.Hashing.murmur3_128().hashLong(in.value).asInt(); + } + else { + out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.value, 0); } } } @@ -202,11 +202,11 @@ public class HashFunctions { } public void eval() { - // TODO: implement hash function for other types if (in.isSet == 0) { out.value = 0; - } else { - out.value = com.google.common.hash.Hashing.murmur3_128().hashInt(in.value).asInt(); + } + else { + out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.value, 0); } } } @@ -221,7 +221,7 @@ public class HashFunctions { } public void eval() { - out.value = org.apache.drill.exec.expr.fn.impl.HashHelper.hash(in.buffer.nioBuffer(in.start, in.end - in.start), 0); + out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.start, in.end, in.buffer); } } @@ -235,7 +235,7 @@ public class HashFunctions { } public void eval() { - out.value = org.apache.drill.exec.expr.fn.impl.HashHelper.hash(in.buffer.nioBuffer(in.start, in.end - in.start), 0); + out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.start, in.end, in.buffer); } } @@ -249,7 +249,7 @@ public class HashFunctions { } public void eval() { - out.value = org.apache.drill.exec.expr.fn.impl.HashHelper.hash(in.buffer.nioBuffer(in.start, in.end - in.start), 0); + out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.start, in.end, in.buffer); } } @@ -263,8 +263,7 @@ public class HashFunctions { } public void eval() { - // TODO: implement hash function for other types - out.value = com.google.common.hash.Hashing.murmur3_128().hashLong(in.value).asInt(); + out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.value, 0); } } @@ -278,7 +277,7 @@ public class HashFunctions { public void eval() { // TODO: implement hash function for other types - out.value = com.google.common.hash.Hashing.murmur3_128().hashInt(in.value).asInt(); + out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.value, 0); } } @FunctionTemplate(name = "hash", scope = FunctionScope.SIMPLE, nulls = FunctionTemplate.NullHandling.INTERNAL) @@ -290,7 +289,7 @@ public class HashFunctions { } public void eval() { - out.value = com.google.common.hash.Hashing.murmur3_128().hashLong(in.value).asInt(); + out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.value, 0); } } @@ -306,7 +305,7 @@ public class HashFunctions { if (in.isSet == 0) { out.value = 0; } else { - out.value = com.google.common.hash.Hashing.murmur3_128().hashLong(in.value).asInt(); + out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.value, 0); } } } @@ -320,7 +319,7 @@ public class HashFunctions { } public void eval() { - out.value = com.google.common.hash.Hashing.murmur3_128().hashLong(in.value).asInt(); + out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.value, 0); } } @@ -336,7 +335,7 @@ public class HashFunctions { if (in.isSet == 0) { out.value = 0; } else { - out.value = com.google.common.hash.Hashing.murmur3_128().hashLong(in.value).asInt(); + out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.value, 0); } } } @@ -350,7 +349,7 @@ public class HashFunctions { } public void eval() { - out.value = com.google.common.hash.Hashing.murmur3_128().hashInt(in.value).asInt(); + out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.value, 0); } } @@ -366,7 +365,7 @@ public class HashFunctions { if (in.isSet == 0) { out.value = 0; } else { - out.value = com.google.common.hash.Hashing.murmur3_128().hashInt(in.value).asInt(); + out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.value, 0); } } } @@ -380,7 +379,7 @@ public class HashFunctions { } public void eval() { - out.value = com.google.common.hash.Hashing.murmur3_128().hashLong(in.value ^ in.index).asInt(); + out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.value ^ in.index, 0); } } @@ -396,7 +395,7 @@ public class HashFunctions { if (in.isSet == 0) { out.value = 0; } else { - out.value = com.google.common.hash.Hashing.murmur3_128().hashLong(in.value ^ in.index).asInt(); + out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.value ^ in.index, 0); } } } @@ -410,7 +409,7 @@ public class HashFunctions { } public void eval() { - out.value = com.google.common.hash.Hashing.murmur3_128().hashInt(in.value).asInt(); + out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.value, 0); } } @@ -426,7 +425,7 @@ public class HashFunctions { if (in.isSet == 0) { out.value = 0; } else { - out.value = com.google.common.hash.Hashing.murmur3_128().hashInt(in.value).asInt(); + out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.value, 0); } } } @@ -440,7 +439,7 @@ public class HashFunctions { } public void eval() { - out.value = com.google.common.hash.Hashing.murmur3_128().hashLong(in.value).asInt(); + out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.value, 0); } } @@ -456,7 +455,7 @@ public class HashFunctions { if (in.isSet == 0) { out.value = 0; } else { - out.value = com.google.common.hash.Hashing.murmur3_128().hashLong(in.value).asInt(); + out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.value, 0); } } } @@ -475,7 +474,7 @@ public class HashFunctions { for (int i = 0; i < in.nDecimalDigits; i++) { xor = xor ^ Decimal28SparseHolder.getInteger(i, in.start, in.buffer); } - out.value = com.google.common.hash.Hashing.murmur3_128().hashInt(xor).asInt(); + out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(xor, 0); } } @@ -495,7 +494,7 @@ public class HashFunctions { for (int i = 0; i < in.nDecimalDigits; i++) { xor = xor ^ NullableDecimal28SparseHolder.getInteger(i, in.start, in.buffer); } - out.value = com.google.common.hash.Hashing.murmur3_128().hashInt(xor).asInt(); + out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(xor, 0); } } } @@ -514,7 +513,7 @@ public class HashFunctions { for (int i = 0; i < in.nDecimalDigits; i++) { xor = xor ^ Decimal38SparseHolder.getInteger(i, in.start, in.buffer); } - out.value = com.google.common.hash.Hashing.murmur3_128().hashInt(xor).asInt(); + out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(xor, 0); } } @@ -534,7 +533,7 @@ public class HashFunctions { for (int i = 0; i < in.nDecimalDigits; i++) { xor = xor ^ NullableDecimal38SparseHolder.getInteger(i, in.start, in.buffer); } - out.value = com.google.common.hash.Hashing.murmur3_128().hashInt(xor).asInt(); + out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(xor, 0); } } } http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/2e07c62f/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/XXHash.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/XXHash.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/XXHash.java new file mode 100644 index 0000000..a8a6484 --- /dev/null +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/XXHash.java @@ -0,0 +1,173 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.expr.fn.impl; + +import io.netty.buffer.DrillBuf; +import io.netty.util.internal.PlatformDependent; + +import com.google.common.primitives.UnsignedLongs; + +public final class XXHash { + static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(XXHash.class); + + static final long PRIME64_1 = UnsignedLongs.decode("11400714785074694791"); + static final long PRIME64_2 = UnsignedLongs.decode("14029467366897019727"); + static final long PRIME64_3 = UnsignedLongs.decode("1609587929392839161"); + static final long PRIME64_4 = UnsignedLongs.decode("9650029242287828579"); + static final long PRIME64_5 = UnsignedLongs.decode("2870177450012600261"); + + public static long hash64(long start, long bEnd, long seed) { + long len = bEnd - start; + long h64; + long p = start; + + // for long strings (greater than 32 bytes) + if (len >= 32) { + final long limit = bEnd - 32; + long v1 = seed + PRIME64_1 + PRIME64_2; + long v2 = seed + PRIME64_2; + long v3 = seed + 0; + long v4 = seed - PRIME64_1; + + do { + v1 += PlatformDependent.getLong(p) * PRIME64_2; + p = p + 8; + v1 = Long.rotateLeft(v1, 31); + v1 *= PRIME64_1; + + v2 += PlatformDependent.getLong(p) * PRIME64_2; + p = p + 8; + v2 = Long.rotateLeft(v2, 31); + v2 *= PRIME64_1; + + v3 += PlatformDependent.getLong(p) * PRIME64_2; + p = p + 8; + v3 = Long.rotateLeft(v3, 31); + v3 *= PRIME64_1; + + v4 += PlatformDependent.getLong(p) * PRIME64_2; + p = p + 8; + v4 = Long.rotateLeft(v4, 31); + v4 *= PRIME64_1; + } while (p <= limit); + + h64 = Long.rotateLeft(v1, 1) + Long.rotateLeft(v2, 7) + Long.rotateLeft(v3, 12) + Long.rotateLeft(v4, 18); + + v1 *= PRIME64_2; + v1 = Long.rotateLeft(v1, 31); + v1 *= PRIME64_1; + h64 ^= v1; + + h64 = h64 * PRIME64_1 + PRIME64_4; + + v2 *= PRIME64_2; + v2 = Long.rotateLeft(v2, 31); + v2 *= PRIME64_1; + h64 ^= v2; + + h64 = h64 * PRIME64_1 + PRIME64_4; + + v3 *= PRIME64_2; + v3 = Long.rotateLeft(v3, 31); + v3 *= PRIME64_1; + h64 ^= v3; + + h64 = h64 * PRIME64_1 + PRIME64_4; + + v4 *= PRIME64_2; + v4 = Long.rotateLeft(v4, 31); + v4 *= PRIME64_1; + h64 ^= v4; + + h64 = h64 * PRIME64_1 + PRIME64_4; + } else { + h64 = seed + PRIME64_5; + } + + h64 += len; + + while (p + 8 <= bEnd) { + long k1 = PlatformDependent.getLong(p); + k1 *= PRIME64_2; + k1 = Long.rotateLeft(k1, 31); + k1 *= PRIME64_1; + h64 ^= k1; + h64 = Long.rotateLeft(h64, 27) * PRIME64_1 + PRIME64_4; + p += 8; + } + + if (p + 4 <= bEnd) { + h64 ^= PlatformDependent.getInt(p) * PRIME64_1; + h64 = Long.rotateLeft(h64, 23) * PRIME64_2 + PRIME64_3; + p += 4; + } + while (p < bEnd) { + h64 ^= PlatformDependent.getByte(p) * PRIME64_5; + h64 = Long.rotateLeft(h64, 11) * PRIME64_1; + p++; + } + + return applyFinalHashComputation(h64); + } + + public static int hash32(int start, int end, DrillBuf buffer){ + long s = buffer.memoryAddress() + start; + long e = buffer.memoryAddress() + end; + return hash32(s, e, 0); + } + + public static int hash32(int val, int seed){ + long h64 = seed + PRIME64_5; + h64 += 4; // add length (4 bytes) to hash value + h64 ^= val * PRIME64_1; + h64 = Long.rotateLeft(h64, 23) * PRIME64_2 + PRIME64_3; + return (int) applyFinalHashComputation(h64); + } + + public static int hash32(float val, int seed){ + return hash32(Float.floatToIntBits(val), seed); + } + + public static int hash32(double val, int seed){ + return hash32(Double.doubleToLongBits(val), seed); + } + + public static int hash32(long val, int seed){ + long h64 = seed + PRIME64_5; + h64 += 8; // add length (8 bytes) to hash value + long k1 = val* PRIME64_2; + k1 = Long.rotateLeft(k1, 31); + k1 *= PRIME64_1; + h64 ^= k1; + h64 = Long.rotateLeft(h64, 27) * PRIME64_1 + PRIME64_4; + return (int) applyFinalHashComputation(h64); + } + + public static int hash32(long start, long bEnd, long seed){ + return (int) hash64(start, bEnd, seed); + } + + private static long applyFinalHashComputation(long h64) { + h64 ^= h64 >> 33; + h64 *= PRIME64_2; + h64 ^= h64 >> 29; + h64 *= PRIME64_3; + h64 ^= h64 >> 32; + return h64; + } +}