Repository: hive Updated Branches: refs/heads/master 3e46515d3 -> 87ce36b45
HIVE-20101: BloomKFilter: Avoid using the local byte[] arrays entirely (Gopal V, reviewed by Prasanth Jayachandran) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/87ce36b4 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/87ce36b4 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/87ce36b4 Branch: refs/heads/master Commit: 87ce36b458350db141c4cb4b6336a9a01796370f Parents: 3e46515 Author: Gopal V <[email protected]> Authored: Tue Jul 31 14:10:06 2018 -0700 Committer: Gopal V <[email protected]> Committed: Tue Jul 31 14:10:06 2018 -0700 ---------------------------------------------------------------------- .../apache/hive/common/util/BloomKFilter.java | 30 +++----------------- 1 file changed, 4 insertions(+), 26 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/87ce36b4/storage-api/src/java/org/apache/hive/common/util/BloomKFilter.java ---------------------------------------------------------------------- diff --git a/storage-api/src/java/org/apache/hive/common/util/BloomKFilter.java b/storage-api/src/java/org/apache/hive/common/util/BloomKFilter.java index 5b1914d..3b44d2b 100644 --- a/storage-api/src/java/org/apache/hive/common/util/BloomKFilter.java +++ b/storage-api/src/java/org/apache/hive/common/util/BloomKFilter.java @@ -36,8 +36,6 @@ import java.util.Arrays; * This implementation has much lesser L1 data cache misses than {@link BloomFilter}. */ public class BloomKFilter { - private final byte[] BYTE_ARRAY_4 = new byte[4]; - private final byte[] BYTE_ARRAY_8 = new byte[8]; public static final float DEFAULT_FPP = 0.05f; private static final int DEFAULT_BLOCK_SIZE = 8; private static final int DEFAULT_BLOCK_SIZE_BITS = (int) (Math.log(DEFAULT_BLOCK_SIZE) / Math.log(2)); @@ -149,8 +147,7 @@ public class BloomKFilter { } public void addInt(int val) { - // puts int in little endian order - addBytes(intToByteArrayLE(val)); + addHash(Murmur3.hash64(val)); } @@ -184,6 +181,7 @@ public class BloomKFilter { private boolean testHash(long hash64) { final int hash1 = (int) hash64; final int hash2 = (int) (hash64 >>> 32); + final long[] bits = bitSet.data; int firstHash = hash1 + hash2; // hashcode should be positive, flip all the bits if it's negative @@ -216,7 +214,7 @@ public class BloomKFilter { long expected = 0; for (int i = 0; i < DEFAULT_BLOCK_SIZE; i++) { final long mask = masks[i]; - expected |= (bitSet.data[blockBaseOffset + i] & mask) ^ mask; + expected |= (bits[blockBaseOffset + i] & mask) ^ mask; } // clear the mask for array reuse (this is to avoid masks array allocation in inner loop) @@ -235,7 +233,7 @@ public class BloomKFilter { } public boolean testInt(int val) { - return testBytes(intToByteArrayLE(val)); + return testHash(Murmur3.hash64(val)); } public boolean testLong(long val) { @@ -250,26 +248,6 @@ public class BloomKFilter { return testLong(Double.doubleToLongBits(val)); } - private byte[] intToByteArrayLE(int val) { - BYTE_ARRAY_4[0] = (byte) (val >> 0); - BYTE_ARRAY_4[1] = (byte) (val >> 8); - BYTE_ARRAY_4[2] = (byte) (val >> 16); - BYTE_ARRAY_4[3] = (byte) (val >> 24); - return BYTE_ARRAY_4; - } - - private byte[] longToByteArrayLE(long val) { - BYTE_ARRAY_8[0] = (byte) (val >> 0); - BYTE_ARRAY_8[1] = (byte) (val >> 8); - BYTE_ARRAY_8[2] = (byte) (val >> 16); - BYTE_ARRAY_8[3] = (byte) (val >> 24); - BYTE_ARRAY_8[4] = (byte) (val >> 32); - BYTE_ARRAY_8[5] = (byte) (val >> 40); - BYTE_ARRAY_8[6] = (byte) (val >> 48); - BYTE_ARRAY_8[7] = (byte) (val >> 56); - return BYTE_ARRAY_8; - } - public long sizeInBytes() { return getBitSize() / 8; }
