Repository: hive Updated Branches: refs/heads/master db7ff5f3a -> d22fc5b24
HIVE-20893: Fix thread safety issue for bloomK probing filter (Gopal V, reviewed by Slim B) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/d22fc5b2 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/d22fc5b2 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/d22fc5b2 Branch: refs/heads/master Commit: d22fc5b24c37101284dc8b3686acbe73fa39dea9 Parents: db7ff5f Author: Gopal V <gop...@apache.org> Authored: Sun Nov 11 13:42:16 2018 -0800 Committer: Slim Bouguerra <bs...@apache.org> Committed: Sun Nov 11 13:43:27 2018 -0800 ---------------------------------------------------------------------- .../apache/hive/common/util/BloomKFilter.java | 60 +-- .../hive/common/util/TestBloomFilter.java | 412 +++++++++--------- .../hive/common/util/TestBloomKFilter.java | 436 ++++++++++--------- 3 files changed, 443 insertions(+), 465 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/d22fc5b2/storage-api/src/java/org/apache/hive/common/util/BloomKFilter.java ---------------------------------------------------------------------- diff --git a/storage-api/src/java/org/apache/hive/common/util/BloomKFilter.java b/storage-api/src/java/org/apache/hive/common/util/BloomKFilter.java index 3b44d2b..b186477 100644 --- a/storage-api/src/java/org/apache/hive/common/util/BloomKFilter.java +++ b/storage-api/src/java/org/apache/hive/common/util/BloomKFilter.java @@ -35,13 +35,12 @@ import java.util.Arrays; * * This implementation has much lesser L1 data cache misses than {@link BloomFilter}. */ -public class BloomKFilter { +@SuppressWarnings({ "WeakerAccess", "unused" }) public class BloomKFilter { public static final float DEFAULT_FPP = 0.05f; private static final int DEFAULT_BLOCK_SIZE = 8; private static final int DEFAULT_BLOCK_SIZE_BITS = (int) (Math.log(DEFAULT_BLOCK_SIZE) / Math.log(2)); private static final int DEFAULT_BLOCK_OFFSET_MASK = DEFAULT_BLOCK_SIZE - 1; private static final int DEFAULT_BIT_OFFSET_MASK = Long.SIZE - 1; - private final long[] masks = new long[DEFAULT_BLOCK_SIZE]; private final BitSet bitSet; private final int m; private final int k; @@ -50,7 +49,7 @@ public class BloomKFilter { // default block size is set to 8 as most cache line sizes are 64 bytes and also AVX512 friendly private final int totalBlockCount; - static void checkArgument(boolean expression, String message) { + private static void checkArgument(boolean expression, String message) { if (!expression) { throw new IllegalArgumentException(message); } @@ -71,8 +70,8 @@ public class BloomKFilter { /** * A constructor to support rebuilding the BloomFilter from a serialized representation. - * @param bits - * @param numFuncs + * @param bits BloomK sketch data in form of array of longs. + * @param numFuncs Number of functions called as K. */ public BloomKFilter(long[] bits, int numFuncs) { super(); @@ -205,23 +204,16 @@ public class BloomKFilter { } // LSB 3 bits is used to locate offset within the block final int wordOffset = combinedHash & DEFAULT_BLOCK_OFFSET_MASK; + final int absOffset = blockBaseOffset + wordOffset; // Next 6 bits are used to locate offset within a long/word final int bitPos = (combinedHash >>> DEFAULT_BLOCK_SIZE_BITS) & DEFAULT_BIT_OFFSET_MASK; - masks[wordOffset] |= (1L << bitPos); - } - - // traverse data and masks array together, check for set bits - long expected = 0; - for (int i = 0; i < DEFAULT_BLOCK_SIZE; i++) { - final long mask = masks[i]; - expected |= (bits[blockBaseOffset + i] & mask) ^ mask; + final long bloomWord = bits[absOffset]; + if (0 == (bloomWord & (1L << bitPos))) { + return false; + } } - // clear the mask for array reuse (this is to avoid masks array allocation in inner loop) - Arrays.fill(masks, 0); - - // if all bits are set, expected should be 0 - return expected == 0; + return true; } public boolean testString(String val) { @@ -292,18 +284,16 @@ public class BloomKFilter { } /** - * Serialize a bloom filter + * Serialize a bloom filter: + * Serialized BloomKFilter format: + * 1 byte for the number of hash functions. + * 1 big endian int(That is how OutputStream works) for the number of longs in the bitset + * big endian longs in the BloomKFilter bitset * * @param out output stream to write to - * @param bloomFilter BloomKFilter that needs to be seralized + * @param bloomFilter BloomKFilter that needs to be serialized */ public static void serialize(OutputStream out, BloomKFilter bloomFilter) throws IOException { - /** - * Serialized BloomKFilter format: - * 1 byte for the number of hash functions. - * 1 big endian int(That is how OutputStream works) for the number of longs in the bitset - * big endina longs in the BloomKFilter bitset - */ DataOutputStream dataOutputStream = new DataOutputStream(out); dataOutputStream.writeByte(bloomFilter.k); dataOutputStream.writeInt(bloomFilter.getBitSet().length); @@ -335,9 +325,7 @@ public class BloomKFilter { } return new BloomKFilter(data, numHashFunc); } catch (RuntimeException e) { - IOException io = new IOException("Unable to deserialize BloomKFilter"); - io.initCause(e); - throw io; + throw new IOException("Unable to deserialize BloomKFilter", e); } } @@ -350,12 +338,12 @@ public class BloomKFilter { * Merges BloomKFilter bf2 into bf1. * Assumes 2 BloomKFilters with the same size/hash functions are serialized to byte arrays * - * @param bf1Bytes - * @param bf1Start - * @param bf1Length - * @param bf2Bytes - * @param bf2Start - * @param bf2Length + * @param bf1Bytes Data of bloom filter 1. + * @param bf1Start Start index of BF1. + * @param bf1Length BF1 length. + * @param bf2Bytes Data of bloom filter 1 + * @param bf2Start Start index of BF2. + * @param bf2Length BF2 length. */ public static void mergeBloomFilterBytes( byte[] bf1Bytes, int bf1Start, int bf1Length, @@ -382,7 +370,7 @@ public class BloomKFilter { * Bare metal bit set implementation. For performance reasons, this implementation does not check * for index bounds nor expand the bit set size if the specified index is greater than the size. */ - public static class BitSet { + @SuppressWarnings("unused") public static class BitSet { private final long[] data; public BitSet(long bits) { http://git-wip-us.apache.org/repos/asf/hive/blob/d22fc5b2/storage-api/src/test/org/apache/hive/common/util/TestBloomFilter.java ---------------------------------------------------------------------- diff --git a/storage-api/src/test/org/apache/hive/common/util/TestBloomFilter.java b/storage-api/src/test/org/apache/hive/common/util/TestBloomFilter.java index 5bc5eed..ed08266 100644 --- a/storage-api/src/test/org/apache/hive/common/util/TestBloomFilter.java +++ b/storage-api/src/test/org/apache/hive/common/util/TestBloomFilter.java @@ -19,11 +19,11 @@ package org.apache.hive.common.util; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; -import java.util.ArrayList; import java.util.Random; import org.junit.Assert; @@ -105,30 +105,30 @@ public class TestBloomFilter { byte[] val2 = new byte[]{1, 2, 3, 4, 5}; byte[] val3 = new byte[]{1, 2, 3, 4, 5, 6}; - assertEquals(false, bf.test(val)); - assertEquals(false, bf.test(val1)); - assertEquals(false, bf.test(val2)); - assertEquals(false, bf.test(val3)); + assertFalse(bf.test(val)); + assertFalse( bf.test(val1)); + assertFalse( bf.test(val2)); + assertFalse( bf.test(val3)); bf.add(val); - assertEquals(true, bf.test(val)); - assertEquals(false, bf.test(val1)); - assertEquals(false, bf.test(val2)); - assertEquals(false, bf.test(val3)); + assertTrue( bf.test(val)); + assertFalse( bf.test(val1)); + assertFalse( bf.test(val2)); + assertFalse( bf.test(val3)); bf.add(val1); - assertEquals(true, bf.test(val)); - assertEquals(true, bf.test(val1)); - assertEquals(false, bf.test(val2)); - assertEquals(false, bf.test(val3)); + assertTrue( bf.test(val)); + assertTrue( bf.test(val1)); + assertFalse( bf.test(val2)); + assertFalse( bf.test(val3)); bf.add(val2); - assertEquals(true, bf.test(val)); - assertEquals(true, bf.test(val1)); - assertEquals(true, bf.test(val2)); - assertEquals(false, bf.test(val3)); + assertTrue( bf.test(val)); + assertTrue( bf.test(val1)); + assertTrue( bf.test(val2)); + assertFalse( bf.test(val3)); bf.add(val3); - assertEquals(true, bf.test(val)); - assertEquals(true, bf.test(val1)); - assertEquals(true, bf.test(val2)); - assertEquals(true, bf.test(val3)); + assertTrue( bf.test(val)); + assertTrue( bf.test(val1)); + assertTrue( bf.test(val2)); + assertTrue( bf.test(val3)); byte[] randVal = new byte[COUNT]; for (int i = 0; i < COUNT; i++) { @@ -136,14 +136,14 @@ public class TestBloomFilter { bf.add(randVal); } // last value should be present - assertEquals(true, bf.test(randVal)); + assertTrue( bf.test(randVal)); // most likely this value should not exist randVal[0] = 0; randVal[1] = 0; randVal[2] = 0; randVal[3] = 0; randVal[4] = 0; - assertEquals(false, bf.test(randVal)); + assertFalse( bf.test(randVal)); assertEquals(7800, bf.sizeInBytes()); } @@ -156,30 +156,30 @@ public class TestBloomFilter { byte val2 = 2; byte val3 = Byte.MAX_VALUE; - assertEquals(false, bf.testLong(val)); - assertEquals(false, bf.testLong(val1)); - assertEquals(false, bf.testLong(val2)); - assertEquals(false, bf.testLong(val3)); + assertFalse( bf.testLong(val)); + assertFalse( bf.testLong(val1)); + assertFalse( bf.testLong(val2)); + assertFalse( bf.testLong(val3)); bf.addLong(val); - assertEquals(true, bf.testLong(val)); - assertEquals(false, bf.testLong(val1)); - assertEquals(false, bf.testLong(val2)); - assertEquals(false, bf.testLong(val3)); + assertTrue( bf.testLong(val)); + assertFalse( bf.testLong(val1)); + assertFalse( bf.testLong(val2)); + assertFalse( bf.testLong(val3)); bf.addLong(val1); - assertEquals(true, bf.testLong(val)); - assertEquals(true, bf.testLong(val1)); - assertEquals(false, bf.testLong(val2)); - assertEquals(false, bf.testLong(val3)); + assertTrue( bf.testLong(val)); + assertTrue( bf.testLong(val1)); + assertFalse( bf.testLong(val2)); + assertFalse( bf.testLong(val3)); bf.addLong(val2); - assertEquals(true, bf.testLong(val)); - assertEquals(true, bf.testLong(val1)); - assertEquals(true, bf.testLong(val2)); - assertEquals(false, bf.testLong(val3)); + assertTrue( bf.testLong(val)); + assertTrue( bf.testLong(val1)); + assertTrue( bf.testLong(val2)); + assertFalse( bf.testLong(val3)); bf.addLong(val3); - assertEquals(true, bf.testLong(val)); - assertEquals(true, bf.testLong(val1)); - assertEquals(true, bf.testLong(val2)); - assertEquals(true, bf.testLong(val3)); + assertTrue( bf.testLong(val)); + assertTrue( bf.testLong(val1)); + assertTrue( bf.testLong(val2)); + assertTrue( bf.testLong(val3)); byte randVal = 0; for (int i = 0; i < COUNT; i++) { @@ -187,9 +187,9 @@ public class TestBloomFilter { bf.addLong(randVal); } // last value should be present - assertEquals(true, bf.testLong(randVal)); + assertTrue( bf.testLong(randVal)); // most likely this value should not exist - assertEquals(false, bf.testLong((byte) -120)); + assertFalse( bf.testLong((byte) -120)); assertEquals(7800, bf.sizeInBytes()); } @@ -202,30 +202,30 @@ public class TestBloomFilter { int val2 = 2; int val3 = Integer.MAX_VALUE; - assertEquals(false, bf.testLong(val)); - assertEquals(false, bf.testLong(val1)); - assertEquals(false, bf.testLong(val2)); - assertEquals(false, bf.testLong(val3)); + assertFalse( bf.testLong(val)); + assertFalse( bf.testLong(val1)); + assertFalse( bf.testLong(val2)); + assertFalse( bf.testLong(val3)); bf.addLong(val); - assertEquals(true, bf.testLong(val)); - assertEquals(false, bf.testLong(val1)); - assertEquals(false, bf.testLong(val2)); - assertEquals(false, bf.testLong(val3)); + assertTrue( bf.testLong(val)); + assertFalse( bf.testLong(val1)); + assertFalse( bf.testLong(val2)); + assertFalse( bf.testLong(val3)); bf.addLong(val1); - assertEquals(true, bf.testLong(val)); - assertEquals(true, bf.testLong(val1)); - assertEquals(false, bf.testLong(val2)); - assertEquals(false, bf.testLong(val3)); + assertTrue( bf.testLong(val)); + assertTrue( bf.testLong(val1)); + assertFalse( bf.testLong(val2)); + assertFalse( bf.testLong(val3)); bf.addLong(val2); - assertEquals(true, bf.testLong(val)); - assertEquals(true, bf.testLong(val1)); - assertEquals(true, bf.testLong(val2)); - assertEquals(false, bf.testLong(val3)); + assertTrue( bf.testLong(val)); + assertTrue( bf.testLong(val1)); + assertTrue( bf.testLong(val2)); + assertFalse( bf.testLong(val3)); bf.addLong(val3); - assertEquals(true, bf.testLong(val)); - assertEquals(true, bf.testLong(val1)); - assertEquals(true, bf.testLong(val2)); - assertEquals(true, bf.testLong(val3)); + assertTrue( bf.testLong(val)); + assertTrue( bf.testLong(val1)); + assertTrue( bf.testLong(val2)); + assertTrue( bf.testLong(val3)); int randVal = 0; for (int i = 0; i < COUNT; i++) { @@ -233,9 +233,9 @@ public class TestBloomFilter { bf.addLong(randVal); } // last value should be present - assertEquals(true, bf.testLong(randVal)); + assertTrue( bf.testLong(randVal)); // most likely this value should not exist - assertEquals(false, bf.testLong(-120)); + assertFalse( bf.testLong(-120)); assertEquals(7800, bf.sizeInBytes()); } @@ -248,30 +248,30 @@ public class TestBloomFilter { long val2 = 2; long val3 = Long.MAX_VALUE; - assertEquals(false, bf.testLong(val)); - assertEquals(false, bf.testLong(val1)); - assertEquals(false, bf.testLong(val2)); - assertEquals(false, bf.testLong(val3)); + assertFalse( bf.testLong(val)); + assertFalse( bf.testLong(val1)); + assertFalse( bf.testLong(val2)); + assertFalse( bf.testLong(val3)); bf.addLong(val); - assertEquals(true, bf.testLong(val)); - assertEquals(false, bf.testLong(val1)); - assertEquals(false, bf.testLong(val2)); - assertEquals(false, bf.testLong(val3)); + assertTrue( bf.testLong(val)); + assertFalse( bf.testLong(val1)); + assertFalse( bf.testLong(val2)); + assertFalse( bf.testLong(val3)); bf.addLong(val1); - assertEquals(true, bf.testLong(val)); - assertEquals(true, bf.testLong(val1)); - assertEquals(false, bf.testLong(val2)); - assertEquals(false, bf.testLong(val3)); + assertTrue( bf.testLong(val)); + assertTrue( bf.testLong(val1)); + assertFalse( bf.testLong(val2)); + assertFalse( bf.testLong(val3)); bf.addLong(val2); - assertEquals(true, bf.testLong(val)); - assertEquals(true, bf.testLong(val1)); - assertEquals(true, bf.testLong(val2)); - assertEquals(false, bf.testLong(val3)); + assertTrue( bf.testLong(val)); + assertTrue( bf.testLong(val1)); + assertTrue( bf.testLong(val2)); + assertFalse( bf.testLong(val3)); bf.addLong(val3); - assertEquals(true, bf.testLong(val)); - assertEquals(true, bf.testLong(val1)); - assertEquals(true, bf.testLong(val2)); - assertEquals(true, bf.testLong(val3)); + assertTrue( bf.testLong(val)); + assertTrue( bf.testLong(val1)); + assertTrue( bf.testLong(val2)); + assertTrue( bf.testLong(val3)); long randVal = 0; for (int i = 0; i < COUNT; i++) { @@ -279,9 +279,9 @@ public class TestBloomFilter { bf.addLong(randVal); } // last value should be present - assertEquals(true, bf.testLong(randVal)); + assertTrue( bf.testLong(randVal)); // most likely this value should not exist - assertEquals(false, bf.testLong(-120)); + assertFalse( bf.testLong(-120)); assertEquals(7800, bf.sizeInBytes()); } @@ -294,30 +294,30 @@ public class TestBloomFilter { float val2 = 2.2f; float val3 = Float.MAX_VALUE; - assertEquals(false, bf.testDouble(val)); - assertEquals(false, bf.testDouble(val1)); - assertEquals(false, bf.testDouble(val2)); - assertEquals(false, bf.testDouble(val3)); + assertFalse( bf.testDouble(val)); + assertFalse( bf.testDouble(val1)); + assertFalse( bf.testDouble(val2)); + assertFalse( bf.testDouble(val3)); bf.addDouble(val); - assertEquals(true, bf.testDouble(val)); - assertEquals(false, bf.testDouble(val1)); - assertEquals(false, bf.testDouble(val2)); - assertEquals(false, bf.testDouble(val3)); + assertTrue( bf.testDouble(val)); + assertFalse( bf.testDouble(val1)); + assertFalse( bf.testDouble(val2)); + assertFalse( bf.testDouble(val3)); bf.addDouble(val1); - assertEquals(true, bf.testDouble(val)); - assertEquals(true, bf.testDouble(val1)); - assertEquals(false, bf.testDouble(val2)); - assertEquals(false, bf.testDouble(val3)); + assertTrue( bf.testDouble(val)); + assertTrue( bf.testDouble(val1)); + assertFalse( bf.testDouble(val2)); + assertFalse( bf.testDouble(val3)); bf.addDouble(val2); - assertEquals(true, bf.testDouble(val)); - assertEquals(true, bf.testDouble(val1)); - assertEquals(true, bf.testDouble(val2)); - assertEquals(false, bf.testDouble(val3)); + assertTrue( bf.testDouble(val)); + assertTrue( bf.testDouble(val1)); + assertTrue( bf.testDouble(val2)); + assertFalse( bf.testDouble(val3)); bf.addDouble(val3); - assertEquals(true, bf.testDouble(val)); - assertEquals(true, bf.testDouble(val1)); - assertEquals(true, bf.testDouble(val2)); - assertEquals(true, bf.testDouble(val3)); + assertTrue( bf.testDouble(val)); + assertTrue( bf.testDouble(val1)); + assertTrue( bf.testDouble(val2)); + assertTrue( bf.testDouble(val3)); float randVal = 0; for (int i = 0; i < COUNT; i++) { @@ -325,9 +325,9 @@ public class TestBloomFilter { bf.addDouble(randVal); } // last value should be present - assertEquals(true, bf.testDouble(randVal)); + assertTrue( bf.testDouble(randVal)); // most likely this value should not exist - assertEquals(false, bf.testDouble(-120.2f)); + assertFalse( bf.testDouble(-120.2f)); assertEquals(7800, bf.sizeInBytes()); } @@ -340,30 +340,30 @@ public class TestBloomFilter { double val2 = 2.2d; double val3 = Double.MAX_VALUE; - assertEquals(false, bf.testDouble(val)); - assertEquals(false, bf.testDouble(val1)); - assertEquals(false, bf.testDouble(val2)); - assertEquals(false, bf.testDouble(val3)); + assertFalse( bf.testDouble(val)); + assertFalse( bf.testDouble(val1)); + assertFalse( bf.testDouble(val2)); + assertFalse( bf.testDouble(val3)); bf.addDouble(val); - assertEquals(true, bf.testDouble(val)); - assertEquals(false, bf.testDouble(val1)); - assertEquals(false, bf.testDouble(val2)); - assertEquals(false, bf.testDouble(val3)); + assertTrue( bf.testDouble(val)); + assertFalse( bf.testDouble(val1)); + assertFalse( bf.testDouble(val2)); + assertFalse( bf.testDouble(val3)); bf.addDouble(val1); - assertEquals(true, bf.testDouble(val)); - assertEquals(true, bf.testDouble(val1)); - assertEquals(false, bf.testDouble(val2)); - assertEquals(false, bf.testDouble(val3)); + assertTrue( bf.testDouble(val)); + assertTrue( bf.testDouble(val1)); + assertFalse( bf.testDouble(val2)); + assertFalse( bf.testDouble(val3)); bf.addDouble(val2); - assertEquals(true, bf.testDouble(val)); - assertEquals(true, bf.testDouble(val1)); - assertEquals(true, bf.testDouble(val2)); - assertEquals(false, bf.testDouble(val3)); + assertTrue( bf.testDouble(val)); + assertTrue( bf.testDouble(val1)); + assertTrue( bf.testDouble(val2)); + assertFalse( bf.testDouble(val3)); bf.addDouble(val3); - assertEquals(true, bf.testDouble(val)); - assertEquals(true, bf.testDouble(val1)); - assertEquals(true, bf.testDouble(val2)); - assertEquals(true, bf.testDouble(val3)); + assertTrue( bf.testDouble(val)); + assertTrue( bf.testDouble(val1)); + assertTrue( bf.testDouble(val2)); + assertTrue( bf.testDouble(val3)); double randVal = 0; for (int i = 0; i < COUNT; i++) { @@ -371,9 +371,9 @@ public class TestBloomFilter { bf.addDouble(randVal); } // last value should be present - assertEquals(true, bf.testDouble(randVal)); + assertTrue( bf.testDouble(randVal)); // most likely this value should not exist - assertEquals(false, bf.testDouble(-120.2d)); + assertFalse( bf.testDouble(-120.2d)); assertEquals(7800, bf.sizeInBytes()); } @@ -386,30 +386,30 @@ public class TestBloomFilter { String val2 = "bloom filter"; String val3 = "cuckoo filter"; - assertEquals(false, bf.testString(val)); - assertEquals(false, bf.testString(val1)); - assertEquals(false, bf.testString(val2)); - assertEquals(false, bf.testString(val3)); + assertFalse( bf.testString(val)); + assertFalse( bf.testString(val1)); + assertFalse( bf.testString(val2)); + assertFalse( bf.testString(val3)); bf.addString(val); - assertEquals(true, bf.testString(val)); - assertEquals(false, bf.testString(val1)); - assertEquals(false, bf.testString(val2)); - assertEquals(false, bf.testString(val3)); + assertTrue( bf.testString(val)); + assertFalse( bf.testString(val1)); + assertFalse( bf.testString(val2)); + assertFalse( bf.testString(val3)); bf.addString(val1); - assertEquals(true, bf.testString(val)); - assertEquals(true, bf.testString(val1)); - assertEquals(false, bf.testString(val2)); - assertEquals(false, bf.testString(val3)); + assertTrue( bf.testString(val)); + assertTrue( bf.testString(val1)); + assertFalse( bf.testString(val2)); + assertFalse( bf.testString(val3)); bf.addString(val2); - assertEquals(true, bf.testString(val)); - assertEquals(true, bf.testString(val1)); - assertEquals(true, bf.testString(val2)); - assertEquals(false, bf.testString(val3)); + assertTrue( bf.testString(val)); + assertTrue( bf.testString(val1)); + assertTrue( bf.testString(val2)); + assertFalse( bf.testString(val3)); bf.addString(val3); - assertEquals(true, bf.testString(val)); - assertEquals(true, bf.testString(val1)); - assertEquals(true, bf.testString(val2)); - assertEquals(true, bf.testString(val3)); + assertTrue( bf.testString(val)); + assertTrue( bf.testString(val1)); + assertTrue( bf.testString(val2)); + assertTrue( bf.testString(val3)); long randVal = 0; for (int i = 0; i < COUNT; i++) { @@ -417,9 +417,9 @@ public class TestBloomFilter { bf.addString(Long.toString(randVal)); } // last value should be present - assertEquals(true, bf.testString(Long.toString(randVal))); + assertTrue( bf.testString(Long.toString(randVal))); // most likely this value should not exist - assertEquals(false, bf.testString(Long.toString(-120))); + assertFalse( bf.testString(Long.toString(-120))); assertEquals(77944, bf.sizeInBytes()); } @@ -446,25 +446,25 @@ public class TestBloomFilter { bf2.addString(v2); bf2.addString(v3); - assertEquals(true, bf.testString(val)); - assertEquals(true, bf.testString(val1)); - assertEquals(true, bf.testString(val2)); - assertEquals(true, bf.testString(val3)); - assertEquals(false, bf.testString(v)); - assertEquals(false, bf.testString(v1)); - assertEquals(false, bf.testString(v2)); - assertEquals(false, bf.testString(v3)); + assertTrue( bf.testString(val)); + assertTrue( bf.testString(val1)); + assertTrue( bf.testString(val2)); + assertTrue( bf.testString(val3)); + assertFalse( bf.testString(v)); + assertFalse( bf.testString(v1)); + assertFalse( bf.testString(v2)); + assertFalse( bf.testString(v3)); bf.merge(bf2); - assertEquals(true, bf.testString(val)); - assertEquals(true, bf.testString(val1)); - assertEquals(true, bf.testString(val2)); - assertEquals(true, bf.testString(val3)); - assertEquals(true, bf.testString(v)); - assertEquals(true, bf.testString(v1)); - assertEquals(true, bf.testString(v2)); - assertEquals(true, bf.testString(v3)); + assertTrue( bf.testString(val)); + assertTrue( bf.testString(val1)); + assertTrue( bf.testString(val2)); + assertTrue( bf.testString(val3)); + assertTrue( bf.testString(v)); + assertTrue( bf.testString(v1)); + assertTrue( bf.testString(v2)); + assertTrue( bf.testString(v3)); } @Test @@ -488,8 +488,8 @@ public class TestBloomFilter { BloomFilter bf2 = BloomFilter.deserialize(bytesIn); for (String val : inputs) { - assertEquals("Testing bf1 with " + val, true, bf1.testString(val)); - assertEquals("Testing bf2 with " + val, true, bf2.testString(val)); + assertTrue("Testing bf1 with " + val, bf1.testString(val)); + assertTrue("Testing bf2 with " + val, bf2.testString(val)); } } @@ -589,7 +589,7 @@ public class TestBloomFilter { public void testFpp1K() { int size = 1000; BloomFilter bf = new BloomFilter(size); - int fp = 0; + int fp; for (int i = 0; i < size; i++) { bf.addLong(i); } @@ -598,18 +598,10 @@ public class TestBloomFilter { assertTrue(bf.testLong(i)); } - for (int i = 0; i < size; i++) { - int probe = rand.nextInt(); - // out of range probes - if ((probe > size) || (probe < 0)) { - if (bf.testLong(probe)) { - fp++; - } - } - } + fp = getFp(size, bf); double actualFpp = (double) fp / (double) size; - double expectedFpp = bf.DEFAULT_FPP; + double expectedFpp = BloomFilter.DEFAULT_FPP; if (actualFpp < expectedFpp) { assertTrue(actualFpp != 0.0); } else { @@ -621,7 +613,7 @@ public class TestBloomFilter { public void testFpp10K() { int size = 10_000; BloomFilter bf = new BloomFilter(size); - int fp = 0; + int fp; for (int i = 0; i < size; i++) { bf.addLong(i); } @@ -630,18 +622,10 @@ public class TestBloomFilter { assertTrue(bf.testLong(i)); } - for (int i = 0; i < size; i++) { - int probe = rand.nextInt(); - // out of range probes - if ((probe > size) || (probe < 0)) { - if (bf.testLong(probe)) { - fp++; - } - } - } + fp = getFp(size, bf); double actualFpp = (double) fp / (double) size; - double expectedFpp = bf.DEFAULT_FPP; + double expectedFpp = BloomFilter.DEFAULT_FPP; if (actualFpp < expectedFpp) { assertTrue(actualFpp != 0.0); } else { @@ -653,7 +637,7 @@ public class TestBloomFilter { public void testFpp1M() { int size = 1_000_000; BloomFilter bf = new BloomFilter(size); - int fp = 0; + int fp; for (int i = 0; i < size; i++) { bf.addLong(i); } @@ -662,18 +646,10 @@ public class TestBloomFilter { assertTrue(bf.testLong(i)); } - for (int i = 0; i < size; i++) { - int probe = rand.nextInt(); - // out of range probes - if ((probe > size) || (probe < 0)) { - if (bf.testLong(probe)) { - fp++; - } - } - } + fp = getFp(size, bf); double actualFpp = (double) fp / (double) size; - double expectedFpp = bf.DEFAULT_FPP; + double expectedFpp = BloomFilter.DEFAULT_FPP; if (actualFpp < expectedFpp) { assertTrue(actualFpp != 0.0); } else { @@ -685,7 +661,7 @@ public class TestBloomFilter { public void testFpp10M() { int size = 10_000_000; BloomFilter bf = new BloomFilter(size); - int fp = 0; + int fp; for (int i = 0; i < size; i++) { bf.addLong(i); } @@ -694,6 +670,19 @@ public class TestBloomFilter { assertTrue(bf.testLong(i)); } + fp = getFp(size, bf); + + double actualFpp = (double) fp / (double) size; + double expectedFpp = BloomFilter.DEFAULT_FPP; + if (actualFpp < expectedFpp) { + assertTrue(actualFpp != 0.0); + } else { + assertEquals(expectedFpp, actualFpp, 0.005); + } + } + + @SuppressWarnings("Duplicates") private int getFp(int size, BloomFilter bf) { + int fp = 0; for (int i = 0; i < size; i++) { int probe = rand.nextInt(); // out of range probes @@ -703,13 +692,6 @@ public class TestBloomFilter { } } } - - double actualFpp = (double) fp / (double) size; - double expectedFpp = bf.DEFAULT_FPP; - if (actualFpp < expectedFpp) { - assertTrue(actualFpp != 0.0); - } else { - assertEquals(expectedFpp, actualFpp, 0.005); - } + return fp; } } http://git-wip-us.apache.org/repos/asf/hive/blob/d22fc5b2/storage-api/src/test/org/apache/hive/common/util/TestBloomKFilter.java ---------------------------------------------------------------------- diff --git a/storage-api/src/test/org/apache/hive/common/util/TestBloomKFilter.java b/storage-api/src/test/org/apache/hive/common/util/TestBloomKFilter.java index df1b55e..1b4e210 100644 --- a/storage-api/src/test/org/apache/hive/common/util/TestBloomKFilter.java +++ b/storage-api/src/test/org/apache/hive/common/util/TestBloomKFilter.java @@ -19,6 +19,7 @@ package org.apache.hive.common.util; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; import java.io.ByteArrayInputStream; @@ -33,7 +34,7 @@ import org.junit.Test; */ public class TestBloomKFilter { private static final int COUNT = 100; - Random rand = new Random(123); + private Random rand = new Random(123); // bloom-1 is known to have higher fpp, to make tests pass give room for another 3% private final double deltaError = 0.03; @@ -89,30 +90,30 @@ public class TestBloomKFilter { byte[] val2 = new byte[]{1, 2, 3, 4, 5}; byte[] val3 = new byte[]{1, 2, 3, 4, 5, 6}; - assertEquals(false, bf.test(val)); - assertEquals(false, bf.test(val1)); - assertEquals(false, bf.test(val2)); - assertEquals(false, bf.test(val3)); + assertFalse(bf.test(val)); + assertFalse(bf.test(val1)); + assertFalse(bf.test(val2)); + assertFalse(bf.test(val3)); bf.add(val); - assertEquals(true, bf.test(val)); - assertEquals(false, bf.test(val1)); - assertEquals(false, bf.test(val2)); - assertEquals(false, bf.test(val3)); + assertTrue(bf.test(val)); + assertFalse(bf.test(val1)); + assertFalse(bf.test(val2)); + assertFalse(bf.test(val3)); bf.add(val1); - assertEquals(true, bf.test(val)); - assertEquals(true, bf.test(val1)); - assertEquals(false, bf.test(val2)); - assertEquals(false, bf.test(val3)); + assertTrue(bf.test(val)); + assertTrue(bf.test(val1)); + assertFalse(bf.test(val2)); + assertFalse(bf.test(val3)); bf.add(val2); - assertEquals(true, bf.test(val)); - assertEquals(true, bf.test(val1)); - assertEquals(true, bf.test(val2)); - assertEquals(false, bf.test(val3)); + assertTrue(bf.test(val)); + assertTrue(bf.test(val1)); + assertTrue(bf.test(val2)); + assertFalse(bf.test(val3)); bf.add(val3); - assertEquals(true, bf.test(val)); - assertEquals(true, bf.test(val1)); - assertEquals(true, bf.test(val2)); - assertEquals(true, bf.test(val3)); + assertTrue(bf.test(val)); + assertTrue(bf.test(val1)); + assertTrue(bf.test(val2)); + assertTrue(bf.test(val3)); byte[] randVal = new byte[COUNT]; for (int i = 0; i < COUNT; i++) { @@ -120,14 +121,14 @@ public class TestBloomKFilter { bf.add(randVal); } // last value should be present - assertEquals(true, bf.test(randVal)); + assertTrue( bf.test(randVal)); // most likely this value should not exist randVal[0] = 0; randVal[1] = 0; randVal[2] = 0; randVal[3] = 0; randVal[4] = 0; - assertEquals(false, bf.test(randVal)); + assertFalse( bf.test(randVal)); assertEquals(7808, bf.sizeInBytes()); } @@ -140,30 +141,30 @@ public class TestBloomKFilter { byte val2 = 2; byte val3 = Byte.MAX_VALUE; - assertEquals(false, bf.testLong(val)); - assertEquals(false, bf.testLong(val1)); - assertEquals(false, bf.testLong(val2)); - assertEquals(false, bf.testLong(val3)); + assertFalse(bf.testLong(val)); + assertFalse(bf.testLong(val1)); + assertFalse(bf.testLong(val2)); + assertFalse(bf.testLong(val3)); bf.addLong(val); - assertEquals(true, bf.testLong(val)); - assertEquals(false, bf.testLong(val1)); - assertEquals(false, bf.testLong(val2)); - assertEquals(false, bf.testLong(val3)); + assertTrue(bf.testLong(val)); + assertFalse(bf.testLong(val1)); + assertFalse(bf.testLong(val2)); + assertFalse(bf.testLong(val3)); bf.addLong(val1); - assertEquals(true, bf.testLong(val)); - assertEquals(true, bf.testLong(val1)); - assertEquals(false, bf.testLong(val2)); - assertEquals(false, bf.testLong(val3)); + assertTrue(bf.testLong(val)); + assertTrue(bf.testLong(val1)); + assertFalse(bf.testLong(val2)); + assertFalse(bf.testLong(val3)); bf.addLong(val2); - assertEquals(true, bf.testLong(val)); - assertEquals(true, bf.testLong(val1)); - assertEquals(true, bf.testLong(val2)); - assertEquals(false, bf.testLong(val3)); + assertTrue(bf.testLong(val)); + assertTrue(bf.testLong(val1)); + assertTrue(bf.testLong(val2)); + assertFalse(bf.testLong(val3)); bf.addLong(val3); - assertEquals(true, bf.testLong(val)); - assertEquals(true, bf.testLong(val1)); - assertEquals(true, bf.testLong(val2)); - assertEquals(true, bf.testLong(val3)); + assertTrue(bf.testLong(val)); + assertTrue(bf.testLong(val1)); + assertTrue(bf.testLong(val2)); + assertTrue(bf.testLong(val3)); byte randVal = 0; for (int i = 0; i < COUNT; i++) { @@ -171,9 +172,9 @@ public class TestBloomKFilter { bf.addLong(randVal); } // last value should be present - assertEquals(true, bf.testLong(randVal)); + assertTrue( bf.testLong(randVal)); // most likely this value should not exist - assertEquals(false, bf.testLong((byte) -120)); + assertFalse( bf.testLong((byte) -120)); assertEquals(7808, bf.sizeInBytes()); } @@ -186,30 +187,30 @@ public class TestBloomKFilter { int val2 = 2; int val3 = Integer.MAX_VALUE; - assertEquals(false, bf.testLong(val)); - assertEquals(false, bf.testLong(val1)); - assertEquals(false, bf.testLong(val2)); - assertEquals(false, bf.testLong(val3)); + assertFalse( bf.testLong(val)); + assertFalse( bf.testLong(val1)); + assertFalse( bf.testLong(val2)); + assertFalse( bf.testLong(val3)); bf.addLong(val); - assertEquals(true, bf.testLong(val)); - assertEquals(false, bf.testLong(val1)); - assertEquals(false, bf.testLong(val2)); - assertEquals(false, bf.testLong(val3)); + assertTrue( bf.testLong(val)); + assertFalse( bf.testLong(val1)); + assertFalse( bf.testLong(val2)); + assertFalse( bf.testLong(val3)); bf.addLong(val1); - assertEquals(true, bf.testLong(val)); - assertEquals(true, bf.testLong(val1)); - assertEquals(false, bf.testLong(val2)); - assertEquals(false, bf.testLong(val3)); + assertTrue( bf.testLong(val)); + assertTrue( bf.testLong(val1)); + assertFalse( bf.testLong(val2)); + assertFalse( bf.testLong(val3)); bf.addLong(val2); - assertEquals(true, bf.testLong(val)); - assertEquals(true, bf.testLong(val1)); - assertEquals(true, bf.testLong(val2)); - assertEquals(false, bf.testLong(val3)); + assertTrue( bf.testLong(val)); + assertTrue( bf.testLong(val1)); + assertTrue( bf.testLong(val2)); + assertFalse( bf.testLong(val3)); bf.addLong(val3); - assertEquals(true, bf.testLong(val)); - assertEquals(true, bf.testLong(val1)); - assertEquals(true, bf.testLong(val2)); - assertEquals(true, bf.testLong(val3)); + assertTrue( bf.testLong(val)); + assertTrue( bf.testLong(val1)); + assertTrue( bf.testLong(val2)); + assertTrue( bf.testLong(val3)); int randVal = 0; for (int i = 0; i < COUNT; i++) { @@ -217,9 +218,9 @@ public class TestBloomKFilter { bf.addLong(randVal); } // last value should be present - assertEquals(true, bf.testLong(randVal)); + assertTrue( bf.testLong(randVal)); // most likely this value should not exist - assertEquals(false, bf.testLong(-120)); + assertFalse( bf.testLong(-120)); assertEquals(7808, bf.sizeInBytes()); } @@ -232,30 +233,30 @@ public class TestBloomKFilter { long val2 = 2; long val3 = Long.MAX_VALUE; - assertEquals(false, bf.testLong(val)); - assertEquals(false, bf.testLong(val1)); - assertEquals(false, bf.testLong(val2)); - assertEquals(false, bf.testLong(val3)); + assertFalse( bf.testLong(val)); + assertFalse( bf.testLong(val1)); + assertFalse( bf.testLong(val2)); + assertFalse( bf.testLong(val3)); bf.addLong(val); - assertEquals(true, bf.testLong(val)); - assertEquals(false, bf.testLong(val1)); - assertEquals(false, bf.testLong(val2)); - assertEquals(false, bf.testLong(val3)); + assertTrue( bf.testLong(val)); + assertFalse( bf.testLong(val1)); + assertFalse( bf.testLong(val2)); + assertFalse( bf.testLong(val3)); bf.addLong(val1); - assertEquals(true, bf.testLong(val)); - assertEquals(true, bf.testLong(val1)); - assertEquals(false, bf.testLong(val2)); - assertEquals(false, bf.testLong(val3)); + assertTrue( bf.testLong(val)); + assertTrue( bf.testLong(val1)); + assertFalse( bf.testLong(val2)); + assertFalse( bf.testLong(val3)); bf.addLong(val2); - assertEquals(true, bf.testLong(val)); - assertEquals(true, bf.testLong(val1)); - assertEquals(true, bf.testLong(val2)); - assertEquals(false, bf.testLong(val3)); + assertTrue( bf.testLong(val)); + assertTrue( bf.testLong(val1)); + assertTrue( bf.testLong(val2)); + assertFalse( bf.testLong(val3)); bf.addLong(val3); - assertEquals(true, bf.testLong(val)); - assertEquals(true, bf.testLong(val1)); - assertEquals(true, bf.testLong(val2)); - assertEquals(true, bf.testLong(val3)); + assertTrue( bf.testLong(val)); + assertTrue( bf.testLong(val1)); + assertTrue( bf.testLong(val2)); + assertTrue( bf.testLong(val3)); long randVal = 0; for (int i = 0; i < COUNT; i++) { @@ -263,9 +264,9 @@ public class TestBloomKFilter { bf.addLong(randVal); } // last value should be present - assertEquals(true, bf.testLong(randVal)); + assertTrue( bf.testLong(randVal)); // most likely this value should not exist - assertEquals(false, bf.testLong(-120)); + assertFalse( bf.testLong(-120)); assertEquals(7808, bf.sizeInBytes()); } @@ -278,30 +279,30 @@ public class TestBloomKFilter { float val2 = 2.2f; float val3 = Float.MAX_VALUE; - assertEquals(false, bf.testDouble(val)); - assertEquals(false, bf.testDouble(val1)); - assertEquals(false, bf.testDouble(val2)); - assertEquals(false, bf.testDouble(val3)); + assertFalse( bf.testDouble(val)); + assertFalse( bf.testDouble(val1)); + assertFalse( bf.testDouble(val2)); + assertFalse( bf.testDouble(val3)); bf.addDouble(val); - assertEquals(true, bf.testDouble(val)); - assertEquals(false, bf.testDouble(val1)); - assertEquals(false, bf.testDouble(val2)); - assertEquals(false, bf.testDouble(val3)); + assertTrue( bf.testDouble(val)); + assertFalse( bf.testDouble(val1)); + assertFalse( bf.testDouble(val2)); + assertFalse( bf.testDouble(val3)); bf.addDouble(val1); - assertEquals(true, bf.testDouble(val)); - assertEquals(true, bf.testDouble(val1)); - assertEquals(false, bf.testDouble(val2)); - assertEquals(false, bf.testDouble(val3)); + assertTrue( bf.testDouble(val)); + assertTrue( bf.testDouble(val1)); + assertFalse( bf.testDouble(val2)); + assertFalse( bf.testDouble(val3)); bf.addDouble(val2); - assertEquals(true, bf.testDouble(val)); - assertEquals(true, bf.testDouble(val1)); - assertEquals(true, bf.testDouble(val2)); - assertEquals(false, bf.testDouble(val3)); + assertTrue( bf.testDouble(val)); + assertTrue( bf.testDouble(val1)); + assertTrue( bf.testDouble(val2)); + assertFalse( bf.testDouble(val3)); bf.addDouble(val3); - assertEquals(true, bf.testDouble(val)); - assertEquals(true, bf.testDouble(val1)); - assertEquals(true, bf.testDouble(val2)); - assertEquals(true, bf.testDouble(val3)); + assertTrue( bf.testDouble(val)); + assertTrue( bf.testDouble(val1)); + assertTrue( bf.testDouble(val2)); + assertTrue( bf.testDouble(val3)); float randVal = 0; for (int i = 0; i < COUNT; i++) { @@ -309,9 +310,9 @@ public class TestBloomKFilter { bf.addDouble(randVal); } // last value should be present - assertEquals(true, bf.testDouble(randVal)); + assertTrue( bf.testDouble(randVal)); // most likely this value should not exist - assertEquals(false, bf.testDouble(-120.2f)); + assertFalse( bf.testDouble(-120.2f)); assertEquals(7808, bf.sizeInBytes()); } @@ -324,30 +325,30 @@ public class TestBloomKFilter { double val2 = 2.2d; double val3 = Double.MAX_VALUE; - assertEquals(false, bf.testDouble(val)); - assertEquals(false, bf.testDouble(val1)); - assertEquals(false, bf.testDouble(val2)); - assertEquals(false, bf.testDouble(val3)); + assertFalse( bf.testDouble(val)); + assertFalse( bf.testDouble(val1)); + assertFalse( bf.testDouble(val2)); + assertFalse( bf.testDouble(val3)); bf.addDouble(val); - assertEquals(true, bf.testDouble(val)); - assertEquals(false, bf.testDouble(val1)); - assertEquals(false, bf.testDouble(val2)); - assertEquals(false, bf.testDouble(val3)); + assertTrue( bf.testDouble(val)); + assertFalse( bf.testDouble(val1)); + assertFalse( bf.testDouble(val2)); + assertFalse( bf.testDouble(val3)); bf.addDouble(val1); - assertEquals(true, bf.testDouble(val)); - assertEquals(true, bf.testDouble(val1)); - assertEquals(false, bf.testDouble(val2)); - assertEquals(false, bf.testDouble(val3)); + assertTrue( bf.testDouble(val)); + assertTrue( bf.testDouble(val1)); + assertFalse( bf.testDouble(val2)); + assertFalse( bf.testDouble(val3)); bf.addDouble(val2); - assertEquals(true, bf.testDouble(val)); - assertEquals(true, bf.testDouble(val1)); - assertEquals(true, bf.testDouble(val2)); - assertEquals(false, bf.testDouble(val3)); + assertTrue( bf.testDouble(val)); + assertTrue( bf.testDouble(val1)); + assertTrue( bf.testDouble(val2)); + assertFalse( bf.testDouble(val3)); bf.addDouble(val3); - assertEquals(true, bf.testDouble(val)); - assertEquals(true, bf.testDouble(val1)); - assertEquals(true, bf.testDouble(val2)); - assertEquals(true, bf.testDouble(val3)); + assertTrue( bf.testDouble(val)); + assertTrue( bf.testDouble(val1)); + assertTrue( bf.testDouble(val2)); + assertTrue( bf.testDouble(val3)); double randVal = 0; for (int i = 0; i < COUNT; i++) { @@ -355,9 +356,9 @@ public class TestBloomKFilter { bf.addDouble(randVal); } // last value should be present - assertEquals(true, bf.testDouble(randVal)); + assertTrue( bf.testDouble(randVal)); // most likely this value should not exist - assertEquals(false, bf.testDouble(-120.2d)); + assertFalse( bf.testDouble(-120.2d)); assertEquals(7808, bf.sizeInBytes()); } @@ -370,30 +371,30 @@ public class TestBloomKFilter { String val2 = "bloom filter"; String val3 = "cuckoo filter"; - assertEquals(false, bf.testString(val)); - assertEquals(false, bf.testString(val1)); - assertEquals(false, bf.testString(val2)); - assertEquals(false, bf.testString(val3)); + assertFalse( bf.testString(val)); + assertFalse( bf.testString(val1)); + assertFalse( bf.testString(val2)); + assertFalse( bf.testString(val3)); bf.addString(val); - assertEquals(true, bf.testString(val)); - assertEquals(false, bf.testString(val1)); - assertEquals(false, bf.testString(val2)); - assertEquals(false, bf.testString(val3)); + assertTrue( bf.testString(val)); + assertFalse( bf.testString(val1)); + assertFalse( bf.testString(val2)); + assertFalse( bf.testString(val3)); bf.addString(val1); - assertEquals(true, bf.testString(val)); - assertEquals(true, bf.testString(val1)); - assertEquals(false, bf.testString(val2)); - assertEquals(false, bf.testString(val3)); + assertTrue( bf.testString(val)); + assertTrue( bf.testString(val1)); + assertFalse( bf.testString(val2)); + assertFalse( bf.testString(val3)); bf.addString(val2); - assertEquals(true, bf.testString(val)); - assertEquals(true, bf.testString(val1)); - assertEquals(true, bf.testString(val2)); - assertEquals(false, bf.testString(val3)); + assertTrue( bf.testString(val)); + assertTrue( bf.testString(val1)); + assertTrue( bf.testString(val2)); + assertFalse( bf.testString(val3)); bf.addString(val3); - assertEquals(true, bf.testString(val)); - assertEquals(true, bf.testString(val1)); - assertEquals(true, bf.testString(val2)); - assertEquals(true, bf.testString(val3)); + assertTrue( bf.testString(val)); + assertTrue( bf.testString(val1)); + assertTrue( bf.testString(val2)); + assertTrue( bf.testString(val3)); long randVal = 0; for (int i = 0; i < COUNT; i++) { @@ -401,9 +402,9 @@ public class TestBloomKFilter { bf.addString(Long.toString(randVal)); } // last value should be present - assertEquals(true, bf.testString(Long.toString(randVal))); + assertTrue( bf.testString(Long.toString(randVal))); // most likely this value should not exist - assertEquals(false, bf.testString(Long.toString(-120))); + assertFalse( bf.testString(Long.toString(-120))); assertEquals(77952, bf.sizeInBytes()); } @@ -430,25 +431,25 @@ public class TestBloomKFilter { bf2.addString(v2); bf2.addString(v3); - assertEquals(true, bf.testString(val)); - assertEquals(true, bf.testString(val1)); - assertEquals(true, bf.testString(val2)); - assertEquals(true, bf.testString(val3)); - assertEquals(false, bf.testString(v)); - assertEquals(false, bf.testString(v1)); - assertEquals(false, bf.testString(v2)); - assertEquals(false, bf.testString(v3)); + assertTrue( bf.testString(val)); + assertTrue( bf.testString(val1)); + assertTrue( bf.testString(val2)); + assertTrue( bf.testString(val3)); + assertFalse( bf.testString(v)); + assertFalse( bf.testString(v1)); + assertFalse( bf.testString(v2)); + assertFalse( bf.testString(v3)); bf.merge(bf2); - assertEquals(true, bf.testString(val)); - assertEquals(true, bf.testString(val1)); - assertEquals(true, bf.testString(val2)); - assertEquals(true, bf.testString(val3)); - assertEquals(true, bf.testString(v)); - assertEquals(true, bf.testString(v1)); - assertEquals(true, bf.testString(v2)); - assertEquals(true, bf.testString(v3)); + assertTrue( bf.testString(val)); + assertTrue( bf.testString(val1)); + assertTrue( bf.testString(val2)); + assertTrue( bf.testString(val3)); + assertTrue( bf.testString(v)); + assertTrue( bf.testString(v1)); + assertTrue( bf.testString(v2)); + assertTrue( bf.testString(v3)); } @Test @@ -472,8 +473,8 @@ public class TestBloomKFilter { BloomKFilter bf2 = BloomKFilter.deserialize(bytesIn); for (String val : inputs) { - assertEquals("Testing bf1 with " + val, true, bf1.testString(val)); - assertEquals("Testing bf2 with " + val, true, bf2.testString(val)); + assertTrue("Testing bf1 with " + val, bf1.testString(val)); + assertTrue("Testing bf2 with " + val, bf2.testString(val)); } } @@ -573,7 +574,7 @@ public class TestBloomKFilter { public void testFpp1K() { int size = 1000; BloomKFilter bf = new BloomKFilter(size); - int fp = 0; + int fp; for (int i = 0; i < size; i++) { bf.addLong(i); } @@ -582,18 +583,10 @@ public class TestBloomKFilter { assertTrue(bf.testLong(i)); } - for (int i = 0; i < size; i++) { - int probe = rand.nextInt(); - // out of range probes - if ((probe > size) || (probe < 0)) { - if (bf.testLong(probe)) { - fp++; - } - } - } + fp = getFp(size, bf); double actualFpp = (double) fp / (double) size; - double expectedFpp = bf.DEFAULT_FPP; + double expectedFpp = BloomKFilter.DEFAULT_FPP; if (actualFpp < expectedFpp) { assertTrue(actualFpp != 0.0); } else { @@ -605,7 +598,7 @@ public class TestBloomKFilter { public void testFpp10K() { int size = 10_000; BloomKFilter bf = new BloomKFilter(size); - int fp = 0; + int fp; for (int i = 0; i < size; i++) { bf.addLong(i); } @@ -614,18 +607,10 @@ public class TestBloomKFilter { assertTrue(bf.testLong(i)); } - for (int i = 0; i < size; i++) { - int probe = rand.nextInt(); - // out of range probes - if ((probe > size) || (probe < 0)) { - if (bf.testLong(probe)) { - fp++; - } - } - } + fp = getFp(size, bf); double actualFpp = (double) fp / (double) size; - double expectedFpp = bf.DEFAULT_FPP; + double expectedFpp = BloomKFilter.DEFAULT_FPP; if (actualFpp < expectedFpp) { assertTrue(actualFpp != 0.0); } else { @@ -634,10 +619,10 @@ public class TestBloomKFilter { } @Test - public void testFpp1M() { + public void testFpp1MLong() { int size = 1_000_000; BloomKFilter bf = new BloomKFilter(size); - int fp = 0; + int fp; for (int i = 0; i < size; i++) { bf.addLong(i); } @@ -646,18 +631,35 @@ public class TestBloomKFilter { assertTrue(bf.testLong(i)); } + fp = getFp(size, bf); + + double actualFpp = (double) fp / (double) size; + double expectedFpp = BloomKFilter.DEFAULT_FPP; + if (actualFpp < expectedFpp) { + assertTrue(actualFpp != 0.0); + } else { + assertEquals(expectedFpp, actualFpp, deltaError); + } + } + + @Test + public void testFpp1MFloat() { + int size = 1_000_000; + float constant = 0.12358f; + BloomKFilter bf = new BloomKFilter(size); + int fp; + for (int i = 0; i < size; i++) { + bf.addFloat(i * constant); + } + for (int i = 0; i < size; i++) { - int probe = rand.nextInt(); - // out of range probes - if ((probe > size) || (probe < 0)) { - if (bf.testLong(probe)) { - fp++; - } - } + assertTrue(bf.testFloat(i * constant)); } + fp = getFp(size, bf); + double actualFpp = (double) fp / (double) size; - double expectedFpp = bf.DEFAULT_FPP; + double expectedFpp = BloomKFilter.DEFAULT_FPP; if (actualFpp < expectedFpp) { assertTrue(actualFpp != 0.0); } else { @@ -669,7 +671,7 @@ public class TestBloomKFilter { public void testFpp10M() { int size = 10_000_000; BloomKFilter bf = new BloomKFilter(size); - int fp = 0; + int fp; for (int i = 0; i < size; i++) { bf.addLong(i); } @@ -678,6 +680,19 @@ public class TestBloomKFilter { assertTrue(bf.testLong(i)); } + fp = getFp(size, bf); + + double actualFpp = (double) fp / (double) size; + double expectedFpp = BloomKFilter.DEFAULT_FPP; + if (actualFpp < expectedFpp) { + assertTrue(actualFpp != 0.0); + } else { + assertEquals(expectedFpp, actualFpp, deltaError); + } + } + + @SuppressWarnings("Duplicates") private int getFp(int size, BloomKFilter bf) { + int fp = 0; for (int i = 0; i < size; i++) { int probe = rand.nextInt(); // out of range probes @@ -687,13 +702,6 @@ public class TestBloomKFilter { } } } - - double actualFpp = (double) fp / (double) size; - double expectedFpp = bf.DEFAULT_FPP; - if (actualFpp < expectedFpp) { - assertTrue(actualFpp != 0.0); - } else { - assertEquals(expectedFpp, actualFpp, deltaError); - } + return fp; } }