This is an automated email from the ASF dual-hosted git repository. jackie pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git
The following commit(s) were added to refs/heads/master by this push: new a71b960 Add OnHeapGuavaBloomFilterReader (#6147) a71b960 is described below commit a71b96094dc58340274d11dad76b2bc58c7e09ff Author: Xiaotian (Jackie) Jiang <17555551+jackie-ji...@users.noreply.github.com> AuthorDate: Thu Oct 15 10:36:20 2020 -0700 Add OnHeapGuavaBloomFilterReader (#6147) Add the on-heap version of the guava bloom filter reader Add 2 new fields into the `BloomFilterConfig`: - maxSizeInBytes: if configured, limit the max size of the bloom filter (will use a larger fpp if the configured fpp hits the limit) - loadOnHeap: load the bloom filter on-heap or off-heap --- .../impl/bloom/OnHeapGuavaBloomFilterCreator.java | 16 ++++++- .../index/column/PhysicalColumnIndexContainer.java | 23 ++++----- .../segment/index/loader/IndexLoadingConfig.java | 2 +- .../segment/index/readers/BloomFilterReader.java | 2 +- ...Reader.java => BaseGuavaBloomFilterReader.java} | 36 ++++----------- .../readers/bloom/BloomFilterReaderFactory.java | 6 ++- .../readers/bloom/GuavaBloomFilterReaderUtils.java | 25 ++++++++++ .../bloom/OffHeapGuavaBloomFilterReader.java | 40 ++-------------- .../bloom/OnHeapGuavaBloomFilterReader.java | 54 ++++++++++++++++++++++ .../index/creator/BloomFilterCreatorTest.java | 11 +++-- .../pinot/spi/config/table/BloomFilterConfig.java | 24 ++++++++-- 11 files changed, 148 insertions(+), 91 deletions(-) diff --git a/pinot-core/src/main/java/org/apache/pinot/core/segment/creator/impl/bloom/OnHeapGuavaBloomFilterCreator.java b/pinot-core/src/main/java/org/apache/pinot/core/segment/creator/impl/bloom/OnHeapGuavaBloomFilterCreator.java index 1fb9b55..2ca942a 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/segment/creator/impl/bloom/OnHeapGuavaBloomFilterCreator.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/segment/creator/impl/bloom/OnHeapGuavaBloomFilterCreator.java @@ -27,7 +27,10 @@ import java.io.IOException; import java.nio.charset.StandardCharsets; import 
org.apache.pinot.core.segment.creator.BloomFilterCreator; import org.apache.pinot.core.segment.creator.impl.V1Constants; +import org.apache.pinot.core.segment.index.readers.bloom.GuavaBloomFilterReaderUtils; import org.apache.pinot.spi.config.table.BloomFilterConfig; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** @@ -37,6 +40,8 @@ import org.apache.pinot.spi.config.table.BloomFilterConfig; */ @SuppressWarnings("UnstableApiUsage") public class OnHeapGuavaBloomFilterCreator implements BloomFilterCreator { + private static final Logger LOGGER = LoggerFactory.getLogger(OnHeapGuavaBloomFilterCreator.class); + public static final int TYPE_VALUE = 1; public static final int VERSION = 1; @@ -46,8 +51,15 @@ public class OnHeapGuavaBloomFilterCreator implements BloomFilterCreator { public OnHeapGuavaBloomFilterCreator(File indexDir, String columnName, int cardinality, BloomFilterConfig bloomFilterConfig) { _bloomFilterFile = new File(indexDir, columnName + V1Constants.Indexes.BLOOM_FILTER_FILE_EXTENSION); - _bloomFilter = - BloomFilter.create(Funnels.stringFunnel(StandardCharsets.UTF_8), cardinality, bloomFilterConfig.getFpp()); + // Calculate the actual fpp with regards to the max size for the bloom filter + double fpp = bloomFilterConfig.getFpp(); + int maxSizeInBytes = bloomFilterConfig.getMaxSizeInBytes(); + if (maxSizeInBytes > 0) { + double minFpp = GuavaBloomFilterReaderUtils.computeFPP(maxSizeInBytes, cardinality); + fpp = Math.max(fpp, minFpp); + } + LOGGER.info("Creating bloom filter with cardinality: {}, fpp: {}", cardinality, fpp); + _bloomFilter = BloomFilter.create(Funnels.stringFunnel(StandardCharsets.UTF_8), cardinality, fpp); } @Override diff --git a/pinot-core/src/main/java/org/apache/pinot/core/segment/index/column/PhysicalColumnIndexContainer.java b/pinot-core/src/main/java/org/apache/pinot/core/segment/index/column/PhysicalColumnIndexContainer.java index 0573afe..595a8e0 100644 --- 
a/pinot-core/src/main/java/org/apache/pinot/core/segment/index/column/PhysicalColumnIndexContainer.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/segment/index/column/PhysicalColumnIndexContainer.java @@ -54,6 +54,7 @@ import org.apache.pinot.core.segment.index.readers.text.LuceneTextIndexReader; import org.apache.pinot.core.segment.memory.PinotDataBuffer; import org.apache.pinot.core.segment.store.ColumnIndexType; import org.apache.pinot.core.segment.store.SegmentDirectory; +import org.apache.pinot.spi.config.table.BloomFilterConfig; import org.apache.pinot.spi.data.FieldSpec; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -74,18 +75,11 @@ public final class PhysicalColumnIndexContainer implements ColumnIndexContainer IndexLoadingConfig indexLoadingConfig, File segmentIndexDir) throws IOException { String columnName = metadata.getColumnName(); - boolean loadInvertedIndex = false; - boolean loadRangeIndex = false; - boolean loadTextIndex = false; - boolean loadOnHeapDictionary = false; - boolean loadBloomFilter = false; - if (indexLoadingConfig != null) { - loadInvertedIndex = indexLoadingConfig.getInvertedIndexColumns().contains(columnName); - loadRangeIndex = indexLoadingConfig.getRangeIndexColumns().contains(columnName); - loadOnHeapDictionary = indexLoadingConfig.getOnHeapDictionaryColumns().contains(columnName); - loadBloomFilter = indexLoadingConfig.getBloomFilterConfigs().containsKey(columnName); - loadTextIndex = indexLoadingConfig.getTextIndexColumns().contains(columnName); - } + boolean loadInvertedIndex = indexLoadingConfig.getInvertedIndexColumns().contains(columnName); + boolean loadRangeIndex = indexLoadingConfig.getRangeIndexColumns().contains(columnName); + boolean loadTextIndex = indexLoadingConfig.getTextIndexColumns().contains(columnName); + boolean loadOnHeapDictionary = indexLoadingConfig.getOnHeapDictionaryColumns().contains(columnName); + BloomFilterConfig bloomFilterConfig = 
indexLoadingConfig.getBloomFilterConfigs().get(columnName); if (segmentReader.hasIndexFor(columnName, ColumnIndexType.NULLVALUE_VECTOR)) { PinotDataBuffer nullValueVectorBuffer = segmentReader.getIndexFor(columnName, ColumnIndexType.NULLVALUE_VECTOR); @@ -107,9 +101,10 @@ public final class PhysicalColumnIndexContainer implements ColumnIndexContainer if (metadata.hasDictionary()) { //bloom filter - if (loadBloomFilter) { + if (bloomFilterConfig != null) { PinotDataBuffer bloomFilterBuffer = segmentReader.getIndexFor(columnName, ColumnIndexType.BLOOM_FILTER); - _bloomFilter = BloomFilterReaderFactory.getBloomFilterReader(bloomFilterBuffer); + _bloomFilter = + BloomFilterReaderFactory.getBloomFilterReader(bloomFilterBuffer, bloomFilterConfig.isLoadOnHeap()); } else { _bloomFilter = null; } diff --git a/pinot-core/src/main/java/org/apache/pinot/core/segment/index/loader/IndexLoadingConfig.java b/pinot-core/src/main/java/org/apache/pinot/core/segment/index/loader/IndexLoadingConfig.java index 798ce61..a6817a0 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/segment/index/loader/IndexLoadingConfig.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/segment/index/loader/IndexLoadingConfig.java @@ -100,7 +100,7 @@ public class IndexLoadingConfig { List<String> bloomFilterColumns = indexingConfig.getBloomFilterColumns(); if (bloomFilterColumns != null) { for (String bloomFilterColumn : bloomFilterColumns) { - _bloomFilterConfigs.put(bloomFilterColumn, new BloomFilterConfig(BloomFilterConfig.DEFAULT_FPP)); + _bloomFilterConfigs.put(bloomFilterColumn, new BloomFilterConfig(BloomFilterConfig.DEFAULT_FPP, 0, false)); } } Map<String, BloomFilterConfig> bloomFilterConfigs = indexingConfig.getBloomFilterConfigs(); diff --git a/pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/BloomFilterReader.java b/pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/BloomFilterReader.java index 09a1dcf..2a16206 100644 --- 
a/pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/BloomFilterReader.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/BloomFilterReader.java @@ -36,5 +36,5 @@ public interface BloomFilterReader extends Closeable { * otherwise. * <p>This method is provided to prevent hashing the same value multiple times. */ - boolean mightContain(byte[] hash); + boolean mightContain(long hash1, long hash2); } diff --git a/pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/bloom/OffHeapGuavaBloomFilterReader.java b/pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/bloom/BaseGuavaBloomFilterReader.java similarity index 65% copy from pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/bloom/OffHeapGuavaBloomFilterReader.java copy to pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/bloom/BaseGuavaBloomFilterReader.java index 56dac45..0c6f5d4 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/bloom/OffHeapGuavaBloomFilterReader.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/bloom/BaseGuavaBloomFilterReader.java @@ -25,11 +25,10 @@ import org.apache.pinot.core.segment.memory.PinotDataBuffer; /** - * Off-heap reader for guava bloom filter. - * <p>The behavior should be aligned with {@link com.google.common.hash.BloomFilter}. + * Base implementation of the reader for guava bloom filter. + * <p>The format of the data should be aligned with the guava bloom filter. 
*/ -@SuppressWarnings("UnstableApiUsage") -public class OffHeapGuavaBloomFilterReader implements BloomFilterReader { +public abstract class BaseGuavaBloomFilterReader implements BloomFilterReader { // Format of the data buffer header: // - Strategy ordinal: 1 byte // - Number of hash functions: 1 byte @@ -39,11 +38,11 @@ public class OffHeapGuavaBloomFilterReader implements BloomFilterReader { private static final int NUM_LONGS_OFFSET = 2; private static final int HEADER_SIZE = 6; - private final int _numHashFunctions; - private final long _numBits; - private final PinotDataBuffer _valueBuffer; + protected final int _numHashFunctions; + protected final long _numBits; + protected final PinotDataBuffer _valueBuffer; - public OffHeapGuavaBloomFilterReader(PinotDataBuffer dataBuffer) { + public BaseGuavaBloomFilterReader(PinotDataBuffer dataBuffer) { byte strategyOrdinal = dataBuffer.getByte(STRATEGY_ORDINAL_OFFSET); Preconditions.checkState(strategyOrdinal == 1, "Unsupported strategy ordinal: %s", strategyOrdinal); _numHashFunctions = dataBuffer.getByte(NUM_HASH_FUNCTIONS_OFFSET) & 0xFF; @@ -53,27 +52,10 @@ public class OffHeapGuavaBloomFilterReader implements BloomFilterReader { @Override public boolean mightContain(String value) { - return mightContain(GuavaBloomFilterReaderUtils.hash(value)); - } - - @Override - public boolean mightContain(byte[] hash) { + byte[] hash = GuavaBloomFilterReaderUtils.hash(value); long hash1 = Longs.fromBytes(hash[7], hash[6], hash[5], hash[4], hash[3], hash[2], hash[1], hash[0]); long hash2 = Longs.fromBytes(hash[15], hash[14], hash[13], hash[12], hash[11], hash[10], hash[9], hash[8]); - long combinedHash = hash1; - for (int i = 0; i < _numHashFunctions; i++) { - long bitIndex = (combinedHash & Long.MAX_VALUE) % _numBits; - // NOTE: Guava bloom filter stores bits in a long array. Inside each long value, the bits are stored in the - // reverse order (the first bit is stored as the right most bit of the long). 
- int longIndex = (int) (bitIndex >>> 6); - int bitIndexInLong = (int) (bitIndex & 0x3F); - int byteIndex = (longIndex << 3) | (7 - (bitIndexInLong >>> 3)); - if ((_valueBuffer.getByte(byteIndex) & (1 << (bitIndexInLong & 7))) == 0) { - return false; - } - combinedHash += hash2; - } - return true; + return mightContain(hash1, hash2); } @Override diff --git a/pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/bloom/BloomFilterReaderFactory.java b/pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/bloom/BloomFilterReaderFactory.java index 21f0636..728dffa 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/bloom/BloomFilterReaderFactory.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/bloom/BloomFilterReaderFactory.java @@ -32,12 +32,14 @@ public class BloomFilterReaderFactory { private static final int VERSION_OFFSET = 4; private static final int HEADER_SIZE = 8; - public static BloomFilterReader getBloomFilterReader(PinotDataBuffer dataBuffer) { + public static BloomFilterReader getBloomFilterReader(PinotDataBuffer dataBuffer, boolean onHeap) { int typeValue = dataBuffer.getInt(TYPE_VALUE_OFFSET); int version = dataBuffer.getInt(VERSION_OFFSET); Preconditions.checkState( typeValue == OnHeapGuavaBloomFilterCreator.TYPE_VALUE && version == OnHeapGuavaBloomFilterCreator.VERSION, "Unsupported bloom filter type value: %s and version: %s", typeValue, version); - return new OffHeapGuavaBloomFilterReader(dataBuffer.view(HEADER_SIZE, dataBuffer.size())); + PinotDataBuffer bloomFilterDataBuffer = dataBuffer.view(HEADER_SIZE, dataBuffer.size()); + return onHeap ? 
new OnHeapGuavaBloomFilterReader(bloomFilterDataBuffer) + : new OffHeapGuavaBloomFilterReader(bloomFilterDataBuffer); } } diff --git a/pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/bloom/GuavaBloomFilterReaderUtils.java b/pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/bloom/GuavaBloomFilterReaderUtils.java index 648fdff..c9feb9e 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/bloom/GuavaBloomFilterReaderUtils.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/bloom/GuavaBloomFilterReaderUtils.java @@ -37,4 +37,29 @@ public class GuavaBloomFilterReaderUtils { public static byte[] hash(String value) { return HASH_FUNCTION.hashBytes(StringUtils.encodeUtf8(value)).asBytes(); } + + /* Cheat sheet: + + m: total bits + n: expected insertions + b: m/n, bits per insertion + p: expected false positive probability + k: number of hash functions + + 1) Optimal k = b * ln2 + 2) p = (1 - e ^ (-kn/m)) ^ k + 3) For optimal k: p = 2 ^ (-k) ~= 0.6185^b + 4) For optimal k: m = -nlnp / ((ln2) ^ 2) + + See http://en.wikipedia.org/wiki/Bloom_filter#Probability_of_false_positives for the formula. + */ + + /** + * Calculates the fpp (false positive probability) based on the given bloom filter size and number of insertions. 
+ */ + public static double computeFPP(int sizeInBytes, int numInsertions) { + double b = (double) sizeInBytes * Byte.SIZE / numInsertions; + double k = b * Math.log(2); + return Math.pow(2, -k); + } } diff --git a/pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/bloom/OffHeapGuavaBloomFilterReader.java b/pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/bloom/OffHeapGuavaBloomFilterReader.java index 56dac45..a5216e8 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/bloom/OffHeapGuavaBloomFilterReader.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/bloom/OffHeapGuavaBloomFilterReader.java @@ -18,48 +18,20 @@ */ package org.apache.pinot.core.segment.index.readers.bloom; -import com.google.common.base.Preconditions; -import com.google.common.primitives.Longs; -import org.apache.pinot.core.segment.index.readers.BloomFilterReader; import org.apache.pinot.core.segment.memory.PinotDataBuffer; /** * Off-heap reader for guava bloom filter. - * <p>The behavior should be aligned with {@link com.google.common.hash.BloomFilter}. 
*/ -@SuppressWarnings("UnstableApiUsage") -public class OffHeapGuavaBloomFilterReader implements BloomFilterReader { - // Format of the data buffer header: - // - Strategy ordinal: 1 byte - // - Number of hash functions: 1 byte - // - Number of long values: 4 bytes - private static final int STRATEGY_ORDINAL_OFFSET = 0; - private static final int NUM_HASH_FUNCTIONS_OFFSET = 1; - private static final int NUM_LONGS_OFFSET = 2; - private static final int HEADER_SIZE = 6; - - private final int _numHashFunctions; - private final long _numBits; - private final PinotDataBuffer _valueBuffer; +public class OffHeapGuavaBloomFilterReader extends BaseGuavaBloomFilterReader { public OffHeapGuavaBloomFilterReader(PinotDataBuffer dataBuffer) { - byte strategyOrdinal = dataBuffer.getByte(STRATEGY_ORDINAL_OFFSET); - Preconditions.checkState(strategyOrdinal == 1, "Unsupported strategy ordinal: %s", strategyOrdinal); - _numHashFunctions = dataBuffer.getByte(NUM_HASH_FUNCTIONS_OFFSET) & 0xFF; - _numBits = (long) dataBuffer.getInt(NUM_LONGS_OFFSET) * Long.SIZE; - _valueBuffer = dataBuffer.view(HEADER_SIZE, dataBuffer.size()); - } - - @Override - public boolean mightContain(String value) { - return mightContain(GuavaBloomFilterReaderUtils.hash(value)); + super(dataBuffer); } @Override - public boolean mightContain(byte[] hash) { - long hash1 = Longs.fromBytes(hash[7], hash[6], hash[5], hash[4], hash[3], hash[2], hash[1], hash[0]); - long hash2 = Longs.fromBytes(hash[15], hash[14], hash[13], hash[12], hash[11], hash[10], hash[9], hash[8]); + public boolean mightContain(long hash1, long hash2) { long combinedHash = hash1; for (int i = 0; i < _numHashFunctions; i++) { long bitIndex = (combinedHash & Long.MAX_VALUE) % _numBits; @@ -75,10 +47,4 @@ public class OffHeapGuavaBloomFilterReader implements BloomFilterReader { } return true; } - - @Override - public void close() { - // NOTE: DO NOT close the PinotDataBuffer here because it is tracked by the caller and might be reused later. 
The - // caller is responsible of closing the PinotDataBuffer. - } } diff --git a/pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/bloom/OnHeapGuavaBloomFilterReader.java b/pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/bloom/OnHeapGuavaBloomFilterReader.java new file mode 100644 index 0000000..e990712 --- /dev/null +++ b/pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/bloom/OnHeapGuavaBloomFilterReader.java @@ -0,0 +1,54 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.core.segment.index.readers.bloom; + +import org.apache.pinot.core.segment.memory.PinotDataBuffer; + + +/** + * On-heap reader for guava bloom filter. 
+ */ +public class OnHeapGuavaBloomFilterReader extends BaseGuavaBloomFilterReader { + private final long[] _data; + + public OnHeapGuavaBloomFilterReader(PinotDataBuffer dataBuffer) { + super(dataBuffer); + + int numLongs = (int) (_numBits / Long.SIZE); + _data = new long[numLongs]; + for (int i = 0; i < numLongs; i++) { + _data[i] = _valueBuffer.getLong(i * Long.BYTES); + } + } + + @Override + public boolean mightContain(long hash1, long hash2) { + long combinedHash = hash1; + for (int i = 0; i < _numHashFunctions; i++) { + long bitIndex = (combinedHash & Long.MAX_VALUE) % _numBits; + // NOTE: Guava bloom filter stores bits in a long array. Inside each long value, the bits are stored in the + // reverse order (the first bit is stored as the right most bit of the long). + if ((_data[(int) (bitIndex >>> 6)] & (1L << bitIndex)) == 0) { + return false; + } + combinedHash += hash2; + } + return true; + } +} diff --git a/pinot-core/src/test/java/org/apache/pinot/core/segment/index/creator/BloomFilterCreatorTest.java b/pinot-core/src/test/java/org/apache/pinot/core/segment/index/creator/BloomFilterCreatorTest.java index 4ecc586..28e5b7f 100644 --- a/pinot-core/src/test/java/org/apache/pinot/core/segment/index/creator/BloomFilterCreatorTest.java +++ b/pinot-core/src/test/java/org/apache/pinot/core/segment/index/creator/BloomFilterCreatorTest.java @@ -50,7 +50,7 @@ public class BloomFilterCreatorTest { int cardinality = 10000; String columnName = "testColumn"; try (BloomFilterCreator bloomFilterCreator = new OnHeapGuavaBloomFilterCreator(TEMP_DIR, columnName, cardinality, - new BloomFilterConfig(BloomFilterConfig.DEFAULT_FPP))) { + new BloomFilterConfig(BloomFilterConfig.DEFAULT_FPP, 0, false))) { for (int i = 0; i < 5; i++) { bloomFilterCreator.add(Integer.toString(i)); } @@ -60,12 +60,15 @@ public class BloomFilterCreatorTest { // Read the bloom filter File bloomFilterFile = new File(TEMP_DIR, columnName + V1Constants.Indexes.BLOOM_FILTER_FILE_EXTENSION); try 
(PinotDataBuffer dataBuffer = PinotDataBuffer.mapReadOnlyBigEndianFile(bloomFilterFile); - BloomFilterReader bloomFilterReader = BloomFilterReaderFactory.getBloomFilterReader(dataBuffer)) { + BloomFilterReader onHeapBloomFilter = BloomFilterReaderFactory.getBloomFilterReader(dataBuffer, true); + BloomFilterReader offHeapBloomFilter = BloomFilterReaderFactory.getBloomFilterReader(dataBuffer, false);) { for (int i = 0; i < 5; i++) { - Assert.assertTrue(bloomFilterReader.mightContain(Integer.toString(i))); + Assert.assertTrue(onHeapBloomFilter.mightContain(Integer.toString(i))); + Assert.assertTrue(offHeapBloomFilter.mightContain(Integer.toString(i))); } for (int i = 5; i < 10; i++) { - Assert.assertFalse(bloomFilterReader.mightContain(Integer.toString(i))); + Assert.assertFalse(onHeapBloomFilter.mightContain(Integer.toString(i))); + Assert.assertFalse(offHeapBloomFilter.mightContain(Integer.toString(i))); } } } diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/BloomFilterConfig.java b/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/BloomFilterConfig.java index d488ece..22af21b 100644 --- a/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/BloomFilterConfig.java +++ b/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/BloomFilterConfig.java @@ -28,14 +28,32 @@ public class BloomFilterConfig extends BaseJsonConfig { public static final double DEFAULT_FPP = 0.05; private final double _fpp; + private final int _maxSizeInBytes; + private final boolean _loadOnHeap; @JsonCreator - public BloomFilterConfig(@JsonProperty(value = "fpp", required = true) double fpp) { - Preconditions.checkArgument(fpp > 0.0 && fpp < 1.0, "Invalid fpp (false positive probability): %s", fpp); - _fpp = fpp; + public BloomFilterConfig(@JsonProperty(value = "fpp") double fpp, + @JsonProperty(value = "maxSizeInBytes") int maxSizeInBytes, + @JsonProperty(value = "loadOnHeap") boolean loadOnHeap) { + if (fpp != 0.0) { + Preconditions.checkArgument(fpp > 0.0 
&& fpp < 1.0, "Invalid fpp (false positive probability): %s", fpp); + _fpp = fpp; + } else { + _fpp = DEFAULT_FPP; + } + _maxSizeInBytes = maxSizeInBytes; + _loadOnHeap = loadOnHeap; } public double getFpp() { return _fpp; } + + public int getMaxSizeInBytes() { + return _maxSizeInBytes; + } + + public boolean isLoadOnHeap() { + return _loadOnHeap; + } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org For additional commands, e-mail: commits-help@pinot.apache.org