Repository: asterixdb
Updated Branches:
  refs/heads/master 94aa28c37 -> e54115d7f


[ASTERIXDB-2243][STO] Fix BloomFilter size estimation

- user model changes: no
- storage format changes: no
- interface changes: no

Details:
- Fix the bloom filter size estimation by using the
actual number of elements after bulk loading. This prevents
the bloom filter size from growing larger and larger under
update-heavy workloads, where most of the ingested records
are deleted through merges.

Change-Id: Ib4054797d969efcfceb86f91b5321d34480e25c3
Reviewed-on: https://asterix-gerrit.ics.uci.edu/2285
Sonar-Qube: Jenkins <[email protected]>
Reviewed-by: Michael Blow <[email protected]>
Integration-Tests: Jenkins <[email protected]>
Tested-by: Jenkins <[email protected]>
Contrib: Jenkins <[email protected]>


Project: http://git-wip-us.apache.org/repos/asf/asterixdb/repo
Commit: http://git-wip-us.apache.org/repos/asf/asterixdb/commit/e54115d7
Tree: http://git-wip-us.apache.org/repos/asf/asterixdb/tree/e54115d7
Diff: http://git-wip-us.apache.org/repos/asf/asterixdb/diff/e54115d7

Branch: refs/heads/master
Commit: e54115d7f264ca102fef06578989b6285b35226a
Parents: 94aa28c
Author: luochen01 <[email protected]>
Authored: Tue Jan 16 13:39:28 2018 -0800
Committer: Luo Chen <[email protected]>
Committed: Wed Jan 17 08:57:58 2018 -0800

----------------------------------------------------------------------
 .../am/bloomfilter/impls/BloomFilter.java        | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/asterixdb/blob/e54115d7/hyracks-fullstack/hyracks/hyracks-storage-am-bloomfilter/src/main/java/org/apache/hyracks/storage/am/bloomfilter/impls/BloomFilter.java
----------------------------------------------------------------------
diff --git 
a/hyracks-fullstack/hyracks/hyracks-storage-am-bloomfilter/src/main/java/org/apache/hyracks/storage/am/bloomfilter/impls/BloomFilter.java
 
b/hyracks-fullstack/hyracks/hyracks-storage-am-bloomfilter/src/main/java/org/apache/hyracks/storage/am/bloomfilter/impls/BloomFilter.java
index 3d8782a..6c16bd1 100644
--- 
a/hyracks-fullstack/hyracks/hyracks-storage-am-bloomfilter/src/main/java/org/apache/hyracks/storage/am/bloomfilter/impls/BloomFilter.java
+++ 
b/hyracks-fullstack/hyracks/hyracks-storage-am-bloomfilter/src/main/java/org/apache/hyracks/storage/am/bloomfilter/impls/BloomFilter.java
@@ -57,7 +57,7 @@ public class BloomFilter {
     private int numHashes;
     private long numElements;
     private long numBits;
-    // keep trace of the version of the bloomfilter to be backward compatible
+    // keep track of the version of the bloomfilter to be backward compatible
     private int version;
     private final int numBitsPerPage;
     private final int numBlocksPerPage;
@@ -277,27 +277,30 @@ public class BloomFilter {
 
     public class BloomFilterBuilder implements IIndexBulkLoader {
         private final long[] hashes = BloomFilter.createHashArray();
-        private final long numElements;
+        private final long estimatedNumElements;
         private final int numHashes;
         private final long numBits;
         private final int numPages;
+        private long actualNumElements;
         private final IFIFOPageQueue queue;
         private final ICachedPage[] pages;
         private ICachedPage metaDataPage = null;
 
-        public BloomFilterBuilder(long numElements, int numHashes, int 
numBitsPerElement) throws HyracksDataException {
+        public BloomFilterBuilder(long estimatedNumElemenets, int numHashes, 
int numBitsPerElement)
+                throws HyracksDataException {
             if (!isActivated) {
                 throw 
HyracksDataException.create(ErrorCode.CANNOT_CREATE_BLOOM_FILTER_BUILDER_FOR_INACTIVE_FILTER);
             }
             queue = bufferCache.createFIFOQueue();
-            this.numElements = numElements;
+            this.estimatedNumElements = estimatedNumElemenets;
             this.numHashes = numHashes;
-            numBits = this.numElements * numBitsPerElement;
+            numBits = this.estimatedNumElements * numBitsPerElement;
             long tmp = (long) Math.ceil(numBits / (double) numBitsPerPage);
             if (tmp > Integer.MAX_VALUE) {
                 throw 
HyracksDataException.create(ErrorCode.CANNOT_CREATE_BLOOM_FILTER_WITH_NUMBER_OF_PAGES,
 tmp);
             }
             numPages = (int) tmp;
+            actualNumElements = 0;
             pages = new ICachedPage[numPages];
             int currentPageId = 1;
             while (currentPageId <= numPages) {
@@ -327,7 +330,7 @@ public class BloomFilter {
             }
             metaDataPage.getBuffer().putInt(NUM_PAGES_OFFSET, numPages);
             metaDataPage.getBuffer().putInt(NUM_HASHES_USED_OFFSET, numHashes);
-            metaDataPage.getBuffer().putLong(NUM_ELEMENTS_OFFSET, numElements);
+            metaDataPage.getBuffer().putLong(NUM_ELEMENTS_OFFSET, 
actualNumElements);
             metaDataPage.getBuffer().putLong(NUM_BITS_OFFSET, numBits);
             metaDataPage.getBuffer().putInt(VERSION_OFFSET, 
BLOCKED_BLOOM_FILTER_VERSION);
         }
@@ -337,6 +340,7 @@ public class BloomFilter {
             if (numPages == 0) {
                 throw 
HyracksDataException.create(ErrorCode.CANNOT_ADD_TUPLES_TO_DUMMY_BLOOM_FILTER);
             }
+            actualNumElements++;
             MurmurHash128Bit.hash3_x64_128(tuple, keyFields, SEED, hashes);
 
             long hash = Math.abs(hashes[0] % numBits);
@@ -367,7 +371,7 @@ public class BloomFilter {
             bufferCache.finishQueue();
             BloomFilter.this.numBits = numBits;
             BloomFilter.this.numHashes = numHashes;
-            BloomFilter.this.numElements = numElements;
+            BloomFilter.this.numElements = actualNumElements;
             BloomFilter.this.numPages = numPages;
             BloomFilter.this.version = BLOCKED_BLOOM_FILTER_VERSION;
         }
@@ -383,6 +387,5 @@ public class BloomFilter {
                 bufferCache.returnPage(metaDataPage, false);
             }
         }
-
     }
 }

Reply via email to