Luo Chen has uploaded a new change for review.
https://asterix-gerrit.ics.uci.edu/2069
Change subject: [ASTERIXDB-2128] Fix bloomfilter during index search
......................................................................
[ASTERIXDB-2128] Fix bloomfilter during index search
- user model changes: no
- storage format changes: no
- interface changes: no
Details:
- The current use of bloomfilter during search is completely wrong
(including primary index point lookups, secondary index search with
deleted btrees). We call BTree.search before bloomfilter check,
which renders bloomfilter completely useless. This patch fixes this
serious problem.
- Compute murmur hashing during bloom filter check is expensive.
Since bloom filters of disk components of an index share the same hash
values for a tuple, this patch also separate compute hash from bloom
filter check to speed up performance
Change-Id: I90ab0d0b2da0028e0ec3cfa94d21881318293ff7
---
M
hyracks-fullstack/hyracks/hyracks-storage-am-bloomfilter/src/main/java/org/apache/hyracks/storage/am/bloomfilter/impls/BloomFilter.java
M
hyracks-fullstack/hyracks/hyracks-storage-am-btree/src/main/java/org/apache/hyracks/storage/am/btree/impls/BTreeRangeSearchCursor.java
M
hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/impls/LSMBTreePointSearchCursor.java
M
hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/impls/LSMBTreeWithBuddyAbstractCursor.java
M
hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/impls/LSMBTreeWithBuddySearchCursor.java
A
hyracks-fullstack/hyracks/hyracks-storage-am-lsm-common/src/main/java/org/apache/hyracks/storage/am/lsm/common/impls/AbstractBloomFilterAwareSearchCursor.java
D
hyracks-fullstack/hyracks/hyracks-storage-am-lsm-common/src/main/java/org/apache/hyracks/storage/am/lsm/common/impls/BloomFilterAwareBTreePointSearchCursor.java
M
hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexRangeSearchCursor.java
M
hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexSearchCursor.java
M
hyracks-fullstack/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/org/apache/hyracks/storage/am/lsm/rtree/impls/LSMRTreeAbstractCursor.java
M
hyracks-fullstack/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/org/apache/hyracks/storage/am/lsm/rtree/impls/LSMRTreeSearchCursor.java
M
hyracks-fullstack/hyracks/hyracks-tests/hyracks-storage-am-bloomfilter-test/src/test/java/org/apache/hyracks/storage/am/bloomfilter/BloomFilterTest.java
12 files changed, 126 insertions(+), 109 deletions(-)
git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb
refs/changes/69/2069/1
diff --git
a/hyracks-fullstack/hyracks/hyracks-storage-am-bloomfilter/src/main/java/org/apache/hyracks/storage/am/bloomfilter/impls/BloomFilter.java
b/hyracks-fullstack/hyracks/hyracks-storage-am-bloomfilter/src/main/java/org/apache/hyracks/storage/am/bloomfilter/impls/BloomFilter.java
index 47a9734..cd84892 100644
---
a/hyracks-fullstack/hyracks/hyracks-storage-am-bloomfilter/src/main/java/org/apache/hyracks/storage/am/bloomfilter/impls/BloomFilter.java
+++
b/hyracks-fullstack/hyracks/hyracks-storage-am-bloomfilter/src/main/java/org/apache/hyracks/storage/am/bloomfilter/impls/BloomFilter.java
@@ -82,11 +82,14 @@
return numElements;
}
- public boolean contains(ITupleReference tuple, long[] hashes) throws
HyracksDataException {
+ public void computeHashes(ITupleReference tuple, long[] hashes) {
+ MurmurHash128Bit.hash3_x64_128(tuple, keyFields, SEED, hashes);
+ }
+
+ public boolean contains(long[] hashes) throws HyracksDataException {
if (numPages == 0) {
return false;
}
- MurmurHash128Bit.hash3_x64_128(tuple, keyFields, SEED, hashes);
for (int i = 0; i < numHashes; ++i) {
long hash = Math.abs((hashes[0] + i * hashes[1]) % numBits);
@@ -182,14 +185,18 @@
return new BloomFilterBuilder(numElements, numHashes,
numBitsPerElement);
}
+ public static long[] newHashArray() {
+ return new long[2];
+ }
+
public class BloomFilterBuilder implements IIndexBulkLoader {
- private final long[] hashes = new long[2];
+ private final long[] hashes = BloomFilter.newHashArray();
private final long numElements;
private final int numHashes;
private final long numBits;
private final int numPages;
- private IFIFOPageQueue queue;
- private ICachedPage[] pages;
+ private final IFIFOPageQueue queue;
+ private final ICachedPage[] pages;
private ICachedPage metaDataPage = null;
public BloomFilterBuilder(long numElements, int numHashes, int
numBitsPerElement) throws HyracksDataException {
diff --git
a/hyracks-fullstack/hyracks/hyracks-storage-am-btree/src/main/java/org/apache/hyracks/storage/am/btree/impls/BTreeRangeSearchCursor.java
b/hyracks-fullstack/hyracks/hyracks-storage-am-btree/src/main/java/org/apache/hyracks/storage/am/btree/impls/BTreeRangeSearchCursor.java
index 32cc7ee..13cb57a 100644
---
a/hyracks-fullstack/hyracks/hyracks-storage-am-btree/src/main/java/org/apache/hyracks/storage/am/btree/impls/BTreeRangeSearchCursor.java
+++
b/hyracks-fullstack/hyracks/hyracks-storage-am-btree/src/main/java/org/apache/hyracks/storage/am/btree/impls/BTreeRangeSearchCursor.java
@@ -310,8 +310,4 @@
public boolean isExclusiveLatchNodes() {
return exclusiveLatchNodes;
}
-
- public boolean isBloomFilterAware() {
- return false;
- }
}
diff --git
a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/impls/LSMBTreePointSearchCursor.java
b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/impls/LSMBTreePointSearchCursor.java
index 6126c53..231f92c 100644
---
a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/impls/LSMBTreePointSearchCursor.java
+++
b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/impls/LSMBTreePointSearchCursor.java
@@ -23,6 +23,7 @@
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.dataflow.common.data.accessors.ITupleReference;
+import org.apache.hyracks.storage.am.bloomfilter.impls.BloomFilter;
import org.apache.hyracks.storage.am.btree.api.IBTreeLeafFrame;
import org.apache.hyracks.storage.am.btree.impls.BTree;
import org.apache.hyracks.storage.am.btree.impls.BTree.BTreeAccessor;
@@ -36,13 +37,13 @@
import org.apache.hyracks.storage.am.lsm.common.api.ILSMHarness;
import org.apache.hyracks.storage.am.lsm.common.api.ILSMIndexOperationContext;
import org.apache.hyracks.storage.am.lsm.common.api.ILSMTreeTupleReference;
-import
org.apache.hyracks.storage.am.lsm.common.impls.BloomFilterAwareBTreePointSearchCursor;
+import
org.apache.hyracks.storage.am.lsm.common.impls.AbstractBloomFilterAwareSearchCursor;
import org.apache.hyracks.storage.common.ICursorInitialState;
import org.apache.hyracks.storage.common.ISearchOperationCallback;
import org.apache.hyracks.storage.common.ISearchPredicate;
import org.apache.hyracks.storage.common.buffercache.IBufferCache;
-public class LSMBTreePointSearchCursor implements ITreeIndexCursor {
+public class LSMBTreePointSearchCursor extends
AbstractBloomFilterAwareSearchCursor implements ITreeIndexCursor {
private BTreeRangeSearchCursor[] rangeCursors;
private final ILSMIndexOperationContext opCtx;
@@ -71,6 +72,9 @@
}
boolean reconciled = false;
for (int i = 0; i < numBTrees; ++i) {
+ if (!checkBloomFilter(bloomFilters[i], predicate.getLowKey())) {
+ continue;
+ }
btreeAccessors[i].search(rangeCursors[i], predicate);
if (rangeCursors[i].hasNext()) {
rangeCursors[i].next();
@@ -139,9 +143,9 @@
rangeCursors[i].reset();
}
}
- rangeCursors = null;
nextHasBeenCalled = false;
foundTuple = false;
+ bloomFilterHashComputed = false;
} finally {
if (lsmHarness != null) {
lsmHarness.endSearch(opCtx);
@@ -161,6 +165,7 @@
// object creation: should be relatively low
rangeCursors = new BTreeRangeSearchCursor[numBTrees];
btreeAccessors = new BTreeAccessor[numBTrees];
+ bloomFilters = new BloomFilter[numBTrees];
}
includeMutableComponent = false;
@@ -169,8 +174,7 @@
BTree btree;
if (component.getType() == LSMComponentType.MEMORY) {
includeMutableComponent = true;
- // No need for a bloom filter for the in-memory BTree.
- if (rangeCursors[i] == null ||
rangeCursors[i].isBloomFilterAware()) {
+ if (rangeCursors[i] == null) {
// create a new one
IBTreeLeafFrame leafFrame = (IBTreeLeafFrame)
lsmInitialState.getLeafFrameFactory().createFrame();
rangeCursors[i] = new BTreeRangeSearchCursor(leafFrame,
false);
@@ -179,19 +183,19 @@
rangeCursors[i].reset();
}
btree = ((LSMBTreeMemoryComponent) component).getBTree();
+ // no bloom filter for in-memory BTree
+ bloomFilters[i] = null;
} else {
- if (rangeCursors[i] != null &&
rangeCursors[i].isBloomFilterAware()) {
+ if (rangeCursors[i] != null) {
// can re-use cursor
- ((BloomFilterAwareBTreePointSearchCursor) rangeCursors[i])
- .resetBloomFilter(((LSMBTreeDiskComponent)
component).getBloomFilter());
rangeCursors[i].reset();
} else {
// create new cursor <should be relatively rare>
IBTreeLeafFrame leafFrame = (IBTreeLeafFrame)
lsmInitialState.getLeafFrameFactory().createFrame();
- rangeCursors[i] = new
BloomFilterAwareBTreePointSearchCursor(leafFrame, false,
- ((LSMBTreeDiskComponent)
component).getBloomFilter());
+ rangeCursors[i] = new BTreeRangeSearchCursor(leafFrame,
false);
}
btree = ((LSMBTreeDiskComponent) component).getBTree();
+ bloomFilters[i] = ((LSMBTreeDiskComponent)
component).getBloomFilter();
}
if (btreeAccessors[i] == null) {
btreeAccessors[i] = (BTreeAccessor)
btree.createAccessor(NoOpOperationCallback.INSTANCE,
@@ -201,6 +205,7 @@
btreeAccessors[i].reset(btree, NoOpOperationCallback.INSTANCE,
NoOpOperationCallback.INSTANCE);
}
}
+ bloomFilterHashComputed = false;
nextHasBeenCalled = false;
foundTuple = false;
}
diff --git
a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/impls/LSMBTreeWithBuddyAbstractCursor.java
b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/impls/LSMBTreeWithBuddyAbstractCursor.java
index 03d28d0..6394c3f 100644
---
a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/impls/LSMBTreeWithBuddyAbstractCursor.java
+++
b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/impls/LSMBTreeWithBuddyAbstractCursor.java
@@ -22,6 +22,7 @@
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.dataflow.common.data.accessors.ITupleReference;
+import org.apache.hyracks.storage.am.bloomfilter.impls.BloomFilter;
import org.apache.hyracks.storage.am.btree.api.IBTreeLeafFrame;
import org.apache.hyracks.storage.am.btree.impls.BTree;
import org.apache.hyracks.storage.am.btree.impls.BTree.BTreeAccessor;
@@ -33,13 +34,14 @@
import
org.apache.hyracks.storage.am.lsm.common.api.ILSMComponent.LSMComponentType;
import org.apache.hyracks.storage.am.lsm.common.api.ILSMHarness;
import org.apache.hyracks.storage.am.lsm.common.api.ILSMIndexOperationContext;
-import
org.apache.hyracks.storage.am.lsm.common.impls.BloomFilterAwareBTreePointSearchCursor;
+import
org.apache.hyracks.storage.am.lsm.common.impls.AbstractBloomFilterAwareSearchCursor;
import org.apache.hyracks.storage.common.ICursorInitialState;
import org.apache.hyracks.storage.common.ISearchPredicate;
import org.apache.hyracks.storage.common.MultiComparator;
import org.apache.hyracks.storage.common.buffercache.IBufferCache;
-public abstract class LSMBTreeWithBuddyAbstractCursor implements
ITreeIndexCursor {
+public abstract class LSMBTreeWithBuddyAbstractCursor extends
AbstractBloomFilterAwareSearchCursor
+ implements ITreeIndexCursor {
protected boolean open;
protected BTreeRangeSearchCursor[] btreeCursors;
@@ -86,6 +88,7 @@
buddyBtreeCursors = new BTreeRangeSearchCursor[numberOfTrees];
btreeAccessors = new BTreeAccessor[numberOfTrees];
buddyBtreeAccessors = new BTreeAccessor[numberOfTrees];
+ bloomFilters = new BloomFilter[numberOfTrees];
}
includeMutableComponent = false;
@@ -98,7 +101,7 @@
// This is not needed at the moment but is implemented anyway
includeMutableComponent = true;
// No need for a bloom filter for the in-memory BTree.
- if (buddyBtreeCursors[i] == null ||
buddyBtreeCursors[i].isBloomFilterAware()) {
+ if (buddyBtreeCursors[i] == null) {
buddyBtreeCursors[i] = new BTreeRangeSearchCursor(
(IBTreeLeafFrame)
lsmInitialState.getBuddyBTreeLeafFrameFactory().createFrame(), false);
} else {
@@ -106,16 +109,17 @@
}
btree = ((LSMBTreeWithBuddyMemoryComponent)
component).getBTree();
buddyBtree = ((LSMBTreeWithBuddyMemoryComponent)
component).getBuddyBTree();
+ bloomFilters[i] = null;
} else {
- if (buddyBtreeCursors[i] == null ||
!buddyBtreeCursors[i].isBloomFilterAware()) {
- buddyBtreeCursors[i] = new
BloomFilterAwareBTreePointSearchCursor(
- (IBTreeLeafFrame)
lsmInitialState.getBuddyBTreeLeafFrameFactory().createFrame(), false,
- ((LSMBTreeWithBuddyDiskComponent)
operationalComponents.get(i)).getBloomFilter());
+ if (buddyBtreeCursors[i] == null) {
+ buddyBtreeCursors[i] = new BTreeRangeSearchCursor(
+ (IBTreeLeafFrame)
lsmInitialState.getBuddyBTreeLeafFrameFactory().createFrame(), false);
} else {
buddyBtreeCursors[i].reset();
}
btree = ((LSMBTreeWithBuddyDiskComponent)
component).getBTree();
buddyBtree = ((LSMBTreeWithBuddyDiskComponent)
component).getBuddyBTree();
+ bloomFilters[i] = ((LSMBTreeWithBuddyDiskComponent)
component).getBloomFilter();
}
IBTreeLeafFrame leafFrame = (IBTreeLeafFrame)
lsmInitialState.getBTreeLeafFrameFactory().createFrame();
if (btreeAccessors[i] == null) {
@@ -134,6 +138,7 @@
btreeRangePredicate = (RangePredicate) searchPred;
buddyBtreeRangePredicate.reset(null, null, true, true, buddyBtreeCmp,
buddyBtreeCmp);
open = true;
+ bloomFilterHashComputed = false;
}
@Override
diff --git
a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/impls/LSMBTreeWithBuddySearchCursor.java
b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/impls/LSMBTreeWithBuddySearchCursor.java
index d6aa0c6..8494525 100644
---
a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/impls/LSMBTreeWithBuddySearchCursor.java
+++
b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree/src/main/java/org/apache/hyracks/storage/am/lsm/btree/impls/LSMBTreeWithBuddySearchCursor.java
@@ -28,7 +28,7 @@
public class LSMBTreeWithBuddySearchCursor extends
LSMBTreeWithBuddyAbstractCursor {
private int currentCursor;
- private PermutingTupleReference buddyBTreeTuple;
+ private final PermutingTupleReference buddyBTreeTuple;
public LSMBTreeWithBuddySearchCursor(ILSMIndexOperationContext opCtx,
int[] buddyBTreeFields) {
super(opCtx);
@@ -80,7 +80,11 @@
ITupleReference currentTuple =
btreeCursors[currentCursor].getTuple();
buddyBTreeTuple.reset(btreeCursors[currentCursor].getTuple());
boolean killerTupleFound = false;
+ bloomFilterHashComputed = false;
for (int i = 0; i < currentCursor; i++) {
+ if (!checkBloomFilter(bloomFilters[i], buddyBTreeTuple)) {
+ continue;
+ }
buddyBtreeCursors[i].reset();
buddyBtreeRangePredicate.setHighKey(buddyBTreeTuple, true);
buddyBtreeRangePredicate.setLowKey(buddyBTreeTuple, true);
@@ -115,13 +119,13 @@
@Override
public ITupleReference getFilterMinTuple() {
ILSMComponentFilter filter = getFilter();
- return filter == null ? null : filter.getMinTuple();
+ return filter == null ? null : filter.getMinTuple();
}
@Override
public ITupleReference getFilterMaxTuple() {
ILSMComponentFilter filter = getFilter();
- return filter == null ? null : filter.getMaxTuple();
+ return filter == null ? null : filter.getMaxTuple();
}
private ILSMComponentFilter getFilter() {
diff --git
a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-common/src/main/java/org/apache/hyracks/storage/am/lsm/common/impls/AbstractBloomFilterAwareSearchCursor.java
b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-common/src/main/java/org/apache/hyracks/storage/am/lsm/common/impls/AbstractBloomFilterAwareSearchCursor.java
new file mode 100644
index 0000000..b815c47
--- /dev/null
+++
b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-common/src/main/java/org/apache/hyracks/storage/am/lsm/common/impls/AbstractBloomFilterAwareSearchCursor.java
@@ -0,0 +1,27 @@
+package org.apache.hyracks.storage.am.lsm.common.impls;
+
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.dataflow.common.data.accessors.ITupleReference;
+import org.apache.hyracks.storage.am.bloomfilter.impls.BloomFilter;
+
+public abstract class AbstractBloomFilterAwareSearchCursor {
+ protected final long[] hashes = BloomFilter.newHashArray();
+ protected boolean bloomFilterHashComputed = false;
+ protected BloomFilter[] bloomFilters;
+
+ protected boolean checkBloomFilter(BloomFilter bloomFilter,
ITupleReference tuple) throws HyracksDataException {
+ if (bloomFilter == null) {
+ return true;
+ } else {
+ if (!bloomFilterHashComputed) {
+ // computing hashes for bloom filter is expensive
+ // Since bloom filters of all indexes share the same hash
values for an input tuple,
+ // we only compute the hash value once here
+ bloomFilter.computeHashes(tuple, hashes);
+ bloomFilterHashComputed = true;
+ }
+ return bloomFilter.contains(hashes);
+ }
+
+ }
+}
diff --git
a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-common/src/main/java/org/apache/hyracks/storage/am/lsm/common/impls/BloomFilterAwareBTreePointSearchCursor.java
b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-common/src/main/java/org/apache/hyracks/storage/am/lsm/common/impls/BloomFilterAwareBTreePointSearchCursor.java
deleted file mode 100644
index f84aeb5..0000000
---
a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-common/src/main/java/org/apache/hyracks/storage/am/lsm/common/impls/BloomFilterAwareBTreePointSearchCursor.java
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.hyracks.storage.am.lsm.common.impls;
-
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.storage.am.bloomfilter.impls.BloomFilter;
-import org.apache.hyracks.storage.am.btree.api.IBTreeLeafFrame;
-import org.apache.hyracks.storage.am.btree.impls.BTreeRangeSearchCursor;
-
-public class BloomFilterAwareBTreePointSearchCursor extends
BTreeRangeSearchCursor {
- private BloomFilter bloomFilter;
- private long[] hashes = new long[2];
-
- public BloomFilterAwareBTreePointSearchCursor(IBTreeLeafFrame frame,
boolean exclusiveLatchNodes,
- BloomFilter bloomFilter) {
- super(frame, exclusiveLatchNodes);
- this.bloomFilter = bloomFilter;
- }
-
- @Override
- public boolean hasNext() throws HyracksDataException {
- if (bloomFilter.contains(lowKey, hashes)) {
- return super.hasNext();
- }
- return false;
- }
-
- @Override
- public boolean isBloomFilterAware() {
- return true;
- }
-
- public void resetBloomFilter(BloomFilter bloomFilter) {
- this.bloomFilter = bloomFilter;
- }
-}
diff --git
a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexRangeSearchCursor.java
b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexRangeSearchCursor.java
index ec1fe68..96218ad 100644
---
a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexRangeSearchCursor.java
+++
b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexRangeSearchCursor.java
@@ -22,13 +22,13 @@
import java.util.ArrayList;
import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.storage.am.btree.api.IBTreeLeafFrame;
+import org.apache.hyracks.dataflow.common.data.accessors.ITupleReference;
+import org.apache.hyracks.storage.am.bloomfilter.impls.BloomFilter;
import org.apache.hyracks.storage.am.btree.impls.RangePredicate;
import org.apache.hyracks.storage.am.common.tuples.PermutingTupleReference;
import org.apache.hyracks.storage.am.lsm.common.api.ILSMComponent;
import
org.apache.hyracks.storage.am.lsm.common.api.ILSMComponent.LSMComponentType;
import org.apache.hyracks.storage.am.lsm.common.api.ILSMIndexOperationContext;
-import
org.apache.hyracks.storage.am.lsm.common.impls.BloomFilterAwareBTreePointSearchCursor;
import org.apache.hyracks.storage.am.lsm.common.impls.LSMIndexSearchCursor;
import
org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexAccessor;
import org.apache.hyracks.storage.common.ICursorInitialState;
@@ -41,6 +41,9 @@
// Assuming the cursor for all deleted-keys indexes are of the same type.
private IIndexCursor[] deletedKeysBTreeCursors;
+ protected BloomFilter[] bloomFilters;
+ protected long[] bloomFilterHashes = BloomFilter.newHashArray();
+ protected boolean hashComputed = false;
protected ArrayList<IIndexAccessor> deletedKeysBTreeAccessors;
protected PermutingTupleReference keysOnlyTuple;
protected RangePredicate keySearchPred;
@@ -78,13 +81,12 @@
deletedKeysBTreeCursors = new
IIndexCursor[deletedKeysBTreeAccessors.size()];
for (int i = 0; i < operationalComponents.size(); i++) {
ILSMComponent component = operationalComponents.get(i);
+ deletedKeysBTreeCursors[i] =
deletedKeysBTreeAccessors.get(i).createSearchCursor(false);
if (component.getType() == LSMComponentType.MEMORY) {
// No need for a bloom filter for the in-memory BTree.
- deletedKeysBTreeCursors[i] =
deletedKeysBTreeAccessors.get(i).createSearchCursor(false);
+ bloomFilters[i] = null;
} else {
- deletedKeysBTreeCursors[i] = new
BloomFilterAwareBTreePointSearchCursor(
- (IBTreeLeafFrame)
lsmInitState.getgetDeletedKeysBTreeLeafFrameFactory().createFrame(),
- false, ((LSMInvertedIndexDiskComponent)
operationalComponents.get(i)).getBloomFilter());
+ bloomFilters[i] = ((LSMInvertedIndexDiskComponent)
component).getBloomFilter();
}
}
}
@@ -102,7 +104,11 @@
protected boolean isDeleted(PriorityQueueElement checkElement) throws
HyracksDataException {
keysOnlyTuple.reset(checkElement.getTuple());
int end = checkElement.getCursorIndex();
+ hashComputed = false;
for (int i = 0; i < end; i++) {
+ if (!checkBloomFilter(bloomFilters[i], keysOnlyTuple)) {
+ continue;
+ }
deletedKeysBTreeCursors[i].reset();
try {
deletedKeysBTreeAccessors.get(i).search(deletedKeysBTreeCursors[i],
keySearchPred);
@@ -115,4 +121,17 @@
}
return false;
}
+
+ private boolean checkBloomFilter(BloomFilter bloomFilter, ITupleReference
tuple) throws HyracksDataException {
+ if (bloomFilter == null) {
+ return true;
+ } else {
+ if (!hashComputed) {
+ bloomFilter.computeHashes(tuple, bloomFilterHashes);
+ hashComputed = true;
+ }
+ return bloomFilter.contains(bloomFilterHashes);
+ }
+
+ }
}
diff --git
a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexSearchCursor.java
b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexSearchCursor.java
index b07b4b0..b6c8d0e 100644
---
a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexSearchCursor.java
+++
b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexSearchCursor.java
@@ -22,14 +22,14 @@
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.dataflow.common.data.accessors.ITupleReference;
-import org.apache.hyracks.storage.am.btree.api.IBTreeLeafFrame;
+import org.apache.hyracks.storage.am.bloomfilter.impls.BloomFilter;
import org.apache.hyracks.storage.am.btree.impls.RangePredicate;
import org.apache.hyracks.storage.am.lsm.common.api.ILSMComponent;
import
org.apache.hyracks.storage.am.lsm.common.api.ILSMComponent.LSMComponentType;
import org.apache.hyracks.storage.am.lsm.common.api.ILSMComponentFilter;
import org.apache.hyracks.storage.am.lsm.common.api.ILSMHarness;
import org.apache.hyracks.storage.am.lsm.common.api.ILSMIndexOperationContext;
-import
org.apache.hyracks.storage.am.lsm.common.impls.BloomFilterAwareBTreePointSearchCursor;
+import
org.apache.hyracks.storage.am.lsm.common.impls.AbstractBloomFilterAwareSearchCursor;
import org.apache.hyracks.storage.common.ICursorInitialState;
import org.apache.hyracks.storage.common.IIndexAccessor;
import org.apache.hyracks.storage.common.IIndexCursor;
@@ -41,7 +41,7 @@
* Searches the components one-by-one, completely consuming a cursor before
moving on to the next one.
* Therefore, the are no guarantees about sort order of the results.
*/
-public class LSMInvertedIndexSearchCursor implements IIndexCursor {
+public class LSMInvertedIndexSearchCursor extends
AbstractBloomFilterAwareSearchCursor implements IIndexCursor {
private IIndexAccessor currentAccessor;
private IIndexCursor currentCursor;
@@ -76,16 +76,15 @@
// For searching the deleted-keys BTrees.
deletedKeysBTreeAccessors =
lsmInitState.getDeletedKeysBTreeAccessors();
deletedKeysBTreeCursors = new
IIndexCursor[deletedKeysBTreeAccessors.size()];
-
+ bloomFilters = new BloomFilter[deletedKeysBTreeAccessors.size()];
for (int i = 0; i < operationalComponents.size(); i++) {
ILSMComponent component = operationalComponents.get(i);
+ deletedKeysBTreeCursors[i] =
deletedKeysBTreeAccessors.get(i).createSearchCursor(false);
if (component.getType() == LSMComponentType.MEMORY) {
// No need for a bloom filter for the in-memory BTree.
- deletedKeysBTreeCursors[i] =
deletedKeysBTreeAccessors.get(i).createSearchCursor(false);
+ bloomFilters[i] = null;
} else {
- deletedKeysBTreeCursors[i] = new
BloomFilterAwareBTreePointSearchCursor(
- (IBTreeLeafFrame)
lsmInitState.getgetDeletedKeysBTreeLeafFrameFactory().createFrame(), false,
- ((LSMInvertedIndexDiskComponent)
operationalComponents.get(i)).getBloomFilter());
+ bloomFilters[i] = ((LSMInvertedIndexDiskComponent)
component).getBloomFilter();
}
}
@@ -96,8 +95,12 @@
protected boolean isDeleted(ITupleReference key) throws
HyracksDataException {
keySearchPred.setLowKey(key, true);
keySearchPred.setHighKey(key, true);
+ bloomFilterHashComputed = false;
for (int i = 0; i < accessorIndex; i++) {
deletedKeysBTreeCursors[i].reset();
+ if (!checkBloomFilter(bloomFilters[i], key)) {
+ continue;
+ }
try {
deletedKeysBTreeAccessors.get(i).search(deletedKeysBTreeCursors[i],
keySearchPred);
if (deletedKeysBTreeCursors[i].hasNext()) {
diff --git
a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/org/apache/hyracks/storage/am/lsm/rtree/impls/LSMRTreeAbstractCursor.java
b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/org/apache/hyracks/storage/am/lsm/rtree/impls/LSMRTreeAbstractCursor.java
index edb7481..c31872e 100644
---
a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/org/apache/hyracks/storage/am/lsm/rtree/impls/LSMRTreeAbstractCursor.java
+++
b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/org/apache/hyracks/storage/am/lsm/rtree/impls/LSMRTreeAbstractCursor.java
@@ -33,7 +33,7 @@
import
org.apache.hyracks.storage.am.lsm.common.api.ILSMComponent.LSMComponentType;
import org.apache.hyracks.storage.am.lsm.common.api.ILSMHarness;
import org.apache.hyracks.storage.am.lsm.common.api.ILSMIndexOperationContext;
-import
org.apache.hyracks.storage.am.lsm.common.impls.BloomFilterAwareBTreePointSearchCursor;
+import
org.apache.hyracks.storage.am.lsm.common.impls.AbstractBloomFilterAwareSearchCursor;
import org.apache.hyracks.storage.am.rtree.api.IRTreeInteriorFrame;
import org.apache.hyracks.storage.am.rtree.api.IRTreeLeafFrame;
import org.apache.hyracks.storage.am.rtree.impls.RTree;
@@ -45,7 +45,7 @@
import org.apache.hyracks.storage.common.MultiComparator;
import org.apache.hyracks.storage.common.buffercache.IBufferCache;
-public abstract class LSMRTreeAbstractCursor implements ITreeIndexCursor {
+public abstract class LSMRTreeAbstractCursor extends
AbstractBloomFilterAwareSearchCursor implements ITreeIndexCursor {
protected boolean open;
protected RTreeSearchCursor[] rtreeCursors;
@@ -102,7 +102,7 @@
if (component.getType() == LSMComponentType.MEMORY) {
includeMutableComponent = true;
// No need for a bloom filter for the in-memory BTree.
- if (btreeCursors[i] == null ||
btreeCursors[i].isBloomFilterAware()) {
+ if (btreeCursors[i] == null) {
//create
btreeCursors[i] = new BTreeRangeSearchCursor(
(IBTreeLeafFrame)
lsmInitialState.getBTreeLeafFrameFactory().createFrame(), false);
@@ -112,20 +112,19 @@
}
rtree = ((LSMRTreeMemoryComponent) component).getRTree();
btree = ((LSMRTreeMemoryComponent) component).getBTree();
+ bloomFilters[i] = null;
} else {
- if (btreeCursors[i] == null ||
!btreeCursors[i].isBloomFilterAware()) {
+ if (btreeCursors[i] == null) {
// need to create a new one
- btreeCursors[i] = new
BloomFilterAwareBTreePointSearchCursor(
- (IBTreeLeafFrame)
lsmInitialState.getBTreeLeafFrameFactory().createFrame(), false,
- ((LSMRTreeDiskComponent)
operationalComponents.get(i)).getBloomFilter());
+ btreeCursors[i] = new BTreeRangeSearchCursor(
+ (IBTreeLeafFrame)
lsmInitialState.getBTreeLeafFrameFactory().createFrame(), false);
} else {
// reset
- ((BloomFilterAwareBTreePointSearchCursor) btreeCursors[i])
- .resetBloomFilter(((LSMRTreeDiskComponent)
operationalComponents.get(i)).getBloomFilter());
btreeCursors[i].reset();
}
rtree = ((LSMRTreeDiskComponent) component).getRTree();
btree = ((LSMRTreeDiskComponent) component).getBTree();
+ bloomFilters[i] = ((LSMRTreeDiskComponent)
component).getBloomFilter();
}
if (rtreeCursors[i] == null) {
rtreeCursors[i] = new RTreeSearchCursor(
diff --git
a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/org/apache/hyracks/storage/am/lsm/rtree/impls/LSMRTreeSearchCursor.java
b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/org/apache/hyracks/storage/am/lsm/rtree/impls/LSMRTreeSearchCursor.java
index ec85127..e8f7819 100644
---
a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/org/apache/hyracks/storage/am/lsm/rtree/impls/LSMRTreeSearchCursor.java
+++
b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/org/apache/hyracks/storage/am/lsm/rtree/impls/LSMRTreeSearchCursor.java
@@ -22,7 +22,6 @@
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.dataflow.common.data.accessors.ITupleReference;
import org.apache.hyracks.storage.am.common.tuples.PermutingTupleReference;
-import org.apache.hyracks.storage.am.lsm.common.api.ILSMComponent;
import org.apache.hyracks.storage.am.lsm.common.api.ILSMComponentFilter;
import org.apache.hyracks.storage.am.lsm.common.api.ILSMIndexOperationContext;
import org.apache.hyracks.storage.common.ICursorInitialState;
@@ -31,7 +30,7 @@
public class LSMRTreeSearchCursor extends LSMRTreeAbstractCursor {
private int currentCursor;
- private PermutingTupleReference btreeTuple;
+ private final PermutingTupleReference btreeTuple;
public LSMRTreeSearchCursor(ILSMIndexOperationContext opCtx, int[]
buddyBTreeFields) {
super(opCtx);
@@ -99,7 +98,11 @@
ITupleReference currentTuple =
rtreeCursors[currentCursor].getTuple();
btreeTuple.reset(rtreeCursors[currentCursor].getTuple());
boolean killerTupleFound = false;
+ bloomFilterHashComputed = false;
for (int i = 0; i < currentCursor; i++) {
+ if (!checkBloomFilter(bloomFilters[i], btreeTuple)) {
+ continue;
+ }
btreeCursors[i].reset();
btreeRangePredicate.setHighKey(btreeTuple, true);
btreeRangePredicate.setLowKey(btreeTuple, true);
diff --git
a/hyracks-fullstack/hyracks/hyracks-tests/hyracks-storage-am-bloomfilter-test/src/test/java/org/apache/hyracks/storage/am/bloomfilter/BloomFilterTest.java
b/hyracks-fullstack/hyracks/hyracks-tests/hyracks-storage-am-bloomfilter-test/src/test/java/org/apache/hyracks/storage/am/bloomfilter/BloomFilterTest.java
index 35779ac..3ccc523 100644
---
a/hyracks-fullstack/hyracks/hyracks-tests/hyracks-storage-am-bloomfilter-test/src/test/java/org/apache/hyracks/storage/am/bloomfilter/BloomFilterTest.java
+++
b/hyracks-fullstack/hyracks/hyracks-tests/hyracks-storage-am-bloomfilter-test/src/test/java/org/apache/hyracks/storage/am/bloomfilter/BloomFilterTest.java
@@ -102,7 +102,8 @@
long[] hashes = new long[2];
for (int i = 0; i < keys.size(); ++i) {
TupleUtils.createIntegerTuple(tupleBuilder, tuple, keys.get(i), i);
- Assert.assertTrue(bf.contains(tuple, hashes));
+ bf.computeHashes(tuple, hashes);
+ Assert.assertTrue(bf.contains(hashes));
}
bf.deactivate();
@@ -160,7 +161,8 @@
long[] hashes = new long[2];
for (int i = 0; i < numElements; ++i) {
TupleUtils.createTuple(tupleBuilder, tuple, fieldSerdes,
s1.get(i), s2.get(i), i, s3.get(i), s4.get(i));
- Assert.assertTrue(bf.contains(tuple, hashes));
+ bf.computeHashes(tuple, hashes);
+ Assert.assertTrue(bf.contains(hashes));
}
bf.deactivate();
--
To view, visit https://asterix-gerrit.ics.uci.edu/2069
To unsubscribe, visit https://asterix-gerrit.ics.uci.edu/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I90ab0d0b2da0028e0ec3cfa94d21881318293ff7
Gerrit-PatchSet: 1
Gerrit-Project: asterixdb
Gerrit-Branch: master
Gerrit-Owner: Luo Chen <[email protected]>