This is an automated email from the ASF dual-hosted git repository.
jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git
The following commit(s) were added to refs/heads/master by this push:
new e9a49ec Fix the race condition for real-time inverted index reader
(#4051)
e9a49ec is described below
commit e9a49ecc260e93ff696b9facf67c14a60b8ecbbd
Author: Xiaotian (Jackie) Jiang <[email protected]>
AuthorDate: Tue Apr 2 12:55:44 2019 -0700
Fix the race condition for real-time inverted index reader (#4051)
When getting document ids from RealtimeInvertedIndexReader, the given
dictionary id might not be added to the inverted index yet. We first add the
value to the dictionary. Before the value is added to the inverted index, the
query might have predicates that match the newly added value. In that case, the
given dictionary id does not exist in the inverted index, and we return an
empty bitmap.
Simplify BitmapBasedFilterOperator because getDocIds() never returns null.
Add a test for the change.
---
.../operator/filter/BitmapBasedFilterOperator.java | 26 +----
.../invertedindex/RealtimeInvertedIndexReader.java | 13 ++-
.../RealtimeInvertedIndexReaderTest.java | 106 +++++++++++++++++++++
3 files changed, 122 insertions(+), 23 deletions(-)
diff --git
a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/BitmapBasedFilterOperator.java
b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/BitmapBasedFilterOperator.java
index 64d7c3e..b2b2883 100644
---
a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/BitmapBasedFilterOperator.java
+++
b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/BitmapBasedFilterOperator.java
@@ -19,20 +19,15 @@
package org.apache.pinot.core.operator.filter;
import com.google.common.base.Preconditions;
-import java.util.ArrayList;
-import java.util.List;
import org.apache.pinot.core.common.DataSource;
import org.apache.pinot.core.operator.blocks.FilterBlock;
import org.apache.pinot.core.operator.docidsets.BitmapDocIdSet;
import org.apache.pinot.core.operator.filter.predicate.PredicateEvaluator;
import org.apache.pinot.core.segment.index.readers.InvertedIndexReader;
import org.roaringbitmap.buffer.ImmutableRoaringBitmap;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
public class BitmapBasedFilterOperator extends BaseFilterOperator {
- private static final Logger LOGGER =
LoggerFactory.getLogger(BitmapBasedFilterOperator.class);
private static final String OPERATOR_NAME = "BitmapBasedFilterOperator";
private final PredicateEvaluator _predicateEvaluator;
@@ -78,27 +73,14 @@ public class BitmapBasedFilterOperator extends
BaseFilterOperator {
int[] dictIds = _exclusive ? _predicateEvaluator.getNonMatchingDictIds() :
_predicateEvaluator.getMatchingDictIds();
- // For realtime use case, it is possible that inverted index has not yet
generated for the given dict id, so we
- // filter out null bitmaps
InvertedIndexReader invertedIndex = _dataSource.getInvertedIndex();
int length = dictIds.length;
- List<ImmutableRoaringBitmap> bitmaps = new ArrayList<>(length);
- for (int dictId : dictIds) {
- ImmutableRoaringBitmap bitmap = (ImmutableRoaringBitmap)
invertedIndex.getDocIds(dictId);
- if (bitmap != null) {
- bitmaps.add(bitmap);
- }
+ ImmutableRoaringBitmap[] bitmaps = new ImmutableRoaringBitmap[length];
+ for (int i = 0; i < length; i++) {
+ bitmaps[i] = (ImmutableRoaringBitmap)
invertedIndex.getDocIds(dictIds[i]);
}
- // Log size diff to verify the fix
- int numBitmaps = bitmaps.size();
- if (numBitmaps != length) {
- LOGGER.info("Not all inverted indexes are generated, numDictIds: {},
numBitmaps: {}", length, numBitmaps);
- }
-
- return new FilterBlock(
- new BitmapDocIdSet(bitmaps.toArray(new
ImmutableRoaringBitmap[numBitmaps]), _startDocId, _endDocId,
- _exclusive));
+ return new FilterBlock(new BitmapDocIdSet(bitmaps, _startDocId, _endDocId,
_exclusive));
}
@Override
diff --git
a/pinot-core/src/main/java/org/apache/pinot/core/realtime/impl/invertedindex/RealtimeInvertedIndexReader.java
b/pinot-core/src/main/java/org/apache/pinot/core/realtime/impl/invertedindex/RealtimeInvertedIndexReader.java
index 363f688..1ff94f6 100644
---
a/pinot-core/src/main/java/org/apache/pinot/core/realtime/impl/invertedindex/RealtimeInvertedIndexReader.java
+++
b/pinot-core/src/main/java/org/apache/pinot/core/realtime/impl/invertedindex/RealtimeInvertedIndexReader.java
@@ -25,6 +25,10 @@ import
org.apache.pinot.core.segment.index.readers.InvertedIndexReader;
import org.roaringbitmap.buffer.MutableRoaringBitmap;
+/**
+ * Real-time bitmap based inverted index reader which allows adding values on
the fly.
+ * <p>This class is thread-safe for single writer multiple readers.
+ */
public class RealtimeInvertedIndexReader implements
InvertedIndexReader<MutableRoaringBitmap> {
private final List<ThreadSafeMutableRoaringBitmap> _bitmaps = new
ArrayList<>();
private final ReentrantReadWriteLock.ReadLock _readLock;
@@ -37,7 +41,7 @@ public class RealtimeInvertedIndexReader implements
InvertedIndexReader<MutableR
}
/**
- * Add the document id to the bitmap for the given dictionary id.
+ * Adds the document id to the bitmap of the given dictionary id.
*/
public void add(int dictId, int docId) {
if (_bitmaps.size() == dictId) {
@@ -60,6 +64,13 @@ public class RealtimeInvertedIndexReader implements
InvertedIndexReader<MutableR
ThreadSafeMutableRoaringBitmap bitmap;
try {
_readLock.lock();
+ // NOTE: the given dictionary id might not be added to the inverted
index yet. We first add the value to the
+ // dictionary. Before the value is added to the inverted index, the
query might have predicates that match the
+ // newly added value. In that case, the given dictionary id does not
exist in the inverted index, and we return an
+ // empty bitmap. For multi-valued column, the dictionary id might be
larger than the bitmap size (not equal).
+ if (_bitmaps.size() <= dictId) {
+ return new MutableRoaringBitmap();
+ }
bitmap = _bitmaps.get(dictId);
} finally {
_readLock.unlock();
diff --git
a/pinot-core/src/test/java/org/apache/pinot/core/realtime/impl/invertedindex/RealtimeInvertedIndexReaderTest.java
b/pinot-core/src/test/java/org/apache/pinot/core/realtime/impl/invertedindex/RealtimeInvertedIndexReaderTest.java
new file mode 100644
index 0000000..944eb74
--- /dev/null
+++
b/pinot-core/src/test/java/org/apache/pinot/core/realtime/impl/invertedindex/RealtimeInvertedIndexReaderTest.java
@@ -0,0 +1,106 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.core.realtime.impl.invertedindex;
+
+import org.roaringbitmap.buffer.MutableRoaringBitmap;
+import org.testng.annotations.Test;
+
+import static org.testng.Assert.assertFalse;
+import static org.testng.Assert.assertNotNull;
+import static org.testng.Assert.assertTrue;
+
+
+public class RealtimeInvertedIndexReaderTest {
+
+ @Test
+ public void testRealtimeInvertedIndexReader() {
+ RealtimeInvertedIndexReader realtimeInvertedIndexReader = new
RealtimeInvertedIndexReader();
+
+ // Add dictionary id 0, document id 0 to the inverted index (single-value
dictionary id not added yet)
+ // Before adding
+ MutableRoaringBitmap docIds = realtimeInvertedIndexReader.getDocIds(0);
+ assertNotNull(docIds);
+ assertTrue(docIds.isEmpty());
+ // After adding
+ realtimeInvertedIndexReader.add(0, 0);
+ docIds = realtimeInvertedIndexReader.getDocIds(0);
+ assertNotNull(docIds);
+ assertFalse(docIds.isEmpty());
+ assertTrue(docIds.contains(0));
+ assertFalse(docIds.contains(1));
+
+ // Add dictionary id 0, document id 1 to the inverted index (single-value
dictionary id already added)
+ // Before adding
+ docIds = realtimeInvertedIndexReader.getDocIds(1);
+ assertNotNull(docIds);
+ assertTrue(docIds.isEmpty());
+ realtimeInvertedIndexReader.add(0, 1);
+ // After adding
+ docIds = realtimeInvertedIndexReader.getDocIds(0);
+ assertNotNull(docIds);
+ assertFalse(docIds.isEmpty());
+ assertTrue(docIds.contains(0));
+ assertTrue(docIds.contains(1));
+
+ // Add dictionary id 1 and 2, document id 2 to the inverted index
(multi-value dictionary ids not added yet)
+ // Before adding dictionary id 1
+ docIds = realtimeInvertedIndexReader.getDocIds(1);
+ assertNotNull(docIds);
+ assertTrue(docIds.isEmpty());
+ docIds = realtimeInvertedIndexReader.getDocIds(2);
+ assertNotNull(docIds);
+ assertTrue(docIds.isEmpty());
+ // After adding dictionary id 1 but before adding dictionary id 2
+ realtimeInvertedIndexReader.add(1, 2);
+ docIds = realtimeInvertedIndexReader.getDocIds(0);
+ assertNotNull(docIds);
+ assertFalse(docIds.isEmpty());
+ assertTrue(docIds.contains(0));
+ assertTrue(docIds.contains(1));
+ assertFalse(docIds.contains(2));
+ docIds = realtimeInvertedIndexReader.getDocIds(1);
+ assertNotNull(docIds);
+ assertFalse(docIds.isEmpty());
+ assertFalse(docIds.contains(0));
+ assertFalse(docIds.contains(1));
+ assertTrue(docIds.contains(2));
+ docIds = realtimeInvertedIndexReader.getDocIds(2);
+ assertNotNull(docIds);
+ assertTrue(docIds.isEmpty());
+ // After adding dictionary id 2
+ realtimeInvertedIndexReader.add(2, 2);
+ docIds = realtimeInvertedIndexReader.getDocIds(0);
+ assertNotNull(docIds);
+ assertFalse(docIds.isEmpty());
+ assertTrue(docIds.contains(0));
+ assertTrue(docIds.contains(1));
+ assertFalse(docIds.contains(2));
+ docIds = realtimeInvertedIndexReader.getDocIds(1);
+ assertNotNull(docIds);
+ assertFalse(docIds.isEmpty());
+ assertFalse(docIds.contains(0));
+ assertFalse(docIds.contains(1));
+ assertTrue(docIds.contains(2));
+ docIds = realtimeInvertedIndexReader.getDocIds(2);
+ assertFalse(docIds.isEmpty());
+ assertFalse(docIds.contains(0));
+ assertFalse(docIds.contains(1));
+ assertTrue(docIds.contains(2));
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]