This is an automated email from the ASF dual-hosted git repository.

jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new e9a49ec  Fix the race condition for real-time inverted index reader 
(#4051)
e9a49ec is described below

commit e9a49ecc260e93ff696b9facf67c14a60b8ecbbd
Author: Xiaotian (Jackie) Jiang <[email protected]>
AuthorDate: Tue Apr 2 12:55:44 2019 -0700

    Fix the race condition for real-time inverted index reader (#4051)
    
    When getting document ids from RealtimeInvertedIndexReader, the given 
dictionary id might not be added to the inverted index yet. We first add the 
value to the dictionary. Before the value is added to the inverted index, the 
query might have predicates that match the newly added value. In that case, the 
given dictionary id does not exist in the inverted index, and we return an 
empty bitmap.
    
    Simplify BitmapBasedFilterOperator because getDocIds() never returns null.
    Add a test for the change.
---
 .../operator/filter/BitmapBasedFilterOperator.java |  26 +----
 .../invertedindex/RealtimeInvertedIndexReader.java |  13 ++-
 .../RealtimeInvertedIndexReaderTest.java           | 106 +++++++++++++++++++++
 3 files changed, 122 insertions(+), 23 deletions(-)

diff --git 
a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/BitmapBasedFilterOperator.java
 
b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/BitmapBasedFilterOperator.java
index 64d7c3e..b2b2883 100644
--- 
a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/BitmapBasedFilterOperator.java
+++ 
b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/BitmapBasedFilterOperator.java
@@ -19,20 +19,15 @@
 package org.apache.pinot.core.operator.filter;
 
 import com.google.common.base.Preconditions;
-import java.util.ArrayList;
-import java.util.List;
 import org.apache.pinot.core.common.DataSource;
 import org.apache.pinot.core.operator.blocks.FilterBlock;
 import org.apache.pinot.core.operator.docidsets.BitmapDocIdSet;
 import org.apache.pinot.core.operator.filter.predicate.PredicateEvaluator;
 import org.apache.pinot.core.segment.index.readers.InvertedIndexReader;
 import org.roaringbitmap.buffer.ImmutableRoaringBitmap;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 
 
 public class BitmapBasedFilterOperator extends BaseFilterOperator {
-  private static final Logger LOGGER = 
LoggerFactory.getLogger(BitmapBasedFilterOperator.class);
   private static final String OPERATOR_NAME = "BitmapBasedFilterOperator";
 
   private final PredicateEvaluator _predicateEvaluator;
@@ -78,27 +73,14 @@ public class BitmapBasedFilterOperator extends 
BaseFilterOperator {
 
     int[] dictIds = _exclusive ? _predicateEvaluator.getNonMatchingDictIds() : 
_predicateEvaluator.getMatchingDictIds();
 
-    // For realtime use case, it is possible that inverted index has not yet 
generated for the given dict id, so we
-    // filter out null bitmaps
     InvertedIndexReader invertedIndex = _dataSource.getInvertedIndex();
     int length = dictIds.length;
-    List<ImmutableRoaringBitmap> bitmaps = new ArrayList<>(length);
-    for (int dictId : dictIds) {
-      ImmutableRoaringBitmap bitmap = (ImmutableRoaringBitmap) 
invertedIndex.getDocIds(dictId);
-      if (bitmap != null) {
-        bitmaps.add(bitmap);
-      }
+    ImmutableRoaringBitmap[] bitmaps = new ImmutableRoaringBitmap[length];
+    for (int i = 0; i < length; i++) {
+      bitmaps[i] = (ImmutableRoaringBitmap) 
invertedIndex.getDocIds(dictIds[i]);
     }
 
-    // Log size diff to verify the fix
-    int numBitmaps = bitmaps.size();
-    if (numBitmaps != length) {
-      LOGGER.info("Not all inverted indexes are generated, numDictIds: {}, 
numBitmaps: {}", length, numBitmaps);
-    }
-
-    return new FilterBlock(
-        new BitmapDocIdSet(bitmaps.toArray(new 
ImmutableRoaringBitmap[numBitmaps]), _startDocId, _endDocId,
-            _exclusive));
+    return new FilterBlock(new BitmapDocIdSet(bitmaps, _startDocId, _endDocId, 
_exclusive));
   }
 
   @Override
diff --git 
a/pinot-core/src/main/java/org/apache/pinot/core/realtime/impl/invertedindex/RealtimeInvertedIndexReader.java
 
b/pinot-core/src/main/java/org/apache/pinot/core/realtime/impl/invertedindex/RealtimeInvertedIndexReader.java
index 363f688..1ff94f6 100644
--- 
a/pinot-core/src/main/java/org/apache/pinot/core/realtime/impl/invertedindex/RealtimeInvertedIndexReader.java
+++ 
b/pinot-core/src/main/java/org/apache/pinot/core/realtime/impl/invertedindex/RealtimeInvertedIndexReader.java
@@ -25,6 +25,10 @@ import 
org.apache.pinot.core.segment.index.readers.InvertedIndexReader;
 import org.roaringbitmap.buffer.MutableRoaringBitmap;
 
 
+/**
+ * Real-time bitmap based inverted index reader which allows adding values on 
the fly.
+ * <p>This class is thread-safe for single writer multiple readers.
+ */
 public class RealtimeInvertedIndexReader implements 
InvertedIndexReader<MutableRoaringBitmap> {
   private final List<ThreadSafeMutableRoaringBitmap> _bitmaps = new 
ArrayList<>();
   private final ReentrantReadWriteLock.ReadLock _readLock;
@@ -37,7 +41,7 @@ public class RealtimeInvertedIndexReader implements 
InvertedIndexReader<MutableR
   }
 
   /**
-   * Add the document id to the bitmap for the given dictionary id.
+   * Adds the document id to the bitmap of the given dictionary id.
    */
   public void add(int dictId, int docId) {
     if (_bitmaps.size() == dictId) {
@@ -60,6 +64,13 @@ public class RealtimeInvertedIndexReader implements 
InvertedIndexReader<MutableR
     ThreadSafeMutableRoaringBitmap bitmap;
     try {
       _readLock.lock();
+      // NOTE: the given dictionary id might not be added to the inverted 
index yet. We first add the value to the
+      // dictionary. Before the value is added to the inverted index, the 
query might have predicates that match the
+      // newly added value. In that case, the given dictionary id does not 
exist in the inverted index, and we return an
+      // empty bitmap. For multi-valued column, the dictionary id might be 
larger than the bitmap size (not equal).
+      if (_bitmaps.size() <= dictId) {
+        return new MutableRoaringBitmap();
+      }
       bitmap = _bitmaps.get(dictId);
     } finally {
       _readLock.unlock();
diff --git 
a/pinot-core/src/test/java/org/apache/pinot/core/realtime/impl/invertedindex/RealtimeInvertedIndexReaderTest.java
 
b/pinot-core/src/test/java/org/apache/pinot/core/realtime/impl/invertedindex/RealtimeInvertedIndexReaderTest.java
new file mode 100644
index 0000000..944eb74
--- /dev/null
+++ 
b/pinot-core/src/test/java/org/apache/pinot/core/realtime/impl/invertedindex/RealtimeInvertedIndexReaderTest.java
@@ -0,0 +1,106 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.core.realtime.impl.invertedindex;
+
+import org.roaringbitmap.buffer.MutableRoaringBitmap;
+import org.testng.annotations.Test;
+
+import static org.testng.Assert.assertFalse;
+import static org.testng.Assert.assertNotNull;
+import static org.testng.Assert.assertTrue;
+
+
+public class RealtimeInvertedIndexReaderTest {
+
+  @Test
+  public void testRealtimeInvertedIndexReader() {
+    RealtimeInvertedIndexReader realtimeInvertedIndexReader = new 
RealtimeInvertedIndexReader();
+
+    // Add dictionary id 0, document id 0 to the inverted index (single-value 
dictionary id not added yet)
+    // Before adding
+    MutableRoaringBitmap docIds = realtimeInvertedIndexReader.getDocIds(0);
+    assertNotNull(docIds);
+    assertTrue(docIds.isEmpty());
+    // After adding
+    realtimeInvertedIndexReader.add(0, 0);
+    docIds = realtimeInvertedIndexReader.getDocIds(0);
+    assertNotNull(docIds);
+    assertFalse(docIds.isEmpty());
+    assertTrue(docIds.contains(0));
+    assertFalse(docIds.contains(1));
+
+    // Add dictionary id 0, document id 1 to the inverted index (single-value 
dictionary id already added)
+    // Before adding
+    docIds = realtimeInvertedIndexReader.getDocIds(1);
+    assertNotNull(docIds);
+    assertTrue(docIds.isEmpty());
+    realtimeInvertedIndexReader.add(0, 1);
+    // After adding
+    docIds = realtimeInvertedIndexReader.getDocIds(0);
+    assertNotNull(docIds);
+    assertFalse(docIds.isEmpty());
+    assertTrue(docIds.contains(0));
+    assertTrue(docIds.contains(1));
+
+    // Add dictionary id 1 and 2, document id 2 to the inverted index 
(multi-value dictionary ids not added yet)
+    // Before adding dictionary id 1
+    docIds = realtimeInvertedIndexReader.getDocIds(1);
+    assertNotNull(docIds);
+    assertTrue(docIds.isEmpty());
+    docIds = realtimeInvertedIndexReader.getDocIds(2);
+    assertNotNull(docIds);
+    assertTrue(docIds.isEmpty());
+    // After adding dictionary id 1 but before adding dictionary id 2
+    realtimeInvertedIndexReader.add(1, 2);
+    docIds = realtimeInvertedIndexReader.getDocIds(0);
+    assertNotNull(docIds);
+    assertFalse(docIds.isEmpty());
+    assertTrue(docIds.contains(0));
+    assertTrue(docIds.contains(1));
+    assertFalse(docIds.contains(2));
+    docIds = realtimeInvertedIndexReader.getDocIds(1);
+    assertNotNull(docIds);
+    assertFalse(docIds.isEmpty());
+    assertFalse(docIds.contains(0));
+    assertFalse(docIds.contains(1));
+    assertTrue(docIds.contains(2));
+    docIds = realtimeInvertedIndexReader.getDocIds(2);
+    assertNotNull(docIds);
+    assertTrue(docIds.isEmpty());
+    // After adding dictionary id 2
+    realtimeInvertedIndexReader.add(2, 2);
+    docIds = realtimeInvertedIndexReader.getDocIds(0);
+    assertNotNull(docIds);
+    assertFalse(docIds.isEmpty());
+    assertTrue(docIds.contains(0));
+    assertTrue(docIds.contains(1));
+    assertFalse(docIds.contains(2));
+    docIds = realtimeInvertedIndexReader.getDocIds(1);
+    assertNotNull(docIds);
+    assertFalse(docIds.isEmpty());
+    assertFalse(docIds.contains(0));
+    assertFalse(docIds.contains(1));
+    assertTrue(docIds.contains(2));
+    docIds = realtimeInvertedIndexReader.getDocIds(2);
+    assertFalse(docIds.isEmpty());
+    assertFalse(docIds.contains(0));
+    assertFalse(docIds.contains(1));
+    assertTrue(docIds.contains(2));
+  }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to