This is an automated email from the ASF dual-hosted git repository.

jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new 57d32d7  Fix the range index (#5414)
57d32d7 is described below

commit 57d32d7a3e33db58257ad9fbefcb7726d27e0d73
Author: Xiaotian (Jackie) Jiang <17555551+jackie-ji...@users.noreply.github.com>
AuthorDate: Thu May 21 11:08:41 2020 -0700

    Fix the range index (#5414)
    
    - Fix the numValuesPerRange calculation logic of range index, which can 
cause IndexOutOfBoundException
    - Fix the rangeId calculation in RangeIndexBasedFilterOperator, where 
OfflineDictionaryBasedRangePredicateEvaluator.getEndDictId() is exclusive
    - Add RangeFilterOperator into the reorder logic
---
 .../ExpressionScanDocIdIterator.java               |  4 +-
 .../dociditerators/MVScanDocIdIterator.java        |  5 +-
 .../dociditerators/SVScanDocIdIterator.java        |  5 +-
 .../dociditerators/ScanBasedDocIdIterator.java     |  3 +-
 .../core/operator/filter/FilterOperatorUtils.java  | 13 ++--
 ...tor.java => RangeIndexBasedFilterOperator.java} | 73 +++++++++-------------
 .../creator/impl/inv/RangeIndexCreator.java        | 26 ++++----
 .../tests/BaseClusterIntegrationTest.java          |  4 +-
 8 files changed, 60 insertions(+), 73 deletions(-)

diff --git 
a/pinot-core/src/main/java/org/apache/pinot/core/operator/dociditerators/ExpressionScanDocIdIterator.java
 
b/pinot-core/src/main/java/org/apache/pinot/core/operator/dociditerators/ExpressionScanDocIdIterator.java
index de49bab..8c5869e 100644
--- 
a/pinot-core/src/main/java/org/apache/pinot/core/operator/dociditerators/ExpressionScanDocIdIterator.java
+++ 
b/pinot-core/src/main/java/org/apache/pinot/core/operator/dociditerators/ExpressionScanDocIdIterator.java
@@ -153,8 +153,8 @@ public class ExpressionScanDocIdIterator implements 
ScanBasedDocIdIterator {
   }
 
   @Override
-  public MutableRoaringBitmap applyAnd(MutableRoaringBitmap answer) {
-    ProjectionOperator projectionOperator = new 
ProjectionOperator(_dataSourceMap, new BitmapDocIdSetOperator(answer));
+  public MutableRoaringBitmap applyAnd(ImmutableRoaringBitmap docIds) {
+    ProjectionOperator projectionOperator = new 
ProjectionOperator(_dataSourceMap, new BitmapDocIdSetOperator(docIds));
     MutableRoaringBitmap matchingDocIds = new MutableRoaringBitmap();
     ProjectionBlock projectionBlock;
     while ((projectionBlock = projectionOperator.nextBlock()) != null) {
diff --git 
a/pinot-core/src/main/java/org/apache/pinot/core/operator/dociditerators/MVScanDocIdIterator.java
 
b/pinot-core/src/main/java/org/apache/pinot/core/operator/dociditerators/MVScanDocIdIterator.java
index 306b026..6dc06f4 100644
--- 
a/pinot-core/src/main/java/org/apache/pinot/core/operator/dociditerators/MVScanDocIdIterator.java
+++ 
b/pinot-core/src/main/java/org/apache/pinot/core/operator/dociditerators/MVScanDocIdIterator.java
@@ -25,6 +25,7 @@ import org.apache.pinot.core.common.BlockValSet;
 import org.apache.pinot.core.common.Constants;
 import org.apache.pinot.core.operator.filter.predicate.PredicateEvaluator;
 import org.roaringbitmap.IntIterator;
+import org.roaringbitmap.buffer.ImmutableRoaringBitmap;
 import org.roaringbitmap.buffer.MutableRoaringBitmap;
 
 
@@ -132,13 +133,13 @@ public class MVScanDocIdIterator implements 
ScanBasedDocIdIterator {
   }
 
   @Override
-  public MutableRoaringBitmap applyAnd(MutableRoaringBitmap answer) {
+  public MutableRoaringBitmap applyAnd(ImmutableRoaringBitmap docIds) {
 
     MutableRoaringBitmap result = new MutableRoaringBitmap();
     if (evaluator.isAlwaysFalse()) {
       return result;
     }
-    IntIterator intIterator = answer.getIntIterator();
+    IntIterator intIterator = docIds.getIntIterator();
     int docId = -1, length;
     while (intIterator.hasNext() && docId < endDocId) {
       docId = intIterator.next();
diff --git 
a/pinot-core/src/main/java/org/apache/pinot/core/operator/dociditerators/SVScanDocIdIterator.java
 
b/pinot-core/src/main/java/org/apache/pinot/core/operator/dociditerators/SVScanDocIdIterator.java
index d2e5efd..b9d85dc 100644
--- 
a/pinot-core/src/main/java/org/apache/pinot/core/operator/dociditerators/SVScanDocIdIterator.java
+++ 
b/pinot-core/src/main/java/org/apache/pinot/core/operator/dociditerators/SVScanDocIdIterator.java
@@ -25,6 +25,7 @@ import org.apache.pinot.core.common.Constants;
 import org.apache.pinot.core.operator.filter.predicate.PredicateEvaluator;
 import org.apache.pinot.spi.data.FieldSpec;
 import org.roaringbitmap.IntIterator;
+import org.roaringbitmap.buffer.ImmutableRoaringBitmap;
 import org.roaringbitmap.buffer.MutableRoaringBitmap;
 
 
@@ -138,12 +139,12 @@ public class SVScanDocIdIterator implements 
ScanBasedDocIdIterator {
   }
 
   @Override
-  public MutableRoaringBitmap applyAnd(MutableRoaringBitmap answer) {
+  public MutableRoaringBitmap applyAnd(ImmutableRoaringBitmap docIds) {
     MutableRoaringBitmap result = new MutableRoaringBitmap();
     if (_evaluator.isAlwaysFalse()) {
       return result;
     }
-    IntIterator intIterator = answer.getIntIterator();
+    IntIterator intIterator = docIds.getIntIterator();
     int docId = -1;
     while (intIterator.hasNext() && docId < _endDocId) {
       docId = intIterator.next();
diff --git 
a/pinot-core/src/main/java/org/apache/pinot/core/operator/dociditerators/ScanBasedDocIdIterator.java
 
b/pinot-core/src/main/java/org/apache/pinot/core/operator/dociditerators/ScanBasedDocIdIterator.java
index 9255302..0af132d 100644
--- 
a/pinot-core/src/main/java/org/apache/pinot/core/operator/dociditerators/ScanBasedDocIdIterator.java
+++ 
b/pinot-core/src/main/java/org/apache/pinot/core/operator/dociditerators/ScanBasedDocIdIterator.java
@@ -19,6 +19,7 @@
 package org.apache.pinot.core.operator.dociditerators;
 
 import org.apache.pinot.core.common.BlockDocIdIterator;
+import org.roaringbitmap.buffer.ImmutableRoaringBitmap;
 import org.roaringbitmap.buffer.MutableRoaringBitmap;
 
 
@@ -33,7 +34,7 @@ public interface ScanBasedDocIdIterator extends 
BlockDocIdIterator {
 
   boolean isMatch(int docId);
 
-  MutableRoaringBitmap applyAnd(MutableRoaringBitmap answer);
+  MutableRoaringBitmap applyAnd(ImmutableRoaringBitmap docIds);
 
   /**
    * Get number of entries scanned.
diff --git 
a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/FilterOperatorUtils.java
 
b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/FilterOperatorUtils.java
index ed134b1..1faa669 100644
--- 
a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/FilterOperatorUtils.java
+++ 
b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/FilterOperatorUtils.java
@@ -57,7 +57,7 @@ public class FilterOperatorUtils {
     //Only for dictionary encoded columns and offline data sources
     if (predicateType == Predicate.Type.RANGE && dataSource.getDictionary() != 
null
         && dataSource.getRangeIndex() != null) {
-      return new RangeFilterOperator(predicateEvaluator, dataSource, 
startDocId, endDocId);
+      return new RangeIndexBasedFilterOperator(predicateEvaluator, dataSource, 
startDocId, endDocId);
     }
 
     if (predicateType == Predicate.Type.TEXT_MATCH) {
@@ -150,17 +150,20 @@ public class FilterOperatorUtils {
         if (filterOperator instanceof BitmapBasedFilterOperator) {
           return 1;
         }
-        if (filterOperator instanceof TextMatchFilterOperator) {
+        if (filterOperator instanceof RangeIndexBasedFilterOperator) {
           return 2;
         }
-        if (filterOperator instanceof AndFilterOperator) {
+        if (filterOperator instanceof TextMatchFilterOperator) {
           return 3;
         }
-        if (filterOperator instanceof OrFilterOperator) {
+        if (filterOperator instanceof AndFilterOperator) {
           return 4;
         }
+        if (filterOperator instanceof OrFilterOperator) {
+          return 5;
+        }
         if (filterOperator instanceof ScanBasedFilterOperator) {
-          return getScanBasedFilterPriority((ScanBasedFilterOperator) 
filterOperator, 5, debugOptions);
+          return getScanBasedFilterPriority((ScanBasedFilterOperator) 
filterOperator, 6, debugOptions);
         }
         if (filterOperator instanceof ExpressionFilterOperator) {
           return 10;
diff --git 
a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/RangeFilterOperator.java
 
b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/RangeIndexBasedFilterOperator.java
similarity index 52%
rename from 
pinot-core/src/main/java/org/apache/pinot/core/operator/filter/RangeFilterOperator.java
rename to 
pinot-core/src/main/java/org/apache/pinot/core/operator/filter/RangeIndexBasedFilterOperator.java
index 8398bef..b62045f 100644
--- 
a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/RangeFilterOperator.java
+++ 
b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/RangeIndexBasedFilterOperator.java
@@ -19,33 +19,27 @@
 package org.apache.pinot.core.operator.filter;
 
 import com.google.common.base.Preconditions;
-import java.util.ArrayList;
-import java.util.List;
-import org.apache.pinot.core.common.BlockDocIdIterator;
 import org.apache.pinot.core.common.DataSource;
-import org.apache.pinot.core.common.Predicate;
 import org.apache.pinot.core.operator.blocks.FilterBlock;
-import org.apache.pinot.core.operator.dociditerators.MVScanDocIdIterator;
-import org.apache.pinot.core.operator.dociditerators.SVScanDocIdIterator;
 import org.apache.pinot.core.operator.docidsets.BitmapDocIdSet;
-import org.apache.pinot.core.operator.docidsets.FilterBlockDocIdSet;
+import org.apache.pinot.core.operator.docidsets.ScanBasedDocIdSet;
 import org.apache.pinot.core.operator.filter.predicate.PredicateEvaluator;
-import 
org.apache.pinot.core.operator.filter.predicate.RangePredicateEvaluatorFactory;
+import 
org.apache.pinot.core.operator.filter.predicate.RangePredicateEvaluatorFactory.OfflineDictionaryBasedRangePredicateEvaluator;
 import org.apache.pinot.core.segment.index.readers.RangeIndexReader;
 import org.roaringbitmap.buffer.ImmutableRoaringBitmap;
 import org.roaringbitmap.buffer.MutableRoaringBitmap;
 
 
-public class RangeFilterOperator extends BaseFilterOperator {
+public class RangeIndexBasedFilterOperator extends BaseFilterOperator {
   private static final String OPERATOR_NAME = "RangeFilterOperator";
 
-  private final PredicateEvaluator _predicateEvaluator;
+  private final OfflineDictionaryBasedRangePredicateEvaluator 
_predicateEvaluator;
   private final DataSource _dataSource;
   private final int _startDocId;
   private final int _endDocId;
   private final boolean _exclusive;
 
-  public RangeFilterOperator(PredicateEvaluator predicateEvaluator, DataSource 
dataSource, int startDocId,
+  public RangeIndexBasedFilterOperator(PredicateEvaluator predicateEvaluator, 
DataSource dataSource, int startDocId,
       int endDocId) {
     // NOTE:
     // Predicate that is always evaluated as true or false should not be 
passed into the BitmapBasedFilterOperator for
@@ -54,7 +48,8 @@ public class RangeFilterOperator extends BaseFilterOperator {
     // use EmptyFilterOperator.
     Preconditions.checkArgument(!predicateEvaluator.isAlwaysTrue() && 
!predicateEvaluator.isAlwaysFalse());
 
-    _predicateEvaluator = predicateEvaluator;
+    // NOTE: Only dictionary-based evaluator is supported for now
+    _predicateEvaluator = (OfflineDictionaryBasedRangePredicateEvaluator) 
predicateEvaluator;
     _dataSource = dataSource;
     _startDocId = startDocId;
     _endDocId = endDocId;
@@ -63,48 +58,36 @@ public class RangeFilterOperator extends BaseFilterOperator 
{
 
   @Override
   protected FilterBlock getNextBlock() {
-
-    //only dictionary based is supported for now
-    Preconditions.checkState(_predicateEvaluator instanceof 
RangePredicateEvaluatorFactory.OfflineDictionaryBasedRangePredicateEvaluator);
-
-    
RangePredicateEvaluatorFactory.OfflineDictionaryBasedRangePredicateEvaluator 
evaluator =
-        
(RangePredicateEvaluatorFactory.OfflineDictionaryBasedRangePredicateEvaluator) 
_predicateEvaluator;
-
     RangeIndexReader rangeIndexReader = (RangeIndexReader) 
_dataSource.getRangeIndex();
-    int startRangeId = 
rangeIndexReader.findRangeId(evaluator.getStartDictId());
-    int endRangeId = rangeIndexReader.findRangeId(evaluator.getEndDictId());
+    assert rangeIndexReader != null;
+    int firstRangeId = 
rangeIndexReader.findRangeId(_predicateEvaluator.getStartDictId());
+    int lastRangeId = 
rangeIndexReader.findRangeId(_predicateEvaluator.getEndDictId() - 1);
     //Handle Matching Ranges - some ranges match fully but some partially
     //below code assumes first and last range always match partially which may 
not be the case always //todo: optimize it
-    MutableRoaringBitmap mutableRoaringBitmap = new MutableRoaringBitmap();
-    mutableRoaringBitmap.or(rangeIndexReader.getDocIds(startRangeId));
-    if (endRangeId != startRangeId) {
-      mutableRoaringBitmap.or(rangeIndexReader.getDocIds(endRangeId));
-    }
-    final ScanBasedFilterOperator scanBasedFilterOperator =
-        new ScanBasedFilterOperator(_predicateEvaluator, _dataSource, 
_startDocId, _endDocId);
-    FilterBlockDocIdSet scanBlockDocIdSet = 
scanBasedFilterOperator.getNextBlock().getBlockDocIdSet();
-    BlockDocIdIterator iterator = scanBlockDocIdSet.iterator();
-
-    List<ImmutableRoaringBitmap> bitmapList = new ArrayList<>();
-    if (_dataSource.getDataSourceMetadata().isSingleValue()) {
-      bitmapList.add(((SVScanDocIdIterator) 
iterator).applyAnd(mutableRoaringBitmap));
+    ImmutableRoaringBitmap docIdsToScan;
+    if (firstRangeId == lastRangeId) {
+      docIdsToScan = rangeIndexReader.getDocIds(firstRangeId);
     } else {
-      bitmapList.add(((MVScanDocIdIterator) 
iterator).applyAnd(mutableRoaringBitmap));
+      docIdsToScan =
+          ImmutableRoaringBitmap.or(rangeIndexReader.getDocIds(firstRangeId), 
rangeIndexReader.getDocIds(lastRangeId));
     }
+    ScanBasedFilterOperator scanBasedFilterOperator =
+        new ScanBasedFilterOperator(_predicateEvaluator, _dataSource, 
_startDocId, _endDocId);
+    ScanBasedDocIdSet scanBasedDocIdSet = (ScanBasedDocIdSet) 
scanBasedFilterOperator.getNextBlock().getBlockDocIdSet();
+    MutableRoaringBitmap docIds = 
scanBasedDocIdSet.iterator().applyAnd(docIdsToScan);
 
     //All the intermediate ranges will be full match
-    for (int rangeId = startRangeId + 1; rangeId < endRangeId; rangeId++) {
-      bitmapList.add(rangeIndexReader.getDocIds(rangeId));
+    for (int rangeId = firstRangeId + 1; rangeId < lastRangeId; rangeId++) {
+      docIds.or(rangeIndexReader.getDocIds(rangeId));
     }
-    ImmutableRoaringBitmap[] bitmaps = new 
ImmutableRoaringBitmap[bitmapList.size()];
-    bitmapList.toArray(bitmaps);
-    return new FilterBlock(new BitmapDocIdSet(bitmaps, _startDocId, _endDocId, 
_exclusive) {
+    return new FilterBlock(
+        new BitmapDocIdSet(new ImmutableRoaringBitmap[]{docIds}, _startDocId, 
_endDocId, _exclusive) {
 
-      @Override
-      public long getNumEntriesScannedInFilter() {
-        return scanBlockDocIdSet.getNumEntriesScannedInFilter();
-      }
-    });
+          @Override
+          public long getNumEntriesScannedInFilter() {
+            return scanBasedDocIdSet.getNumEntriesScannedInFilter();
+          }
+        });
   }
 
   @Override
diff --git 
a/pinot-core/src/main/java/org/apache/pinot/core/segment/creator/impl/inv/RangeIndexCreator.java
 
b/pinot-core/src/main/java/org/apache/pinot/core/segment/creator/impl/inv/RangeIndexCreator.java
index ab33ea2..8e1934d 100644
--- 
a/pinot-core/src/main/java/org/apache/pinot/core/segment/creator/impl/inv/RangeIndexCreator.java
+++ 
b/pinot-core/src/main/java/org/apache/pinot/core/segment/creator/impl/inv/RangeIndexCreator.java
@@ -34,7 +34,6 @@ import java.util.List;
 import org.apache.commons.io.FileUtils;
 import org.apache.pinot.core.query.utils.Pair;
 import org.apache.pinot.core.segment.creator.InvertedIndexCreator;
-import org.apache.pinot.core.segment.creator.impl.SegmentColumnarIndexCreator;
 import org.apache.pinot.core.segment.memory.PinotDataBuffer;
 import org.apache.pinot.spi.data.FieldSpec;
 import org.roaringbitmap.buffer.MutableRoaringBitmap;
@@ -100,7 +99,7 @@ public final class RangeIndexCreator implements 
InvertedIndexCreator {
   private final int _numValues;
   private int _nextDocId;
   private int _nextValueId;
-  private int _numDocsPerRange;
+  private int _numValuesPerRange;
   private FieldSpec.DataType _valueType;
 
   /**
@@ -108,14 +107,14 @@ public final class RangeIndexCreator implements 
InvertedIndexCreator {
    * @param indexDir destination of the range index file
    * @param fieldSpec fieldspec of the column to generate the range index
    * @param valueType DataType of the column, INT if dictionary encoded, or 
INT, FLOAT, LONG, DOUBLE for raw encoded
-   * @param numRanges customize the number of ranges, if -1, we use 
DEFAULT_NUM_RANGES;
-   * @param numDocsPerRange customize the number of Docs Per Range, if -1 
numDocsPerRange = totalValues/numRanges
+   * @param numRanges Number of ranges, use DEFAULT_NUM_RANGES if not 
configured (<= 0)
+   * @param numValuesPerRange Number of values per range, calculate from 
numRanges if not configured (<= 0)
    * @param numDocs total number of documents
    * @param numValues total number of values, used for Multi value columns 
(for single value columns numDocs== numValues)
    * @throws IOException
    */
   public RangeIndexCreator(File indexDir, FieldSpec fieldSpec, 
FieldSpec.DataType valueType, int numRanges,
-      int numDocsPerRange, int numDocs, int numValues)
+      int numValuesPerRange, int numDocs, int numValues)
       throws IOException {
     _valueType = valueType;
     String columnName = fieldSpec.getName();
@@ -125,12 +124,14 @@ public final class RangeIndexCreator implements 
InvertedIndexCreator {
     _numValues = fieldSpec.isSingleValueField() ? numDocs : numValues;
     int valueSize = valueType.size();
     try {
-      //use DEFAULT_NUM_RANGES if numRanges is not set
-      if (numRanges < 0) {
-        numRanges = DEFAULT_NUM_RANGES;
-      }
-      if (numDocsPerRange < 0) {
-        _numDocsPerRange = (int) Math.ceil(_numValues / numRanges);
+      Preconditions.checkArgument(numRanges <= 0 || numValuesPerRange <= 0,
+          "At most one of 'numRanges' and 'numValuesPerRange' should be 
configured");
+      if (numRanges > 0) {
+        _numValuesPerRange = (_numValues + numRanges - 1) / numRanges;
+      } else if (numValuesPerRange > 0) {
+        _numValuesPerRange = numValuesPerRange;
+      } else {
+        _numValuesPerRange = (_numValues + DEFAULT_NUM_RANGES - 1) / 
DEFAULT_NUM_RANGES;
       }
 
       //Value buffer to store the values added via add method
@@ -234,7 +235,7 @@ public final class RangeIndexCreator implements 
InvertedIndexCreator {
     //go over the sorted value to compute ranges
     List<Pair<Integer, Integer>> ranges = new ArrayList<>();
 
-    int boundary = _numDocsPerRange;
+    int boundary = _numValuesPerRange;
     int start = 0;
     for (int i = 0; i < _numValues; i++) {
       if (i > start + boundary) {
@@ -412,7 +413,6 @@ public final class RangeIndexCreator implements 
InvertedIndexCreator {
     }
     valuesAsString.append("] ");
     LOGGER.info(valuesAsString.toString());
-
   }
 
   private PinotDataBuffer createTempBuffer(long size, File mmapFile)
diff --git 
a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/BaseClusterIntegrationTest.java
 
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/BaseClusterIntegrationTest.java
index 7dcdeda..3a34df0 100644
--- 
a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/BaseClusterIntegrationTest.java
+++ 
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/BaseClusterIntegrationTest.java
@@ -192,9 +192,7 @@ public abstract class BaseClusterIntegrationTest extends 
ClusterTest {
 
   @Nullable
   protected List<String> getRangeIndexColumns() {
-    return null;
-    // TODO: Fix the range index then uncomment this line
-//    return DEFAULT_RANGE_INDEX_COLUMNS;
+    return DEFAULT_RANGE_INDEX_COLUMNS;
   }
 
   @Nullable


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org

Reply via email to