This is an automated email from the ASF dual-hosted git repository. jackie pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git
The following commit(s) were added to refs/heads/master by this push: new 57d32d7 Fix the range index (#5414) 57d32d7 is described below commit 57d32d7a3e33db58257ad9fbefcb7726d27e0d73 Author: Xiaotian (Jackie) Jiang <17555551+jackie-ji...@users.noreply.github.com> AuthorDate: Thu May 21 11:08:41 2020 -0700 Fix the range index (#5414) - Fix the numValuesPerRange calculation logic of range index, which can cause IndexOutOfBoundException - Fix the rangeId calculation in RangeIndexBasedFilterOperator, where OfflineDictionaryBasedRangePredicateEvaluator.getEndDictId() is exclusive - Add RangeFilterOperator into the reorder logic --- .../ExpressionScanDocIdIterator.java | 4 +- .../dociditerators/MVScanDocIdIterator.java | 5 +- .../dociditerators/SVScanDocIdIterator.java | 5 +- .../dociditerators/ScanBasedDocIdIterator.java | 3 +- .../core/operator/filter/FilterOperatorUtils.java | 13 ++-- ...tor.java => RangeIndexBasedFilterOperator.java} | 73 +++++++++------------- .../creator/impl/inv/RangeIndexCreator.java | 26 ++++---- .../tests/BaseClusterIntegrationTest.java | 4 +- 8 files changed, 60 insertions(+), 73 deletions(-) diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/dociditerators/ExpressionScanDocIdIterator.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/dociditerators/ExpressionScanDocIdIterator.java index de49bab..8c5869e 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/dociditerators/ExpressionScanDocIdIterator.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/dociditerators/ExpressionScanDocIdIterator.java @@ -153,8 +153,8 @@ public class ExpressionScanDocIdIterator implements ScanBasedDocIdIterator { } @Override - public MutableRoaringBitmap applyAnd(MutableRoaringBitmap answer) { - ProjectionOperator projectionOperator = new ProjectionOperator(_dataSourceMap, new BitmapDocIdSetOperator(answer)); + public MutableRoaringBitmap applyAnd(ImmutableRoaringBitmap docIds) { + ProjectionOperator projectionOperator = new ProjectionOperator(_dataSourceMap, new BitmapDocIdSetOperator(docIds)); MutableRoaringBitmap matchingDocIds = new MutableRoaringBitmap(); ProjectionBlock projectionBlock; while ((projectionBlock = projectionOperator.nextBlock()) != null) { diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/dociditerators/MVScanDocIdIterator.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/dociditerators/MVScanDocIdIterator.java index 306b026..6dc06f4 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/dociditerators/MVScanDocIdIterator.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/dociditerators/MVScanDocIdIterator.java @@ -25,6 +25,7 @@ import org.apache.pinot.core.common.BlockValSet; import org.apache.pinot.core.common.Constants; import org.apache.pinot.core.operator.filter.predicate.PredicateEvaluator; import org.roaringbitmap.IntIterator; +import org.roaringbitmap.buffer.ImmutableRoaringBitmap; import org.roaringbitmap.buffer.MutableRoaringBitmap; @@ -132,13 +133,13 @@ public class MVScanDocIdIterator implements ScanBasedDocIdIterator { } @Override - public MutableRoaringBitmap applyAnd(MutableRoaringBitmap answer) { + public MutableRoaringBitmap applyAnd(ImmutableRoaringBitmap docIds) { MutableRoaringBitmap result = new MutableRoaringBitmap(); if (evaluator.isAlwaysFalse()) { return result; } - IntIterator intIterator = answer.getIntIterator(); + IntIterator intIterator = docIds.getIntIterator(); int docId = -1, length; while (intIterator.hasNext() && docId < endDocId) { docId = intIterator.next(); diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/dociditerators/SVScanDocIdIterator.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/dociditerators/SVScanDocIdIterator.java index d2e5efd..b9d85dc 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/dociditerators/SVScanDocIdIterator.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/dociditerators/SVScanDocIdIterator.java @@ -25,6 +25,7 @@ import org.apache.pinot.core.common.Constants; import org.apache.pinot.core.operator.filter.predicate.PredicateEvaluator; import org.apache.pinot.spi.data.FieldSpec; import org.roaringbitmap.IntIterator; +import org.roaringbitmap.buffer.ImmutableRoaringBitmap; import org.roaringbitmap.buffer.MutableRoaringBitmap; @@ -138,12 +139,12 @@ public class SVScanDocIdIterator implements ScanBasedDocIdIterator { } @Override - public MutableRoaringBitmap applyAnd(MutableRoaringBitmap answer) { + public MutableRoaringBitmap applyAnd(ImmutableRoaringBitmap docIds) { MutableRoaringBitmap result = new MutableRoaringBitmap(); if (_evaluator.isAlwaysFalse()) { return result; } - IntIterator intIterator = answer.getIntIterator(); + IntIterator intIterator = docIds.getIntIterator(); int docId = -1; while (intIterator.hasNext() && docId < _endDocId) { docId = intIterator.next(); diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/dociditerators/ScanBasedDocIdIterator.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/dociditerators/ScanBasedDocIdIterator.java index 9255302..0af132d 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/dociditerators/ScanBasedDocIdIterator.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/dociditerators/ScanBasedDocIdIterator.java @@ -19,6 +19,7 @@ package org.apache.pinot.core.operator.dociditerators; import org.apache.pinot.core.common.BlockDocIdIterator; +import org.roaringbitmap.buffer.ImmutableRoaringBitmap; import org.roaringbitmap.buffer.MutableRoaringBitmap; @@ -33,7 +34,7 @@ public interface ScanBasedDocIdIterator extends BlockDocIdIterator { boolean isMatch(int docId); - MutableRoaringBitmap applyAnd(MutableRoaringBitmap answer); + MutableRoaringBitmap applyAnd(ImmutableRoaringBitmap docIds); /** * Get number of entries scanned. diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/FilterOperatorUtils.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/FilterOperatorUtils.java index ed134b1..1faa669 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/FilterOperatorUtils.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/FilterOperatorUtils.java @@ -57,7 +57,7 @@ public class FilterOperatorUtils { //Only for dictionary encoded columns and offline data sources if (predicateType == Predicate.Type.RANGE && dataSource.getDictionary() != null && dataSource.getRangeIndex() != null) { - return new RangeFilterOperator(predicateEvaluator, dataSource, startDocId, endDocId); + return new RangeIndexBasedFilterOperator(predicateEvaluator, dataSource, startDocId, endDocId); } if (predicateType == Predicate.Type.TEXT_MATCH) { @@ -150,17 +150,20 @@ public class FilterOperatorUtils { if (filterOperator instanceof BitmapBasedFilterOperator) { return 1; } - if (filterOperator instanceof TextMatchFilterOperator) { + if (filterOperator instanceof RangeIndexBasedFilterOperator) { return 2; } - if (filterOperator instanceof AndFilterOperator) { + if (filterOperator instanceof TextMatchFilterOperator) { return 3; } - if (filterOperator instanceof OrFilterOperator) { + if (filterOperator instanceof AndFilterOperator) { return 4; } + if (filterOperator instanceof OrFilterOperator) { + return 5; + } if (filterOperator instanceof ScanBasedFilterOperator) { - return getScanBasedFilterPriority((ScanBasedFilterOperator) filterOperator, 5, debugOptions); + return getScanBasedFilterPriority((ScanBasedFilterOperator) filterOperator, 6, debugOptions); } if (filterOperator instanceof ExpressionFilterOperator) { return 10; diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/RangeFilterOperator.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/RangeIndexBasedFilterOperator.java similarity index 52% rename from pinot-core/src/main/java/org/apache/pinot/core/operator/filter/RangeFilterOperator.java rename to pinot-core/src/main/java/org/apache/pinot/core/operator/filter/RangeIndexBasedFilterOperator.java index 8398bef..b62045f 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/RangeFilterOperator.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/RangeIndexBasedFilterOperator.java @@ -19,33 +19,27 @@ package org.apache.pinot.core.operator.filter; import com.google.common.base.Preconditions; -import java.util.ArrayList; -import java.util.List; -import org.apache.pinot.core.common.BlockDocIdIterator; import org.apache.pinot.core.common.DataSource; -import org.apache.pinot.core.common.Predicate; import org.apache.pinot.core.operator.blocks.FilterBlock; -import org.apache.pinot.core.operator.dociditerators.MVScanDocIdIterator; -import org.apache.pinot.core.operator.dociditerators.SVScanDocIdIterator; import org.apache.pinot.core.operator.docidsets.BitmapDocIdSet; -import org.apache.pinot.core.operator.docidsets.FilterBlockDocIdSet; +import org.apache.pinot.core.operator.docidsets.ScanBasedDocIdSet; import org.apache.pinot.core.operator.filter.predicate.PredicateEvaluator; -import org.apache.pinot.core.operator.filter.predicate.RangePredicateEvaluatorFactory; +import org.apache.pinot.core.operator.filter.predicate.RangePredicateEvaluatorFactory.OfflineDictionaryBasedRangePredicateEvaluator; import org.apache.pinot.core.segment.index.readers.RangeIndexReader; import org.roaringbitmap.buffer.ImmutableRoaringBitmap; import org.roaringbitmap.buffer.MutableRoaringBitmap; -public class RangeFilterOperator extends BaseFilterOperator { +public class RangeIndexBasedFilterOperator extends BaseFilterOperator { private static final String OPERATOR_NAME = "RangeFilterOperator"; - private final PredicateEvaluator _predicateEvaluator; + private final OfflineDictionaryBasedRangePredicateEvaluator _predicateEvaluator; private final DataSource _dataSource; private final int _startDocId; private final int _endDocId; private final boolean _exclusive; - public RangeFilterOperator(PredicateEvaluator predicateEvaluator, DataSource dataSource, int startDocId, + public RangeIndexBasedFilterOperator(PredicateEvaluator predicateEvaluator, DataSource dataSource, int startDocId, int endDocId) { // NOTE: // Predicate that is always evaluated as true or false should not be passed into the BitmapBasedFilterOperator for @@ -54,7 +48,8 @@ public class RangeFilterOperator extends BaseFilterOperator { // use EmptyFilterOperator. Preconditions.checkArgument(!predicateEvaluator.isAlwaysTrue() && !predicateEvaluator.isAlwaysFalse()); - _predicateEvaluator = predicateEvaluator; + // NOTE: Only dictionary-based evaluator is supported for now + _predicateEvaluator = (OfflineDictionaryBasedRangePredicateEvaluator) predicateEvaluator; _dataSource = dataSource; _startDocId = startDocId; _endDocId = endDocId; @@ -63,48 +58,36 @@ public class RangeFilterOperator extends BaseFilterOperator { @Override protected FilterBlock getNextBlock() { - - //only dictionary based is supported for now - Preconditions.checkState(_predicateEvaluator instanceof RangePredicateEvaluatorFactory.OfflineDictionaryBasedRangePredicateEvaluator); - - RangePredicateEvaluatorFactory.OfflineDictionaryBasedRangePredicateEvaluator evaluator = - (RangePredicateEvaluatorFactory.OfflineDictionaryBasedRangePredicateEvaluator) _predicateEvaluator; - RangeIndexReader rangeIndexReader = (RangeIndexReader) _dataSource.getRangeIndex(); - int startRangeId = rangeIndexReader.findRangeId(evaluator.getStartDictId()); - int endRangeId = rangeIndexReader.findRangeId(evaluator.getEndDictId()); + assert rangeIndexReader != null; + int firstRangeId = rangeIndexReader.findRangeId(_predicateEvaluator.getStartDictId()); + int lastRangeId = rangeIndexReader.findRangeId(_predicateEvaluator.getEndDictId() - 1); //Handle Matching Ranges - some ranges match fully but some partially //below code assumes first and last range always match partially which may not be the case always //todo: optimize it - MutableRoaringBitmap mutableRoaringBitmap = new MutableRoaringBitmap(); - mutableRoaringBitmap.or(rangeIndexReader.getDocIds(startRangeId)); - if (endRangeId != startRangeId) { - mutableRoaringBitmap.or(rangeIndexReader.getDocIds(endRangeId)); - } - final ScanBasedFilterOperator scanBasedFilterOperator = - new ScanBasedFilterOperator(_predicateEvaluator, _dataSource, _startDocId, _endDocId); - FilterBlockDocIdSet scanBlockDocIdSet = scanBasedFilterOperator.getNextBlock().getBlockDocIdSet(); - BlockDocIdIterator iterator = scanBlockDocIdSet.iterator(); - - List<ImmutableRoaringBitmap> bitmapList = new ArrayList<>(); - if (_dataSource.getDataSourceMetadata().isSingleValue()) { - bitmapList.add(((SVScanDocIdIterator) iterator).applyAnd(mutableRoaringBitmap)); + ImmutableRoaringBitmap docIdsToScan; + if (firstRangeId == lastRangeId) { + docIdsToScan = rangeIndexReader.getDocIds(firstRangeId); } else { - bitmapList.add(((MVScanDocIdIterator) iterator).applyAnd(mutableRoaringBitmap)); + docIdsToScan = + ImmutableRoaringBitmap.or(rangeIndexReader.getDocIds(firstRangeId), rangeIndexReader.getDocIds(lastRangeId)); } + ScanBasedFilterOperator scanBasedFilterOperator = + new ScanBasedFilterOperator(_predicateEvaluator, _dataSource, _startDocId, _endDocId); + ScanBasedDocIdSet scanBasedDocIdSet = (ScanBasedDocIdSet) scanBasedFilterOperator.getNextBlock().getBlockDocIdSet(); + MutableRoaringBitmap docIds = scanBasedDocIdSet.iterator().applyAnd(docIdsToScan); //All the intermediate ranges will be full match - for (int rangeId = startRangeId + 1; rangeId < endRangeId; rangeId++) { - bitmapList.add(rangeIndexReader.getDocIds(rangeId)); + for (int rangeId = firstRangeId + 1; rangeId < lastRangeId; rangeId++) { + docIds.or(rangeIndexReader.getDocIds(rangeId)); } - ImmutableRoaringBitmap[] bitmaps = new ImmutableRoaringBitmap[bitmapList.size()]; - bitmapList.toArray(bitmaps); - return new FilterBlock(new BitmapDocIdSet(bitmaps, _startDocId, _endDocId, _exclusive) { + return new FilterBlock( + new BitmapDocIdSet(new ImmutableRoaringBitmap[]{docIds}, _startDocId, _endDocId, _exclusive) { - @Override - public long getNumEntriesScannedInFilter() { - return scanBlockDocIdSet.getNumEntriesScannedInFilter(); - } - }); + @Override + public long getNumEntriesScannedInFilter() { + return scanBasedDocIdSet.getNumEntriesScannedInFilter(); + } + }); } @Override diff --git a/pinot-core/src/main/java/org/apache/pinot/core/segment/creator/impl/inv/RangeIndexCreator.java b/pinot-core/src/main/java/org/apache/pinot/core/segment/creator/impl/inv/RangeIndexCreator.java index ab33ea2..8e1934d 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/segment/creator/impl/inv/RangeIndexCreator.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/segment/creator/impl/inv/RangeIndexCreator.java @@ -34,7 +34,6 @@ import java.util.List; import org.apache.commons.io.FileUtils; import org.apache.pinot.core.query.utils.Pair; import org.apache.pinot.core.segment.creator.InvertedIndexCreator; -import org.apache.pinot.core.segment.creator.impl.SegmentColumnarIndexCreator; import org.apache.pinot.core.segment.memory.PinotDataBuffer; import org.apache.pinot.spi.data.FieldSpec; import org.roaringbitmap.buffer.MutableRoaringBitmap; @@ -100,7 +99,7 @@ public final class RangeIndexCreator implements InvertedIndexCreator { private final int _numValues; private int _nextDocId; private int _nextValueId; - private int _numDocsPerRange; + private int _numValuesPerRange; private FieldSpec.DataType _valueType; /** @@ -108,14 +107,14 @@ public final class RangeIndexCreator implements InvertedIndexCreator { * @param indexDir destination of the range index file * @param fieldSpec fieldspec of the column to generate the range index * @param valueType DataType of the column, INT if dictionary encoded, or INT, FLOAT, LONG, DOUBLE for raw encoded - * @param numRanges customize the number of ranges, if -1, we use DEFAULT_NUM_RANGES; - * @param numDocsPerRange customize the number of Docs Per Range, if -1 numDocsPerRange = totalValues/numRanges + * @param numRanges Number of ranges, use DEFAULT_NUM_RANGES if not configured (<= 0) + * @param numValuesPerRange Number of values per range, calculate from numRanges if not configured (<= 0) * @param numDocs total number of documents * @param numValues total number of values, used for Multi value columns (for single value columns numDocs== numValues) * @throws IOException */ public RangeIndexCreator(File indexDir, FieldSpec fieldSpec, FieldSpec.DataType valueType, int numRanges, - int numDocsPerRange, int numDocs, int numValues) + int numValuesPerRange, int numDocs, int numValues) throws IOException { _valueType = valueType; String columnName = fieldSpec.getName(); @@ -125,12 +124,14 @@ public final class RangeIndexCreator implements InvertedIndexCreator { _numValues = fieldSpec.isSingleValueField() ? numDocs : numValues; int valueSize = valueType.size(); try { - //use DEFAULT_NUM_RANGES if numRanges is not set - if (numRanges < 0) { - numRanges = DEFAULT_NUM_RANGES; - } - if (numDocsPerRange < 0) { - _numDocsPerRange = (int) Math.ceil(_numValues / numRanges); + Preconditions.checkArgument(numRanges <= 0 || numValuesPerRange <= 0, + "At most one of 'numRanges' and 'numValuesPerRange' should be configured"); + if (numRanges > 0) { + _numValuesPerRange = (_numValues + numRanges - 1) / numRanges; + } else if (numValuesPerRange > 0) { + _numValuesPerRange = numValuesPerRange; + } else { + _numValuesPerRange = (_numValues + DEFAULT_NUM_RANGES - 1) / DEFAULT_NUM_RANGES; } //Value buffer to store the values added via add method @@ -234,7 +235,7 @@ public final class RangeIndexCreator implements InvertedIndexCreator { //go over the sorted value to compute ranges List<Pair<Integer, Integer>> ranges = new ArrayList<>(); - int boundary = _numDocsPerRange; + int boundary = _numValuesPerRange; int start = 0; for (int i = 0; i < _numValues; i++) { if (i > start + boundary) { @@ -412,7 +413,6 @@ public final class RangeIndexCreator implements InvertedIndexCreator { } valuesAsString.append("] "); LOGGER.info(valuesAsString.toString()); - } private PinotDataBuffer createTempBuffer(long size, File mmapFile) diff --git a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/BaseClusterIntegrationTest.java b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/BaseClusterIntegrationTest.java index 7dcdeda..3a34df0 100644 --- a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/BaseClusterIntegrationTest.java +++ b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/BaseClusterIntegrationTest.java @@ -192,9 +192,7 @@ public abstract class BaseClusterIntegrationTest extends ClusterTest { @Nullable protected List<String> getRangeIndexColumns() { - return null; - // TODO: Fix the range index then uncomment this line -// return DEFAULT_RANGE_INDEX_COLUMNS; + return DEFAULT_RANGE_INDEX_COLUMNS; } @Nullable --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org