xiangfu0 commented on code in PR #17872:
URL: https://github.com/apache/pinot/pull/17872#discussion_r3036331867
##########
pinot-core/src/main/java/org/apache/pinot/core/operator/filter/BaseFilterOperator.java:
##########
@@ -82,6 +87,56 @@ public BitmapCollection getBitmaps() {
throw new UnsupportedOperationException();
}
+ /**
+ * Immutable holder for the exact filtered docIds of the operator, paired with the number of
+ * entries scanned in the filter that is reported alongside them. A {@code null} bitmap
+ * indicates match-all.
+ */
+ public static final class FilteredDocIds {
+ @Nullable
+ private final ImmutableRoaringBitmap _docIds; // null means every doc matches
+ private final long _numEntriesScannedInFilter;
+
+ private FilteredDocIds(@Nullable ImmutableRoaringBitmap docIds, long
numEntriesScannedInFilter) { // private: only the enclosing operator class creates instances
+ _docIds = docIds;
+ _numEntriesScannedInFilter = numEntriesScannedInFilter;
+ }
+
+ @Nullable
+ public ImmutableRoaringBitmap getDocIds() { // may return null, which callers must treat as match-all
+ return _docIds;
+ }
+
+ public long getNumEntriesScannedInFilter() {
+ return _numEntriesScannedInFilter;
+ }
+ }
+
+ /**
+ * Returns the exact filtered docIds for the operator. Implementations that
cannot produce a bitmap directly are
+ * materialized once through the filter operator itself so callers can reuse
the same primitive.
+ */
+ public FilteredDocIds getFilteredDocIds() {
+ if (isResultMatchingAll()) {
+ return new FilteredDocIds(null, 0L);
+ }
+ if (isResultEmpty()) {
+ return new FilteredDocIds(new MutableRoaringBitmap(), 0L);
+ }
+ if (canProduceBitmaps()) {
+ return new FilteredDocIds(getBitmaps().reduce(), 0L);
+ }
+
+ FilterBlock filterBlock = nextBlock();
+ BlockDocIdSet blockDocIdSet = filterBlock.getBlockDocIdSet();
+ BlockDocIdSet nonScanBlockDocIdSet =
filterBlock.getNonScanFilterBLockDocIdSet();
+ MutableRoaringBitmap bitmap = new MutableRoaringBitmap();
+ BlockDocIdIterator iterator = nonScanBlockDocIdSet.iterator();
+ int docId;
+ while ((docId = iterator.next()) != Constants.EOF) {
+ bitmap.add(docId);
+ }
Review Comment:
Implemented. The scan-materialization branch now calls
`bitmap.runOptimize()` before caching and returning the bitmap, so downstream
bitmap operations reuse the compact form.
##########
pinot-core/src/main/java/org/apache/pinot/core/query/distinct/table/DictIdDistinctTable.java:
##########
@@ -50,6 +53,20 @@ protected IntComparator
getComparator(OrderByExpressionContext orderByExpression
return orderByExpression.isAsc() ? (v1, v2) -> v2 - v1 : (v1, v2) -> v1 -
v2;
Review Comment:
Fixed. `DictIdDistinctTable#getComparator()` now uses `Integer.compare(...)`
in both directions, matching `IntDistinctTable` semantics and avoiding
subtraction overflow.
##########
pinot-core/src/main/java/org/apache/pinot/core/operator/query/InvertedIndexDistinctOperator.java:
##########
@@ -0,0 +1,659 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.core.operator.query;
+
+import com.google.common.base.CaseFormat;
+import java.util.Collections;
+import java.util.List;
+import java.util.NavigableMap;
+import java.util.TreeMap;
+import java.util.stream.Collectors;
+import javax.annotation.Nullable;
+import org.apache.pinot.common.request.context.ExpressionContext;
+import org.apache.pinot.common.request.context.OrderByExpressionContext;
+import org.apache.pinot.common.utils.DataSchema;
+import org.apache.pinot.common.utils.DataSchema.ColumnDataType;
+import org.apache.pinot.common.utils.config.QueryOptionsUtils;
+import org.apache.pinot.core.common.Operator;
+import org.apache.pinot.core.operator.BaseOperator;
+import org.apache.pinot.core.operator.BaseProjectOperator;
+import org.apache.pinot.core.operator.ExecutionStatistics;
+import org.apache.pinot.core.operator.ExplainAttributeBuilder;
+import org.apache.pinot.core.operator.blocks.ValueBlock;
+import org.apache.pinot.core.operator.blocks.results.DistinctResultsBlock;
+import org.apache.pinot.core.operator.filter.BaseFilterOperator;
+import org.apache.pinot.core.operator.filter.BitmapBasedFilterOperator;
+import org.apache.pinot.core.plan.DocIdSetPlanNode;
+import org.apache.pinot.core.plan.ProjectPlanNode;
+import org.apache.pinot.core.query.distinct.DistinctExecutor;
+import org.apache.pinot.core.query.distinct.DistinctExecutorFactory;
+import org.apache.pinot.core.query.distinct.table.BigDecimalDistinctTable;
+import org.apache.pinot.core.query.distinct.table.BytesDistinctTable;
+import org.apache.pinot.core.query.distinct.table.DictIdDistinctTable;
+import org.apache.pinot.core.query.distinct.table.DistinctTable;
+import org.apache.pinot.core.query.distinct.table.DoubleDistinctTable;
+import org.apache.pinot.core.query.distinct.table.FloatDistinctTable;
+import org.apache.pinot.core.query.distinct.table.IntDistinctTable;
+import org.apache.pinot.core.query.distinct.table.LongDistinctTable;
+import org.apache.pinot.core.query.distinct.table.StringDistinctTable;
+import org.apache.pinot.core.query.request.context.QueryContext;
+import org.apache.pinot.segment.spi.IndexSegment;
+import org.apache.pinot.segment.spi.SegmentContext;
+import org.apache.pinot.segment.spi.datasource.DataSource;
+import org.apache.pinot.segment.spi.datasource.DataSourceMetadata;
+import org.apache.pinot.segment.spi.index.reader.Dictionary;
+import org.apache.pinot.segment.spi.index.reader.InvertedIndexReader;
+import org.apache.pinot.segment.spi.index.reader.NullValueVectorReader;
+import org.apache.pinot.segment.spi.index.reader.SortedIndexReader;
+import org.apache.pinot.spi.query.QueryThreadContext;
+import org.apache.pinot.spi.utils.ByteArray;
+import org.apache.pinot.spi.utils.Pairs;
+import org.roaringbitmap.PeekableIntIterator;
+import org.roaringbitmap.buffer.ImmutableRoaringBitmap;
+
+
+/**
+ * Inverted-index-based operator for single-column distinct queries on a
single segment.
+ *
+ * <p>Supports three execution paths, chosen at runtime:
+ * <ul>
+ * <li><b>Sorted index path</b>: For sorted columns, merge-iterates filter
bitmap against contiguous doc ranges.
+ * Cost ~ O(cardinality + filteredDocs). Always chosen when the column
has a sorted forward index.</li>
+ * <li><b>Bitmap inverted index path</b>: Iterates dictionary entries and
uses inverted index bitmap intersections
+ * to check filter membership. Avoids the projection pipeline entirely.
Chosen by cost heuristic when dictionary
+ * cardinality is much smaller than the filtered doc count.</li>
+ * <li><b>Scan path (fallback)</b>: Uses ProjectOperator + DistinctExecutor
to scan filtered docs.
+ * Used when the cost heuristic determines scanning is cheaper.</li>
+ * </ul>
+ *
+ * <p>Enabled via the {@code useIndexBasedDistinctOperator} query option. The
cost ratio can be tuned
+ * via the {@code invertedIndexDistinctCostRatio} query option.
+ */
+public class InvertedIndexDistinctOperator extends
BaseOperator<DistinctResultsBlock> {
+ private static final String EXPLAIN_NAME = "DISTINCT_INVERTED_INDEX"; // label passed to the termination check in the bitmap path
+ private static final String EXPLAIN_NAME_SORTED_INDEX =
"DISTINCT_SORTED_INDEX"; // label passed to the termination check in the sorted path
+ private static final String EXPLAIN_NAME_SCAN_FALLBACK = "DISTINCT"; // presumably the name reported for the scan fallback; usage not visible in this hunk
+
+ private final IndexSegment _indexSegment; // metadata supplies the total doc count for match-all filters
+ private final SegmentContext _segmentContext;
+ private final QueryContext _queryContext;
+ private final BaseFilterOperator _filterOperator; // source of the filtered docIds
+ private final DataSource _dataSource;
+ private final Dictionary _dictionary; // taken from the data source in the constructor
+ private final InvertedIndexReader<?> _invertedIndexReader; // a SortedIndexReader selects the sorted path
+
+ // Scan path: created lazily when scan fallback is chosen
+ private BaseProjectOperator<?> _projectOperator;
+
+ // Execution tracking
+ private boolean _usedInvertedIndexPath = false; // set when the sorted or bitmap index path is taken
+ private int _numDocsScanned = 0; // accumulated from value blocks on the scan path
+ private int _numEntriesExamined = 0; // dictionary entries visited by the index paths
+ private long _numEntriesScannedInFilter = 0; // copied from the filter's FilteredDocIds
+
+ /**
+ * Creates an InvertedIndexDistinctOperator. The caller (DistinctPlanNode) must verify that the column
+ * has both a dictionary and an inverted index before constructing this operator.
+ *
+ * @param indexSegment segment being queried; its metadata supplies the total doc count
+ * @param segmentContext segment context handed to the projection plan on the scan path
+ * @param queryContext query being executed (select and order-by expressions, limit, options, null handling)
+ * @param filterOperator filter whose matching docIds drive every execution path
+ * @param dataSource data source of the distinct column; the dictionary and inverted index are read
+ * from it here
+ */
+ public InvertedIndexDistinctOperator(IndexSegment indexSegment,
SegmentContext segmentContext,
+ QueryContext queryContext, BaseFilterOperator filterOperator, DataSource
dataSource) {
+ _indexSegment = indexSegment;
+ _segmentContext = segmentContext;
+ _queryContext = queryContext;
+ _filterOperator = filterOperator;
+ _dataSource = dataSource;
+ _dictionary = dataSource.getDictionary();
+ _invertedIndexReader = dataSource.getInvertedIndex();
+ }
+
+ /**
+ * Materializes the filter result once and dispatches to one of the three execution paths: the sorted
+ * index path whenever the reader is a {@link SortedIndexReader}, otherwise the bitmap inverted index
+ * path if the cost heuristic favors it, otherwise the scan fallback.
+ */
+ @Override
+ protected DistinctResultsBlock getNextBlock() {
+ ImmutableRoaringBitmap matchingDocIds = buildFilteredDocIds();
+ boolean sortedColumn = _invertedIndexReader instanceof SortedIndexReader;
+
+ // The cost heuristic is consulted only for bitmap inverted indexes; sorted columns skip it.
+ if (!sortedColumn && !shouldUseBitmapInvertedIndex(matchingDocIds)) {
+ return executeScanPath(matchingDocIds);
+ }
+
+ _usedInvertedIndexPath = true;
+ if (sortedColumn) {
+ return executeSortedIndexPath((SortedIndexReader<?>) _invertedIndexReader, matchingDocIds);
+ }
+ return executeInvertedIndexPath(matchingDocIds);
+ }
+
+ // ==================== Cost Heuristic ====================
+
+ /**
+ * Default cost ratios for the inverted-index-based distinct heuristic. Each key is the smallest
+ * dictionary cardinality of a tier and each value is the cost ratio applied within that tier.
+ * The inverted index path is chosen when {@code dictionaryCardinality * costRatio <= filteredDocCount}.
+ *
+ * <p>The ratio models the cost of one per-entry bitmap intersection relative to one per-doc scan step.
+ * Low-cardinality dictionaries have dense bitmaps with fast {@code intersects()} calls, but with so few
+ * entries any wasted intersection is expensive compared with scanning a small filtered doc set.
+ * High-cardinality dictionaries have sparser bitmaps and slower {@code intersects()} calls, yet the scan
+ * path gets cheaper too (fewer docs per value), so a lower ratio suffices.
+ *
+ * <p>Crossover points reported by benchmarking (BenchmarkInvertedIndexDistinct, 1M docs):
+ * <ul>
+ * <li>dictCard up to 1K: costRatio=30, inverted index wins from roughly 30x dictCard filtered docs</li>
+ * <li>dictCard up to 10K: costRatio=10, inverted index wins from roughly 10x dictCard filtered docs</li>
+ * <li>dictCard above 10K: costRatio=6, inverted index wins from roughly 6x dictCard filtered docs</li>
+ * </ul>
+ *
+ * <p>Can be overridden at query time via the query option {@code invertedIndexDistinctCostRatio}.
+ */
+ static final NavigableMap<Integer, Double> DEFAULT_COST_RATIO_BY_CARDINALITY;
+
+ static {
+ NavigableMap<Integer, Double> ratioByTierStart = new TreeMap<>();
+ ratioByTierStart.put(10_001, 6.0); // dictCard > 10000: costRatio = 6
+ ratioByTierStart.put(1_001, 10.0); // dictCard 1001..10000: costRatio = 10
+ ratioByTierStart.put(0, 30.0); // dictCard <= 1000: costRatio = 30
+ DEFAULT_COST_RATIO_BY_CARDINALITY = Collections.unmodifiableNavigableMap(ratioByTierStart);
+ }
+
+ /**
+ * Looks up the default cost ratio of the tier that contains the given dictionary cardinality, i.e. the
+ * entry with the greatest tier start that does not exceed it.
+ */
+ static double getDefaultCostRatio(int dictionaryCardinality) {
+ Integer tierStart = DEFAULT_COST_RATIO_BY_CARDINALITY.floorKey(dictionaryCardinality);
+ return DEFAULT_COST_RATIO_BY_CARDINALITY.get(tierStart);
+ }
+
+ /**
+ * Cost heuristic for the bitmap inverted index path: returns {@code true} when
+ * {@code dictionaryCardinality * costRatio <= filteredDocCount}. The ratio comes from the
+ * {@code invertedIndexDistinctCostRatio} query option when set, otherwise from the per-cardinality
+ * defaults. An empty filter result never selects the index path.
+ *
+ * @param filteredDocIds docs matching the filter, or {@code null} for match-all
+ */
+ private boolean shouldUseBitmapInvertedIndex(@Nullable ImmutableRoaringBitmap filteredDocIds) {
+ int numDictEntries = _dictionary.length();
+ int numMatchingDocs;
+ if (filteredDocIds != null) {
+ numMatchingDocs = filteredDocIds.getCardinality();
+ } else {
+ // Match-all: every doc in the segment passes the filter
+ numMatchingDocs = _indexSegment.getSegmentMetadata().getTotalDocs();
+ }
+ if (numMatchingDocs == 0) {
+ return false;
+ }
+
+ Double overrideRatio = QueryOptionsUtils.getInvertedIndexDistinctCostRatio(_queryContext.getQueryOptions());
+ double ratio;
+ if (overrideRatio == null) {
+ ratio = getDefaultCostRatio(numDictEntries);
+ } else {
+ ratio = overrideRatio;
+ }
+ double indexPathCost = (double) numDictEntries * ratio;
+ return indexPathCost <= numMatchingDocs;
+ }
+
+ // ==================== Scan Path (Fallback) ====================
+
+ /**
+ * Scan fallback built on ProjectOperator + DistinctExecutor. If {@link #buildFilteredDocIds()} produced
+ * a bitmap, it is wrapped in a {@link BitmapBasedFilterOperator} so the projection pipeline does not
+ * evaluate the filter a second time; for match-all ({@code null}) the original filter operator is used.
+ *
+ * @param filteredDocIds docs matching the filter, or {@code null} for match-all
+ */
+ private DistinctResultsBlock executeScanPath(@Nullable ImmutableRoaringBitmap filteredDocIds) {
+ BaseFilterOperator scanFilter = filteredDocIds == null
+ ? _filterOperator
+ : new BitmapBasedFilterOperator(filteredDocIds, false, _indexSegment.getSegmentMetadata().getTotalDocs());
+ _projectOperator = new ProjectPlanNode(_segmentContext, _queryContext, _queryContext.getSelectExpressions(),
+ DocIdSetPlanNode.MAX_DOC_PER_CALL, scanFilter).run();
+ DistinctExecutor distinctExecutor = DistinctExecutorFactory.getDistinctExecutor(_projectOperator, _queryContext);
+
+ // Pull value blocks until the projection is exhausted or the executor reports it is done.
+ for (ValueBlock block = _projectOperator.nextBlock(); block != null; block = _projectOperator.nextBlock()) {
+ _numDocsScanned += block.getNumDocs();
+ if (distinctExecutor.process(block)) {
+ break;
+ }
+ }
+ return new DistinctResultsBlock(distinctExecutor.getResult(), _queryContext);
+ }
+
+ // ==================== Sorted Index Path ====================
+
+ /**
+ * Optimized path for sorted columns. Each dictId maps to a contiguous doc range [start, end].
+ * We merge-iterate the filter bitmap with the sorted ranges in O(cardinality + filteredDocs).
+ *
+ * <p>Steps: pick a dictId-based or typed distinct table, strip null docs from the filter via
+ * {@code processNullDocs} (adding a null entry to the table when one is reported), then take one of
+ * three branches: no filter bitmap (every dictId is added), DESC + LIMIT with a filter (backward
+ * iteration with a range-cardinality presence check), or the forward merge of the filter iterator with
+ * the per-dictId doc ranges. An empty filter bitmap adds no dictionary values. The number of dictionary
+ * entries visited is stored in {@code _numEntriesExamined}.
+ *
+ * @param sortedReader sorted index reader that maps each dictId to its inclusive doc range
+ * @param filteredDocIds docs matching the filter, or {@code null} for match-all
+ * @return result block wrapping the distinct table, converted to a typed table when dictIds were collected
+ */
+ private DistinctResultsBlock executeSortedIndexPath(SortedIndexReader<?>
sortedReader,
+ @Nullable ImmutableRoaringBitmap filteredDocIds) {
+ OrderByExpressionContext orderByExpression =
+ _queryContext.getOrderByExpressions() != null ?
_queryContext.getOrderByExpressions().get(0) : null;
+ boolean useDictIdTable = canUseDictIdDistinctTable(orderByExpression);
+ DistinctTable distinctTable =
+ useDictIdTable ? createDictIdDistinctTable(orderByExpression) :
createTypedDistinctTable(orderByExpression);
+ int dictLength = _dictionary.length();
+ // Process null handling: exclude null docs from filter and determine if
nulls are present
+ NullFilterResult nullResult = processNullDocs(filteredDocIds);
+ ImmutableRoaringBitmap nonNullFilteredDocIds =
nullResult._nonNullFilteredDocIds;
+ if (nullResult._hasNull) {
+ distinctTable.addNull();
+ }
+
+ // When dictIds are in value order, ORDER BY + LIMIT can terminate early
by iterating in the ORDER BY direction.
+ boolean orderedEarlyTermination = useDictIdTable && orderByExpression !=
null && distinctTable.hasLimit();
+ boolean iterateReverse = orderedEarlyTermination &&
!orderByExpression.isAsc();
+
+ if (nonNullFilteredDocIds == null) {
+ // No filter, no null exclusion — every dictionary value is present
+ int entriesExamined = 0;
+ int start = iterateReverse ? dictLength - 1 : 0;
+ int end = iterateReverse ? -1 : dictLength; // exclusive sentinel in the chosen direction
+ int step = iterateReverse ? -1 : 1;
+ for (int dictId = start; dictId != end; dictId += step) {
+
QueryThreadContext.checkTerminationAndSampleUsagePeriodically(entriesExamined,
EXPLAIN_NAME_SORTED_INDEX);
+ entriesExamined++;
+ boolean done = addDistinctValue(distinctTable, dictId,
orderByExpression, orderedEarlyTermination);
+ if (done) {
+ break;
+ }
+ }
+ _numEntriesExamined = entriesExamined;
+ } else if (!nonNullFilteredDocIds.isEmpty()) { // an empty filter contributes no dictionary values
+ if (iterateReverse) {
+ // DESC + LIMIT: iterate dictIds backward, use rangeCardinality for
presence check.
+ // Each dictId maps to a contiguous doc range, so rangeCardinality is
O(1) per check.
+ int entriesExamined = 0;
+ for (int dictId = dictLength - 1; dictId >= 0; dictId--) {
+
QueryThreadContext.checkTerminationAndSampleUsagePeriodically(entriesExamined,
EXPLAIN_NAME_SORTED_INDEX);
+ entriesExamined++;
+ Pairs.IntPair range = sortedReader.getDocIds(dictId);
+ int startDocId = range.getLeft();
+ int endDocId = range.getRight(); // inclusive
+ if (nonNullFilteredDocIds.rangeCardinality(startDocId, endDocId +
1L) > 0) {
+ if (addDistinctValue(distinctTable, dictId, orderByExpression,
true)) {
+ break;
+ }
+ }
+ }
+ _numEntriesExamined = entriesExamined;
+ } else {
+ // ASC or no ORDER BY: merge-iterate forward (O(cardinality +
filteredDocs))
+ PeekableIntIterator filterIter =
nonNullFilteredDocIds.getIntIterator();
+ int dictId; // declared outside the loop so the final count can be read after it
+ for (dictId = 0; dictId < dictLength && filterIter.hasNext();
dictId++) {
+
QueryThreadContext.checkTerminationAndSampleUsagePeriodically(dictId,
EXPLAIN_NAME_SORTED_INDEX);
+ Pairs.IntPair range = sortedReader.getDocIds(dictId);
+ int startDocId = range.getLeft();
+ int endDocId = range.getRight(); // inclusive
+
+ // Skip filter docs before this range
+ filterIter.advanceIfNeeded(startDocId);
+
+ // Check if any non-null filter doc falls within this range
+ if (filterIter.hasNext() && filterIter.peekNext() <= endDocId) {
+ boolean done = addDistinctValue(distinctTable, dictId,
orderByExpression, orderedEarlyTermination);
+ if (done) {
+ _numEntriesExamined = dictId + 1; // include the entry that completed the table
+ return new
DistinctResultsBlock(convertDistinctTable(distinctTable, nullResult._hasNull),
_queryContext);
+ }
+ // Advance past the current range for next dictId
+ filterIter.advanceIfNeeded(endDocId + 1);
+ }
+ }
+ _numEntriesExamined = dictId; // loop ended without early termination
+ }
+ }
+
+ return new DistinctResultsBlock(convertDistinctTable(distinctTable,
nullResult._hasNull), _queryContext);
+ }
+
+ // ==================== Bitmap Inverted Index Path ====================
+
+ private DistinctResultsBlock executeInvertedIndexPath(@Nullable
ImmutableRoaringBitmap filteredDocIds) { // filteredDocIds == null means match-all
+ // Process null handling: exclude null docs from filter and determine if
nulls are present
+ NullFilterResult nullResult = processNullDocs(filteredDocIds);
+ ImmutableRoaringBitmap nonNullFilteredDocIds =
nullResult._nonNullFilteredDocIds;
+ OrderByExpressionContext orderByExpression =
+ _queryContext.getOrderByExpressions() != null ?
_queryContext.getOrderByExpressions().get(0) : null; // only the first ORDER BY expression is read
+ boolean useDictIdTable = canUseDictIdDistinctTable(orderByExpression);
+ DistinctTable distinctTable =
+ useDictIdTable ? createDictIdDistinctTable(orderByExpression) :
createTypedDistinctTable(orderByExpression);
+ int dictLength = _dictionary.length();
+ if (nullResult._hasNull) {
+ distinctTable.addNull();
+ }
+
+ // When dictIds are in value order, ORDER BY + LIMIT can terminate early
by iterating in the ORDER BY direction.
+ boolean orderedEarlyTermination = useDictIdTable && orderByExpression !=
null && distinctTable.hasLimit();
+ boolean iterateReverse = orderedEarlyTermination &&
!orderByExpression.isAsc();
+
+ int entriesExamined = 0;
+ int start = iterateReverse ? dictLength - 1 : 0;
+ int end = iterateReverse ? -1 : dictLength; // exclusive sentinel in the chosen direction
+ int step = iterateReverse ? -1 : 1;
+
+ for (int dictId = start; dictId != end; dictId += step) {
+
QueryThreadContext.checkTerminationAndSampleUsagePeriodically(entriesExamined,
EXPLAIN_NAME);
+ entriesExamined++;
+
+ // SortedIndexReader is handled separately in getNextBlock(), so this
path only sees bitmap inverted indexes
+ // whose getDocIds() returns ImmutableRoaringBitmap.
+ ImmutableRoaringBitmap docIds = (ImmutableRoaringBitmap)
_invertedIndexReader.getDocIds(dictId);
+
+ // Use intersects() for early termination instead of computing full
intersection.
+ // intersects() short-circuits on the first common element, which is
orders of magnitude
+ // faster than RoaringBitmap.and() especially for low-cardinality
(dense) bitmaps.
+ // Use the non-null filter bitmap to skip the null placeholder value.
+ boolean includeValue;
+ if (nonNullFilteredDocIds == null) {
+ includeValue = true; // no filter bitmap to test against, so every dictionary entry is included
+ } else {
+ includeValue = ImmutableRoaringBitmap.intersects(docIds,
nonNullFilteredDocIds);
+ }
+
+ if (includeValue) {
+ boolean done = addDistinctValue(distinctTable, dictId,
orderByExpression, orderedEarlyTermination);
+ if (done) {
+ break;
+ }
+ }
+ }
+ _numEntriesExamined = entriesExamined; // dictionary entries visited, including skipped ones
+
+ return new DistinctResultsBlock(convertDistinctTable(distinctTable,
nullResult._hasNull), _queryContext);
+ }
+
+ /**
+ * Asks the filter operator for its exact matching docIds and records the number of entries it scanned.
+ *
+ * @return the matching docIds, or {@code null} when the filter matches all docs
+ */
+ @Nullable
+ private ImmutableRoaringBitmap buildFilteredDocIds() {
+ BaseFilterOperator.FilteredDocIds filterResult = _filterOperator.getFilteredDocIds();
+ _numEntriesScannedInFilter = filterResult.getNumEntriesScannedInFilter();
+ return filterResult.getDocIds();
+ }
+
+ /**
+ * A dictId-based distinct table is usable when there is no ORDER BY, or when the dictionary is sorted.
+ */
+ private boolean canUseDictIdDistinctTable(@Nullable OrderByExpressionContext orderByExpression) {
+ if (orderByExpression == null) {
+ return true;
+ }
+ return _dictionary.isSorted();
+ }
+
+ /**
+ * Builds the single-column result schema: the identifier of the first select expression, typed from
+ * the data source metadata.
+ */
+ private DataSchema createDataSchema() {
+ String[] columnNames = {_queryContext.getSelectExpressions().get(0).getIdentifier()};
+ ColumnDataType[] columnTypes =
+ {ColumnDataType.fromDataTypeSV(_dataSource.getDataSourceMetadata().getDataType())};
+ return new DataSchema(columnNames, columnTypes);
+ }
+
+ /**
+ * Creates a distinct table that collects dictIds, configured with the query's limit and null handling.
+ */
+ private DictIdDistinctTable createDictIdDistinctTable(@Nullable OrderByExpressionContext orderByExpression) {
+ DataSchema schema = createDataSchema();
+ int rowLimit = _queryContext.getLimit();
+ boolean handleNulls = _queryContext.isNullHandlingEnabled();
+ return new DictIdDistinctTable(schema, rowLimit, handleNulls, orderByExpression);
+ }
+
+ /**
+ * Creates the value-typed distinct table that matches the dictionary's value type.
+ *
+ * @throws IllegalStateException if the dictionary value type has no matching distinct table
+ */
+ private DistinctTable createTypedDistinctTable(@Nullable OrderByExpressionContext orderByExpression) {
+ DataSchema schema = createDataSchema();
+ int rowLimit = _queryContext.getLimit();
+ boolean handleNulls = _queryContext.isNullHandlingEnabled();
+ DistinctTable typedTable;
+ switch (_dictionary.getValueType()) {
+ case INT:
+ typedTable = new IntDistinctTable(schema, rowLimit, handleNulls, orderByExpression);
+ break;
+ case LONG:
+ typedTable = new LongDistinctTable(schema, rowLimit, handleNulls, orderByExpression);
+ break;
+ case FLOAT:
+ typedTable = new FloatDistinctTable(schema, rowLimit, handleNulls, orderByExpression);
+ break;
+ case DOUBLE:
+ typedTable = new DoubleDistinctTable(schema, rowLimit, handleNulls, orderByExpression);
+ break;
+ case BIG_DECIMAL:
+ typedTable = new BigDecimalDistinctTable(schema, rowLimit, handleNulls, orderByExpression);
+ break;
+ case STRING:
+ typedTable = new StringDistinctTable(schema, rowLimit, handleNulls, orderByExpression);
+ break;
+ case BYTES:
+ typedTable = new BytesDistinctTable(schema, rowLimit, handleNulls, orderByExpression);
+ break;
+ default:
+ throw new IllegalStateException("Unsupported data type: " + _dictionary.getValueType());
+ }
+ return typedTable;
+ }
+
+ /**
+ * Turns a dictId-based table into a typed table using the dictionary; any other table is returned as is.
+ */
+ private DistinctTable convertDistinctTable(DistinctTable distinctTable, boolean hasNull) {
+ if (!(distinctTable instanceof DictIdDistinctTable)) {
+ return distinctTable;
+ }
+ DictIdDistinctTable dictIdTable = (DictIdDistinctTable) distinctTable;
+ return dictIdTable.toTypedDistinctTable(_dictionary, hasNull);
+ }
+
+ private boolean addDistinctValue(DistinctTable distinctTable, int dictId,
+ @Nullable OrderByExpressionContext orderByExpression, boolean
orderedEarlyTermination) { // adds the value of dictId to the table; callers stop iterating when this returns true
+ if (distinctTable instanceof DictIdDistinctTable) { // dictId table: the raw dictId is stored, no dictionary lookup here
+ DictIdDistinctTable dictIdDistinctTable = (DictIdDistinctTable)
distinctTable;
+ if (orderedEarlyTermination) {
+ return dictIdDistinctTable.addForOrderedEarlyTermination(dictId);
+ }
+ if (dictIdDistinctTable.hasLimit()) {
+ if (orderByExpression != null) {
+ dictIdDistinctTable.addWithOrderBy(dictId);
+ return false; // with ORDER BY this branch never signals completion
+ }
+ return dictIdDistinctTable.addWithoutOrderBy(dictId);
+ }
+ dictIdDistinctTable.addUnbounded(dictId);
+ return false; // no limit: keep iterating
+ }
+
+ switch (_dictionary.getValueType()) { // typed table: resolve dictId to its value; every case follows the same add pattern as above
+ case INT: {
+ IntDistinctTable table = (IntDistinctTable) distinctTable;
+ int value = _dictionary.getIntValue(dictId);
+ if (table.hasLimit()) {
+ if (orderByExpression != null) {
+ table.addWithOrderBy(value);
+ return false;
+ }
+ return table.addWithoutOrderBy(value);
+ }
+ table.addUnbounded(value);
+ return false;
+ }
+ case LONG: {
+ LongDistinctTable table = (LongDistinctTable) distinctTable;
+ long value = _dictionary.getLongValue(dictId);
+ if (table.hasLimit()) {
+ if (orderByExpression != null) {
+ table.addWithOrderBy(value);
+ return false;
+ }
+ return table.addWithoutOrderBy(value);
+ }
+ table.addUnbounded(value);
+ return false;
+ }
+ case FLOAT: {
+ FloatDistinctTable table = (FloatDistinctTable) distinctTable;
+ float value = _dictionary.getFloatValue(dictId);
+ if (table.hasLimit()) {
+ if (orderByExpression != null) {
+ table.addWithOrderBy(value);
+ return false;
+ }
+ return table.addWithoutOrderBy(value);
+ }
+ table.addUnbounded(value);
+ return false;
+ }
+ case DOUBLE: {
+ DoubleDistinctTable table = (DoubleDistinctTable) distinctTable;
+ double value = _dictionary.getDoubleValue(dictId);
+ if (table.hasLimit()) {
+ if (orderByExpression != null) {
+ table.addWithOrderBy(value);
+ return false;
+ }
+ return table.addWithoutOrderBy(value);
+ }
+ table.addUnbounded(value);
+ return false;
+ }
+ case BIG_DECIMAL: {
+ BigDecimalDistinctTable table = (BigDecimalDistinctTable)
distinctTable;
+ java.math.BigDecimal value = _dictionary.getBigDecimalValue(dictId);
+ if (table.hasLimit()) {
+ if (orderByExpression != null) {
+ table.addWithOrderBy(value);
+ return false;
+ }
+ return table.addWithoutOrderBy(value);
+ }
+ table.addUnbounded(value);
+ return false;
+ }
+ case STRING: {
+ StringDistinctTable table = (StringDistinctTable) distinctTable;
+ String value = _dictionary.getStringValue(dictId);
+ if (table.hasLimit()) {
+ if (orderByExpression != null) {
+ table.addWithOrderBy(value);
+ return false;
+ }
+ return table.addWithoutOrderBy(value);
+ }
+ table.addUnbounded(value);
+ return false;
+ }
+ case BYTES: {
+ BytesDistinctTable table = (BytesDistinctTable) distinctTable;
+ ByteArray value = new ByteArray(_dictionary.getBytesValue(dictId)); // raw bytes are wrapped before being added
+ if (table.hasLimit()) {
+ if (orderByExpression != null) {
+ table.addWithOrderBy(value);
+ return false;
+ }
+ return table.addWithoutOrderBy(value);
+ }
+ table.addUnbounded(value);
+ return false;
+ }
+ default:
+ throw new IllegalStateException("Unsupported data type: " +
_dictionary.getValueType());
+ }
+ }
+
+ // ==================== Null Handling ====================
+
+ /**
+ * Processes null handling for the filter bitmap. Returns the filter bitmap
with null docs excluded
+ * and whether any filtered docs have null values.
+ *
+ * <p>Nulls are not in the dictionary, so they must be checked separately
via the null value vector.
+ * The null placeholder value (e.g., Integer.MIN_VALUE) is excluded from
dictionary iteration by
+ * removing null docs from the filter bitmap.
+ */
+ private NullFilterResult processNullDocs(@Nullable ImmutableRoaringBitmap
filteredDocIds) {
+ if (!_queryContext.isNullHandlingEnabled()) {
+ return new NullFilterResult(filteredDocIds, false);
+ }
+ NullValueVectorReader nullReader = _dataSource.getNullValueVector();
+ if (nullReader == null) {
+ return new NullFilterResult(filteredDocIds, false);
+ }
+ ImmutableRoaringBitmap nullBitmap = nullReader.getNullBitmap();
+ if (nullBitmap == null || nullBitmap.isEmpty()) {
+ return new NullFilterResult(filteredDocIds, false);
+ }
+ // Determine if any filtered doc has null
+ boolean hasNull = filteredDocIds == null ||
ImmutableRoaringBitmap.intersects(nullBitmap, filteredDocIds);
+ // Exclude null docs from filter bitmap
+ ImmutableRoaringBitmap nonNullFilteredDocIds;
+ if (filteredDocIds == null) {
+ // Match-all: flip null bitmap to get all non-null docs
+ nonNullFilteredDocIds = ImmutableRoaringBitmap.flip(nullBitmap, 0L,
+ _indexSegment.getSegmentMetadata().getTotalDocs());
+ } else {
+ nonNullFilteredDocIds = ImmutableRoaringBitmap.andNot(filteredDocIds,
nullBitmap);
+ }
Review Comment:
Implemented. For match-all with null handling, `processNullDocs()` now
preserves the match-all sentinel and skips the dictionary null-placeholder
dictId directly, so we avoid allocating a dense complement bitmap while keeping
null-placeholder semantics correct. The broader
`InvertedIndexDistinctOperatorTest` null/order-by coverage passes with this
path.
##########
pinot-core/src/main/java/org/apache/pinot/core/plan/DistinctPlanNode.java:
##########
@@ -75,14 +79,33 @@ public Operator<DistinctResultsBlock> run() {
}
}
- // Use JSON index directly for DISTINCT jsonExtractIndex when query option
useIndexBasedDistinctOperator=true
- // (disabled by default; opt-in via query option)
- if
(QueryOptionsUtils.isUseIndexBasedDistinctOperator(_queryContext.getQueryOptions())
&& expressions.size() == 1) {
+ // Use index-based distinct operators when opted in via query option
+ if (expressions.size() == 1 &&
QueryOptionsUtils.isUseIndexBasedDistinctOperator(_queryContext.getQueryOptions()))
{
ExpressionContext expr = expressions.get(0);
Review Comment:
This already routes to `InvertedIndexDistinctOperator` when the no-filter
dictionary fast path is skipped because null handling finds actual nulls and
`useIndexBasedDistinctOperator=true` is set. The early return only fires when
`DictionaryBasedDistinctOperator` is actually usable; otherwise control falls
through to the opt-in index-based block below.
##########
pinot-core/src/main/java/org/apache/pinot/core/plan/DistinctPlanNode.java:
##########
@@ -75,14 +79,33 @@ public Operator<DistinctResultsBlock> run() {
}
}
- // Use JSON index directly for DISTINCT jsonExtractIndex when query option
useIndexBasedDistinctOperator=true
- // (disabled by default; opt-in via query option)
- if
(QueryOptionsUtils.isUseIndexBasedDistinctOperator(_queryContext.getQueryOptions())
&& expressions.size() == 1) {
+ // Use index-based distinct operators when opted in via query option
+ if (expressions.size() == 1 &&
QueryOptionsUtils.isUseIndexBasedDistinctOperator(_queryContext.getQueryOptions()))
{
ExpressionContext expr = expressions.get(0);
+
+ // JSON index path
if (JsonIndexDistinctOperator.canUseJsonIndexDistinct(_indexSegment,
expr)) {
BaseFilterOperator filterOperator = new
FilterPlanNode(_segmentContext, _queryContext).run();
return new JsonIndexDistinctOperator(_indexSegment, _segmentContext,
_queryContext, filterOperator);
}
+
+ // Inverted/sorted index path. For unsorted dictionaries the operator
still avoids the scan/projection path,
+ // but ORDER BY pruning is disabled and ordering is maintained with the
typed distinct table instead.
+ String column = expr.getIdentifier();
Review Comment:
Updated the PR description to match the implementation: unsorted
dictionaries are supported, ORDER BY pruning is limited to the
sorted-dictionary fast path, and scan fallback only reuses an exact bitmap when
it is already cheap to obtain.
##########
pinot-common/src/main/java/org/apache/pinot/common/utils/config/QueryOptionsUtils.java:
##########
@@ -590,6 +602,25 @@ private static IllegalArgumentException
intParseException(String optionName, Str
String.format("%s must be a number between %d and 2^31-1, got: %s",
optionName, minValue, optionValue));
}
+ /**
+ * Parses a query option value as a strictly positive, finite double.
+ *
+ * @param optionName name of the query option, used only in error messages
+ * @param optionValue raw option value, or {@code null} when the option is not set
+ * @return the parsed value, or {@code null} when {@code optionValue} is {@code null}
+ * @throws IllegalArgumentException if the value is not a number, is NaN or infinite, or is not greater
+ * than zero
+ */
+ @Nullable
+ private static Double checkedParseDoublePositive(String optionName, @Nullable String optionValue) {
+ if (optionValue == null) {
+ return null;
+ }
+ double value;
+ try {
+ value = Double.parseDouble(optionValue.trim());
+ } catch (NumberFormatException nfe) {
+ // Keep the parse failure as the cause so the original error is not lost
+ throw new IllegalArgumentException(
+ String.format("%s must be a positive number, got: %s", optionName, optionValue), nfe);
+ }
+ // Double.parseDouble accepts "NaN" and "Infinity". NaN fails every ordered comparison, so a bare
+ // "value <= 0" check would let it through, and an infinite ratio is meaningless as a cost factor.
+ if (!Double.isFinite(value) || value <= 0) {
+ throw new IllegalArgumentException(
+ String.format("%s must be a positive number, got: %s", optionName, optionValue));
+ }
+ return value;
+ }
Review Comment:
Implemented. `checkedParseDoublePositive()` now trims the value and rejects
non-finite inputs such as `NaN` and `Infinity`, and `QueryOptionsUtilsTest` now
covers both valid and invalid ratios.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]