Copilot commented on code in PR #17872: URL: https://github.com/apache/pinot/pull/17872#discussion_r2963584947
########## pinot-core/src/main/java/org/apache/pinot/core/operator/query/InvertedIndexDistinctOperator.java: ########## @@ -0,0 +1,642 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.core.operator.query; + +import com.google.common.base.CaseFormat; +import it.unimi.dsi.fastutil.ints.IntIterator; +import java.util.Collections; +import java.util.List; +import java.util.NavigableMap; +import java.util.TreeMap; +import java.util.stream.Collectors; +import javax.annotation.Nullable; +import org.apache.pinot.common.request.context.ExpressionContext; +import org.apache.pinot.common.request.context.OrderByExpressionContext; +import org.apache.pinot.common.utils.DataSchema; +import org.apache.pinot.common.utils.DataSchema.ColumnDataType; +import org.apache.pinot.common.utils.config.QueryOptionsUtils; +import org.apache.pinot.core.common.Operator; +import org.apache.pinot.core.operator.BaseOperator; +import org.apache.pinot.core.operator.BaseProjectOperator; +import org.apache.pinot.core.operator.ExecutionStatistics; +import org.apache.pinot.core.operator.ExplainAttributeBuilder; +import org.apache.pinot.core.operator.blocks.DocIdSetBlock; +import org.apache.pinot.core.operator.blocks.ValueBlock; +import org.apache.pinot.core.operator.blocks.results.DistinctResultsBlock; +import org.apache.pinot.core.operator.filter.BaseFilterOperator; +import org.apache.pinot.core.plan.DocIdSetPlanNode; +import org.apache.pinot.core.plan.ProjectPlanNode; +import org.apache.pinot.core.query.distinct.DistinctExecutor; +import org.apache.pinot.core.query.distinct.DistinctExecutorFactory; +import org.apache.pinot.core.query.distinct.table.BigDecimalDistinctTable; +import org.apache.pinot.core.query.distinct.table.BytesDistinctTable; +import org.apache.pinot.core.query.distinct.table.DictIdDistinctTable; +import org.apache.pinot.core.query.distinct.table.DistinctTable; +import org.apache.pinot.core.query.distinct.table.DoubleDistinctTable; +import org.apache.pinot.core.query.distinct.table.FloatDistinctTable; +import org.apache.pinot.core.query.distinct.table.IntDistinctTable; +import org.apache.pinot.core.query.distinct.table.LongDistinctTable; +import org.apache.pinot.core.query.distinct.table.StringDistinctTable; +import org.apache.pinot.core.query.request.context.QueryContext; +import org.apache.pinot.segment.spi.IndexSegment; +import org.apache.pinot.segment.spi.SegmentContext; +import org.apache.pinot.segment.spi.datasource.DataSource; +import org.apache.pinot.segment.spi.datasource.DataSourceMetadata; +import org.apache.pinot.segment.spi.index.reader.Dictionary; +import org.apache.pinot.segment.spi.index.reader.InvertedIndexReader; +import org.apache.pinot.segment.spi.index.reader.NullValueVectorReader; +import org.apache.pinot.segment.spi.index.reader.SortedIndexReader; +import org.apache.pinot.spi.query.QueryThreadContext; +import org.apache.pinot.spi.utils.ByteArray; +import org.apache.pinot.spi.utils.Pairs; +import org.roaringbitmap.PeekableIntIterator; +import org.roaringbitmap.buffer.ImmutableRoaringBitmap; +import org.roaringbitmap.buffer.MutableRoaringBitmap; + + +/** + * Inverted-index-based operator for single-column distinct queries on a single segment. + * + * <p>Supports three execution paths, chosen at runtime: + * <ul> + * <li><b>Sorted index path</b>: For sorted columns, merge-iterates filter bitmap against contiguous doc ranges. + * Cost ~ O(cardinality + filteredDocs). Always chosen when the column has a sorted forward index.</li> + * <li><b>Bitmap inverted index path</b>: Iterates dictionary entries and uses inverted index bitmap intersections + * to check filter membership. Avoids the projection pipeline entirely. Chosen by cost heuristic when dictionary + * cardinality is much smaller than the filtered doc count.</li> + * <li><b>Scan path (fallback)</b>: Uses ProjectOperator + DistinctExecutor to scan filtered docs. + * Used when the cost heuristic determines scanning is cheaper.</li> + * </ul> + * + * <p>Enabled via the {@code useIndexBasedDistinctOperator} query option. The cost ratio can be tuned + * via the {@code invertedIndexDistinctCostRatio} query option. + */ +public class InvertedIndexDistinctOperator extends BaseOperator<DistinctResultsBlock> { + private static final String EXPLAIN_NAME = "DISTINCT_INVERTED_INDEX"; + private static final String EXPLAIN_NAME_SORTED_INDEX = "DISTINCT_SORTED_INDEX"; + private static final String EXPLAIN_NAME_SCAN_FALLBACK = "DISTINCT"; + + private final IndexSegment _indexSegment; + private final SegmentContext _segmentContext; + private final QueryContext _queryContext; + private final BaseFilterOperator _filterOperator; + private final DataSource _dataSource; + private final Dictionary _dictionary; + private final InvertedIndexReader<?> _invertedIndexReader; + + // Scan path: created lazily when scan fallback is chosen + private BaseProjectOperator<?> _projectOperator; + + // Cached filter bitmap from the heuristic phase, reused by buildFilteredDocIds() to avoid + // a redundant getBitmaps().reduce() call. + private ImmutableRoaringBitmap _cachedFilterBitmap; + + // Execution tracking + private boolean _usedInvertedIndexPath = false; + private int _numDocsScanned = 0; + private int _numEntriesExamined = 0; + private long _numEntriesScannedInFilter = 0; + + /** + * Creates an InvertedIndexDistinctOperator. The caller (DistinctPlanNode) must verify that the column + * has both a dictionary and an inverted index before constructing this operator. + */ + public InvertedIndexDistinctOperator(IndexSegment indexSegment, SegmentContext segmentContext, + QueryContext queryContext, BaseFilterOperator filterOperator, DataSource dataSource) { + _indexSegment = indexSegment; + _segmentContext = segmentContext; + _queryContext = queryContext; + _filterOperator = filterOperator; + _dataSource = dataSource; + _dictionary = dataSource.getDictionary(); + _invertedIndexReader = dataSource.getInvertedIndex(); + } + + @Override + protected DistinctResultsBlock getNextBlock() { + // Sorted index: always use the sorted path — O(cardinality + filteredDocs) merge iteration + if (_invertedIndexReader instanceof SortedIndexReader) { + _usedInvertedIndexPath = true; + return executeSortedIndexPath((SortedIndexReader<?>) _invertedIndexReader); + } + // Bitmap inverted index: use cost heuristic to decide + if (shouldUseBitmapInvertedIndex()) { + _usedInvertedIndexPath = true; + return executeInvertedIndexPath(); + } + return executeScanPath(); + } + + // ==================== Cost Heuristic ==================== + + /** + * Default cost ratios for the inverted-index-based distinct heuristic, keyed by dictionary cardinality threshold. + * The inverted index path is chosen when {@code dictionaryCardinality * costRatio <= filteredDocCount}. + * + * <p>The cost ratio accounts for the per-entry bitmap intersection cost relative to the per-doc scan cost. + * For low-cardinality dictionaries, each bitmap is dense and {@code intersects()} is fast, but there are few + * entries so any unnecessary intersection is relatively expensive vs. scanning a small filtered doc set. + * For high-cardinality dictionaries, bitmaps are sparser and {@code intersects()} is slower per entry, + * but the scan path also becomes cheaper (fewer docs per value), so a lower ratio suffices. + * + * <p>Benchmarking (BenchmarkInvertedIndexDistinct, 1M docs) shows the crossover points: + * <ul> + * <li>dictCard ≤ 1K: costRatio=30 — inverted index wins when filteredDocs ≥ ~30x dictCard</li> + * <li>dictCard ≤ 10K: costRatio=10 — inverted index wins when filteredDocs ≥ ~10x dictCard</li> + * <li>dictCard > 10K: costRatio=6 — inverted index wins when filteredDocs ≥ ~6x dictCard</li> + * </ul> + * + * <p>Can be overridden at query time via the query option {@code invertedIndexDistinctCostRatio}. + */ + static final NavigableMap<Integer, Integer> DEFAULT_COST_RATIO_BY_CARDINALITY; + + static { + TreeMap<Integer, Integer> map = new TreeMap<>(); + map.put(0, 30); // dictCard <= 1000: costRatio = 30 + map.put(1_001, 10); // dictCard 1001..10000: costRatio = 10 + map.put(10_001, 6); // dictCard > 10000: costRatio = 6 + DEFAULT_COST_RATIO_BY_CARDINALITY = Collections.unmodifiableNavigableMap(map); + } + + static int getDefaultCostRatio(int dictionaryCardinality) { + return DEFAULT_COST_RATIO_BY_CARDINALITY.floorEntry(dictionaryCardinality).getValue(); + } + + private boolean shouldUseBitmapInvertedIndex() { + int dictionaryCardinality = _dictionary.length(); + int filteredDocCount = estimateFilteredDocCount(); + if (filteredDocCount == 0) { + return false; + } + if (dictionaryCardinality == 0) { + return true; + } + Integer costRatioOverride = QueryOptionsUtils.getInvertedIndexDistinctCostRatio(_queryContext.getQueryOptions()); + int costRatio = costRatioOverride != null ? costRatioOverride : getDefaultCostRatio(dictionaryCardinality); + return (long) dictionaryCardinality * costRatio <= filteredDocCount; + } + + /** + * Cheaply estimates the number of docs matching the filter without consuming the filter operator. + */ + private int estimateFilteredDocCount() { + if (_filterOperator.isResultEmpty()) { + return 0; + } + if (_filterOperator.isResultMatchingAll()) { + return _indexSegment.getSegmentMetadata().getTotalDocs(); + } + if (_filterOperator.canOptimizeCount()) { + return _filterOperator.getNumMatchingDocs(); + } + if (_filterOperator.canProduceBitmaps()) { + _cachedFilterBitmap = _filterOperator.getBitmaps().reduce(); + return _cachedFilterBitmap.getCardinality(); + } + // Fallback for expression-based or complex filters that don't support count/bitmap estimation. + // Returns totalDocs, which biases toward the inverted index path. This is acceptable because: + // (1) the operator is opt-in (useIndexBasedDistinctOperator=true), so the user explicitly requested it, + // (2) the inverted index path is always correct, just potentially slower for very selective filters, + // (3) the cost heuristic still bounds the decision — high-cardinality dictionaries will prefer scan + // even with a large filteredDocCount estimate, and + // (4) the performance penalty for incorrect path choice is bounded: buildFilteredDocIds() materializes + // the filter bitmap via DocIdSetOperator, which is O(filteredDocs) — comparable to scan overhead. + return _indexSegment.getSegmentMetadata().getTotalDocs(); Review Comment: In the heuristic fallback when the filter can't provide counts/bitmaps, returning totalDocs will often force the inverted-index path even for highly selective complex filters. In that case the operator can still end up doing O(dictionaryCardinality) `intersects()` checks, which is not bounded by the O(filteredDocs) bitmap materialization mentioned in the comment and can be dramatically slower than the scan path. Consider making this fallback more conservative (e.g., default to scan) or materializing the filtered docIds once to obtain an exact `filteredDocCount` before deciding between scan vs inverted-index. ```suggestion // Conservatively return a very small estimate to bias the cost heuristic toward the scan path. // This avoids forcing the bitmap inverted-index path, which can still perform O(dictionaryCardinality) // intersects() checks and be significantly slower than scanning for highly selective filters. // The exact value is not semantically meaningful; it only influences path selection. return 1; ``` ########## pinot-common/src/main/java/org/apache/pinot/common/utils/config/QueryOptionsUtils.java: ########## @@ -173,6 +173,16 @@ public static boolean isUseIndexBasedDistinctOperator(Map<String, String> queryO return Boolean.parseBoolean(queryOptions.get(QueryOptionKey.USE_INDEX_BASED_DISTINCT_OPERATOR)); } + /** + * Returns the cost ratio for the inverted-index-based distinct heuristic, or null if not set. + * The inverted index path is chosen when dictionaryCardinality * costRatio <= filteredDocCount. + */ + @Nullable + public static Integer getInvertedIndexDistinctCostRatio(Map<String, String> queryOptions) { + return checkedParseIntPositive(QueryOptionKey.INVERTED_INDEX_DISTINCT_COST_RATIO, + queryOptions.get(QueryOptionKey.INVERTED_INDEX_DISTINCT_COST_RATIO)); + } Review Comment: The Javadoc for `isUseIndexBasedDistinctOperator()` above still says it only enables `JsonIndexDistinctOperator`, but this query option now also gates `InvertedIndexDistinctOperator`. Updating the comment would help prevent confusion for future readers since this file now contains both related query-option helpers. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
