Re: [PR] Add inverted-index-based distinct operator with runtime cost heuristic [pinot]

via GitHub Thu, 19 Mar 2026 22:25:27 -0700


Copilot commented on code in PR #17872:
URL: https://github.com/apache/pinot/pull/17872#discussion_r2964044376



##########
pinot-core/src/main/java/org/apache/pinot/core/operator/query/InvertedIndexDistinctOperator.java:
##########
@@ -0,0 +1,639 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.core.operator.query;
+
+import com.google.common.base.CaseFormat;
+import it.unimi.dsi.fastutil.ints.IntIterator;
+import java.util.Collections;
+import java.util.List;
+import java.util.NavigableMap;
+import java.util.TreeMap;
+import java.util.stream.Collectors;
+import javax.annotation.Nullable;
+import org.apache.pinot.common.request.context.ExpressionContext;
+import org.apache.pinot.common.request.context.OrderByExpressionContext;
+import org.apache.pinot.common.utils.DataSchema;
+import org.apache.pinot.common.utils.DataSchema.ColumnDataType;
+import org.apache.pinot.common.utils.config.QueryOptionsUtils;
+import org.apache.pinot.core.common.Operator;
+import org.apache.pinot.core.operator.BaseOperator;
+import org.apache.pinot.core.operator.BaseProjectOperator;
+import org.apache.pinot.core.operator.ExecutionStatistics;
+import org.apache.pinot.core.operator.ExplainAttributeBuilder;
+import org.apache.pinot.core.operator.blocks.DocIdSetBlock;
+import org.apache.pinot.core.operator.blocks.ValueBlock;
+import org.apache.pinot.core.operator.blocks.results.DistinctResultsBlock;
+import org.apache.pinot.core.operator.filter.BaseFilterOperator;
+import org.apache.pinot.core.plan.DocIdSetPlanNode;
+import org.apache.pinot.core.plan.ProjectPlanNode;
+import org.apache.pinot.core.query.distinct.DistinctExecutor;
+import org.apache.pinot.core.query.distinct.DistinctExecutorFactory;
+import org.apache.pinot.core.query.distinct.table.BigDecimalDistinctTable;
+import org.apache.pinot.core.query.distinct.table.BytesDistinctTable;
+import org.apache.pinot.core.query.distinct.table.DictIdDistinctTable;
+import org.apache.pinot.core.query.distinct.table.DistinctTable;
+import org.apache.pinot.core.query.distinct.table.DoubleDistinctTable;
+import org.apache.pinot.core.query.distinct.table.FloatDistinctTable;
+import org.apache.pinot.core.query.distinct.table.IntDistinctTable;
+import org.apache.pinot.core.query.distinct.table.LongDistinctTable;
+import org.apache.pinot.core.query.distinct.table.StringDistinctTable;
+import org.apache.pinot.core.query.request.context.QueryContext;
+import org.apache.pinot.segment.spi.IndexSegment;
+import org.apache.pinot.segment.spi.SegmentContext;
+import org.apache.pinot.segment.spi.datasource.DataSource;
+import org.apache.pinot.segment.spi.datasource.DataSourceMetadata;
+import org.apache.pinot.segment.spi.index.reader.Dictionary;
+import org.apache.pinot.segment.spi.index.reader.InvertedIndexReader;
+import org.apache.pinot.segment.spi.index.reader.NullValueVectorReader;
+import org.apache.pinot.segment.spi.index.reader.SortedIndexReader;
+import org.apache.pinot.spi.query.QueryThreadContext;
+import org.apache.pinot.spi.utils.ByteArray;
+import org.apache.pinot.spi.utils.Pairs;
+import org.roaringbitmap.PeekableIntIterator;
+import org.roaringbitmap.buffer.ImmutableRoaringBitmap;
+import org.roaringbitmap.buffer.MutableRoaringBitmap;
+
+
+/**
+ * Inverted-index-based operator for single-column distinct queries on a 
single segment.
+ *
+ * <p>Supports three execution paths, chosen at runtime:
+ * <ul>
+ *   <li><b>Sorted index path</b>: For sorted columns, merge-iterates filter 
bitmap against contiguous doc ranges.
+ *       Cost ~ O(cardinality + filteredDocs). Always chosen when the column 
has a sorted forward index.</li>
+ *   <li><b>Bitmap inverted index path</b>: Iterates dictionary entries and 
uses inverted index bitmap intersections
+ *       to check filter membership. Avoids the projection pipeline entirely. 
Chosen by cost heuristic when dictionary
+ *       cardinality is much smaller than the filtered doc count.</li>
+ *   <li><b>Scan path (fallback)</b>: Uses ProjectOperator + DistinctExecutor 
to scan filtered docs.
+ *       Used when the cost heuristic determines scanning is cheaper.</li>
+ * </ul>
+ *
+ * <p>Enabled via the {@code useIndexBasedDistinctOperator} query option. The 
cost ratio can be tuned
+ * via the {@code invertedIndexDistinctCostRatio} query option.
+ */
+public class InvertedIndexDistinctOperator extends 
BaseOperator<DistinctResultsBlock> {
+  private static final String EXPLAIN_NAME = "DISTINCT_INVERTED_INDEX";
+  private static final String EXPLAIN_NAME_SORTED_INDEX = 
"DISTINCT_SORTED_INDEX";
+  private static final String EXPLAIN_NAME_SCAN_FALLBACK = "DISTINCT";
+
+  private final IndexSegment _indexSegment;
+  private final SegmentContext _segmentContext;
+  private final QueryContext _queryContext;
+  private final BaseFilterOperator _filterOperator;
+  private final DataSource _dataSource;
+  private final Dictionary _dictionary;
+  private final InvertedIndexReader<?> _invertedIndexReader;
+
+  // Scan path: created lazily when scan fallback is chosen
+  private BaseProjectOperator<?> _projectOperator;
+
+  // Cached filter bitmap from the heuristic phase, reused by 
buildFilteredDocIds() to avoid
+  // a redundant getBitmaps().reduce() call.
+  private ImmutableRoaringBitmap _cachedFilterBitmap;
+
+  // Execution tracking
+  private boolean _usedInvertedIndexPath = false;
+  private int _numDocsScanned = 0;
+  private int _numEntriesExamined = 0;
+  private long _numEntriesScannedInFilter = 0;
+
+  /**
+   * Creates an InvertedIndexDistinctOperator. The caller (DistinctPlanNode) 
must verify that the column
+   * has both a dictionary and an inverted index before constructing this 
operator.
+   */
+  public InvertedIndexDistinctOperator(IndexSegment indexSegment, 
SegmentContext segmentContext,
+      QueryContext queryContext, BaseFilterOperator filterOperator, DataSource 
dataSource) {
+    _indexSegment = indexSegment;
+    _segmentContext = segmentContext;
+    _queryContext = queryContext;
+    _filterOperator = filterOperator;
+    _dataSource = dataSource;
+    _dictionary = dataSource.getDictionary();
+    _invertedIndexReader = dataSource.getInvertedIndex();
+  }
+
+  @Override
+  protected DistinctResultsBlock getNextBlock() {
+    // Sorted index: always use the sorted path — O(cardinality + 
filteredDocs) merge iteration
+    if (_invertedIndexReader instanceof SortedIndexReader) {
+      _usedInvertedIndexPath = true;
+      return executeSortedIndexPath((SortedIndexReader<?>) 
_invertedIndexReader);
+    }
+    // Bitmap inverted index: use cost heuristic to decide
+    if (shouldUseBitmapInvertedIndex()) {
+      _usedInvertedIndexPath = true;
+      return executeInvertedIndexPath();
+    }
+    return executeScanPath();
+  }
+
+  // ==================== Cost Heuristic ====================
+
+  /**
+   * Default cost ratios for the inverted-index-based distinct heuristic, 
keyed by dictionary cardinality threshold.
+   * The inverted index path is chosen when {@code dictionaryCardinality * 
costRatio <= filteredDocCount}.
+   *
+   * <p>The cost ratio accounts for the per-entry bitmap intersection cost 
relative to the per-doc scan cost.
+   * For low-cardinality dictionaries, each bitmap is dense and {@code 
intersects()} is fast, but there are few
+   * entries so any unnecessary intersection is relatively expensive vs. 
scanning a small filtered doc set.
+   * For high-cardinality dictionaries, bitmaps are sparser and {@code 
intersects()} is slower per entry,
+   * but the scan path also becomes cheaper (fewer docs per value), so a lower 
ratio suffices.
+   *
+   * <p>Benchmarking (BenchmarkInvertedIndexDistinct, 1M docs) shows the 
crossover points:
+   * <ul>
+   *   <li>dictCard &le; 1K:  costRatio=30 — inverted index wins when 
filteredDocs &ge; ~30x dictCard</li>
+   *   <li>dictCard &le; 10K: costRatio=10 — inverted index wins when 
filteredDocs &ge; ~10x dictCard</li>
+   *   <li>dictCard &gt; 10K: costRatio=6  — inverted index wins when 
filteredDocs &ge; ~6x dictCard</li>
+   * </ul>
+   *
+   * <p>Can be overridden at query time via the query option {@code 
invertedIndexDistinctCostRatio}.
+   */
+  static final NavigableMap<Integer, Integer> 
DEFAULT_COST_RATIO_BY_CARDINALITY;
+
+  static {
+    TreeMap<Integer, Integer> map = new TreeMap<>();
+    map.put(0, 30);       // dictCard <= 1000: costRatio = 30
+    map.put(1_001, 10);   // dictCard 1001..10000: costRatio = 10
+    map.put(10_001, 6);   // dictCard > 10000: costRatio = 6
+    DEFAULT_COST_RATIO_BY_CARDINALITY = 
Collections.unmodifiableNavigableMap(map);
+  }
+
+  static int getDefaultCostRatio(int dictionaryCardinality) {
+    return 
DEFAULT_COST_RATIO_BY_CARDINALITY.floorEntry(dictionaryCardinality).getValue();
+  }
+
+  private boolean shouldUseBitmapInvertedIndex() {
+    int dictionaryCardinality = _dictionary.length();
+    int filteredDocCount = estimateFilteredDocCount();
+    if (filteredDocCount == 0) {
+      return false;
+    }
+    if (dictionaryCardinality == 0) {
+      return true;
+    }
+    Integer costRatioOverride = 
QueryOptionsUtils.getInvertedIndexDistinctCostRatio(_queryContext.getQueryOptions());
+    int costRatio = costRatioOverride != null ? costRatioOverride : 
getDefaultCostRatio(dictionaryCardinality);
+    return (long) dictionaryCardinality * costRatio <= filteredDocCount;
+  }
+
+  /**
+   * Cheaply estimates the number of docs matching the filter without 
consuming the filter operator.
+   */
+  private int estimateFilteredDocCount() {
+    if (_filterOperator.isResultEmpty()) {
+      return 0;
+    }
+    if (_filterOperator.isResultMatchingAll()) {
+      return _indexSegment.getSegmentMetadata().getTotalDocs();
+    }
+    if (_filterOperator.canOptimizeCount()) {
+      return _filterOperator.getNumMatchingDocs();
+    }
+    if (_filterOperator.canProduceBitmaps()) {
+      _cachedFilterBitmap = _filterOperator.getBitmaps().reduce();
+      return _cachedFilterBitmap.getCardinality();
+    }
+    // Fallback for expression-based or complex filters that don't support 
count/bitmap estimation.
+    // Conservatively return a small estimate to bias toward the scan path. 
This avoids forcing the
+    // bitmap inverted index path, which performs O(dictionaryCardinality) 
intersects() checks and
+    // can be significantly slower than scanning for highly selective filters. 
The user can still
+    // force the inverted index path via invertedIndexDistinctCostRatio=1.
+    return 1;
+  }
+
+  // ==================== Scan Path (Fallback) ====================
+
+  private DistinctResultsBlock executeScanPath() {
+    BaseProjectOperator<?> projectOperator = getOrCreateProjectOperator();
+    DistinctExecutor executor = 
DistinctExecutorFactory.getDistinctExecutor(projectOperator, _queryContext);
+    ValueBlock valueBlock;
+    while ((valueBlock = projectOperator.nextBlock()) != null) {
+      _numDocsScanned += valueBlock.getNumDocs();
+      if (executor.process(valueBlock)) {
+        break;
+      }
+    }
+    return new DistinctResultsBlock(executor.getResult(), _queryContext);
+  }
+
+  private BaseProjectOperator<?> getOrCreateProjectOperator() {
+    if (_projectOperator == null) {
+      _projectOperator = new ProjectPlanNode(_segmentContext, _queryContext,
+          _queryContext.getSelectExpressions(), 
DocIdSetPlanNode.MAX_DOC_PER_CALL, _filterOperator).run();
+    }
+    return _projectOperator;
+  }
+
+  // ==================== Sorted Index Path ====================
+
+  /**
+   * Optimized path for sorted columns. Each dictId maps to a contiguous doc 
range [start, end].
+   * We merge-iterate the filter bitmap with the sorted ranges in 
O(cardinality + filteredDocs).
+   */
+  private DistinctResultsBlock executeSortedIndexPath(SortedIndexReader<?> 
sortedReader) {
+    DictIdDistinctTable dictIdTable = createDictIdTable();
+    OrderByExpressionContext orderByExpression =
+        _queryContext.getOrderByExpressions() != null ? 
_queryContext.getOrderByExpressions().get(0) : null;
+    int dictLength = _dictionary.length();
+    int limit = _queryContext.getLimit();
+
+    ImmutableRoaringBitmap filteredDocIds = buildFilteredDocIds();
+    // Exclude null docs so the null placeholder value is not counted as a 
regular distinct value.
+    // Null is handled separately via hasFilteredNulls().
+    ImmutableRoaringBitmap nonNullFilteredDocIds = 
excludeNullDocs(filteredDocIds);
+
+    if (nonNullFilteredDocIds == null) {
+      // No filter, no null exclusion — every dictionary value is present
+      int dictId;
+      for (dictId = 0; dictId < dictLength; dictId++) {
+        QueryThreadContext.checkTerminationAndSampleUsagePeriodically(dictId, 
EXPLAIN_NAME_SORTED_INDEX);
+        boolean done = addDictId(dictIdTable, dictId, orderByExpression);
+        if (done || (orderByExpression == null && dictIdTable.hasLimit() && 
dictIdTable.size() >= limit)) {
+          dictId++;
+          break;
+        }
+      }
+      _numEntriesExamined = dictId;
+    } else if (!nonNullFilteredDocIds.isEmpty()) {
+      // Merge-iterate: walk non-null filter bitmap and sorted ranges together.
+      // Both are in sorted order, so this is O(cardinality + filteredDocs).
+      PeekableIntIterator filterIter = nonNullFilteredDocIds.getIntIterator();
+      int dictId;
+      for (dictId = 0; dictId < dictLength && filterIter.hasNext(); dictId++) {
+        QueryThreadContext.checkTerminationAndSampleUsagePeriodically(dictId, 
EXPLAIN_NAME_SORTED_INDEX);
+        Pairs.IntPair range = sortedReader.getDocIds(dictId);
+        int startDocId = range.getLeft();
+        int endDocId = range.getRight(); // inclusive
+
+        // Skip filter docs before this range
+        filterIter.advanceIfNeeded(startDocId);
+
+        // Check if any non-null filter doc falls within this range
+        if (filterIter.hasNext() && filterIter.peekNext() <= endDocId) {
+          boolean done = addDictId(dictIdTable, dictId, orderByExpression);
+          if (done || (orderByExpression == null && dictIdTable.hasLimit() && 
dictIdTable.size() >= limit)) {
+            _numEntriesExamined = dictId + 1;
+            boolean hasNull = hasFilteredNulls(filteredDocIds);
+            return new DistinctResultsBlock(convertDictIdTable(dictIdTable, 
hasNull), _queryContext);
+          }
+          // Advance past the current range for next dictId
+          filterIter.advanceIfNeeded(endDocId + 1);
+        }
+      }
+      _numEntriesExamined = dictId;
+    }
+
+    boolean hasNull = hasFilteredNulls(filteredDocIds);
+    return new DistinctResultsBlock(convertDictIdTable(dictIdTable, hasNull), 
_queryContext);
+  }
+
+  // ==================== Bitmap Inverted Index Path ====================
+
+  private DistinctResultsBlock executeInvertedIndexPath() {
+    ImmutableRoaringBitmap filteredDocIds = buildFilteredDocIds();
+    // Exclude null docs so the null placeholder value is not counted as a 
regular distinct value.
+    // Null is handled separately via hasFilteredNulls().
+    ImmutableRoaringBitmap nonNullFilteredDocIds = 
excludeNullDocs(filteredDocIds);
+    DictIdDistinctTable dictIdTable = createDictIdTable();
+    OrderByExpressionContext orderByExpression =
+        _queryContext.getOrderByExpressions() != null ? 
_queryContext.getOrderByExpressions().get(0) : null;
+    int dictLength = _dictionary.length();
+    int limit = _queryContext.getLimit();
+
+    int dictId;
+    for (dictId = 0; dictId < dictLength; dictId++) {
+      QueryThreadContext.checkTerminationAndSampleUsagePeriodically(dictId, 
EXPLAIN_NAME);
+
+      // SortedIndexReader is handled separately in getNextBlock(), so this 
path only sees bitmap inverted indexes
+      // whose getDocIds() returns ImmutableRoaringBitmap.
+      ImmutableRoaringBitmap docIds = (ImmutableRoaringBitmap) 
_invertedIndexReader.getDocIds(dictId);
+      if (docIds.isEmpty()) {
+        continue;
+      }
+
+      // Use intersects() for early termination instead of computing full 
intersection.
+      // intersects() short-circuits on the first common element, which is 
orders of magnitude
+      // faster than RoaringBitmap.and() especially for low-cardinality 
(dense) bitmaps.
+      // Use the non-null filter bitmap to skip the null placeholder value.
+      boolean includeValue;
+      if (nonNullFilteredDocIds == null) {
+        includeValue = true;
+      } else {
+        includeValue = ImmutableRoaringBitmap.intersects(docIds, 
nonNullFilteredDocIds);
+      }
+
+      if (includeValue) {
+        boolean done = addDictId(dictIdTable, dictId, orderByExpression);
+        if (done) {
+          dictId++;
+          break;
+        }
+      }
+
+      if (orderByExpression == null && dictIdTable.hasLimit() && 
dictIdTable.size() >= limit) {
+        dictId++;
+        break;
+      }
+    }
+    _numEntriesExamined = dictId;
+
+    boolean hasNull = hasFilteredNulls(filteredDocIds);
+    return new DistinctResultsBlock(convertDictIdTable(dictIdTable, hasNull), 
_queryContext);
+  }
+
+  @Nullable
+  private ImmutableRoaringBitmap buildFilteredDocIds() {
+    if (_filterOperator.isResultMatchingAll()) {
+      return null;
+    }
+
+    if (_cachedFilterBitmap != null) {
+      return _cachedFilterBitmap;
+    }
+
+    if (_filterOperator.canProduceBitmaps()) {
+      return _filterOperator.getBitmaps().reduce();
+    }
+
+    if (_filterOperator.isResultEmpty()) {
+      return new MutableRoaringBitmap();
+    }
+
+    MutableRoaringBitmap bitmap = new MutableRoaringBitmap();
+    DocIdSetPlanNode docIdSetPlanNode =
+        new DocIdSetPlanNode(_segmentContext, _queryContext, 
DocIdSetPlanNode.MAX_DOC_PER_CALL, _filterOperator);
+    var docIdSetOperator = docIdSetPlanNode.run();
+    DocIdSetBlock block;
+    while ((block = docIdSetOperator.nextBlock()) != null) {
+      int[] docIds = block.getDocIds();
+      int length = block.getLength();
+      for (int i = 0; i < length; i++) {
+        bitmap.add(docIds[i]);
+      }
+    }
+    _numEntriesScannedInFilter = 
docIdSetOperator.getExecutionStatistics().getNumEntriesScannedInFilter();
+    return bitmap;
+  }
+
+  private DictIdDistinctTable createDictIdTable() {
+    ExpressionContext expr = _queryContext.getSelectExpressions().get(0);
+    String column = expr.getIdentifier();
+    DataSourceMetadata dataSourceMetadata = 
_dataSource.getDataSourceMetadata();
+    DataSchema dataSchema = new DataSchema(new String[]{column},
+        new 
ColumnDataType[]{ColumnDataType.fromDataTypeSV(dataSourceMetadata.getDataType())});
+    OrderByExpressionContext orderByExpression =
+        _queryContext.getOrderByExpressions() != null ? 
_queryContext.getOrderByExpressions().get(0) : null;
+    return new DictIdDistinctTable(dataSchema, _queryContext.getLimit(), 
_queryContext.isNullHandlingEnabled(),
+        orderByExpression);
+  }
+
+  /**
+   * Adds a dictId to the table. Returns true if the table is full (no ORDER 
BY, limit reached).
+   */
+  private static boolean addDictId(DictIdDistinctTable table, int dictId,
+      @Nullable OrderByExpressionContext orderByExpression) {
+    if (table.hasLimit()) {
+      if (orderByExpression != null) {
+        table.addWithOrderBy(dictId);
+        return false;
+      } else {
+        return table.addWithoutOrderBy(dictId);
+      }
+    } else {
+      table.addUnbounded(dictId);
+      return false;
+    }
+  }
+
+  /**
+   * Checks whether any filtered doc has a null value for this column.
+   * Nulls are not in the dictionary, so they must be checked separately via 
the null value vector.
+   */
+  private boolean hasFilteredNulls(@Nullable ImmutableRoaringBitmap 
filteredDocIds) {
+    if (!_queryContext.isNullHandlingEnabled()) {
+      return false;
+    }
+    NullValueVectorReader nullReader = _dataSource.getNullValueVector();
+    if (nullReader == null) {
+      return false;
+    }
+    ImmutableRoaringBitmap nullBitmap = nullReader.getNullBitmap();
+    if (nullBitmap == null || nullBitmap.isEmpty()) {
+      return false;
+    }
+    // filteredDocIds == null means match-all
+    return filteredDocIds == null || 
ImmutableRoaringBitmap.intersects(nullBitmap, filteredDocIds);
+  }
+
+  /**
+   * Returns a filter bitmap that excludes null docs. This prevents the null 
placeholder value
+   * (e.g., Integer.MIN_VALUE) from being counted as a regular distinct value 
during dictionary
+   * iteration. Null is handled separately via {@link 
#hasFilteredNulls(ImmutableRoaringBitmap)}.
+   *
+   * <p>When null handling is disabled or no nulls exist, returns the original 
bitmap unchanged.
+   * When filteredDocIds is null (match-all), returns all docs minus null docs.
+   */
+  @Nullable
+  private ImmutableRoaringBitmap excludeNullDocs(@Nullable 
ImmutableRoaringBitmap filteredDocIds) {
+    if (!_queryContext.isNullHandlingEnabled()) {
+      return filteredDocIds;
+    }
+    NullValueVectorReader nullReader = _dataSource.getNullValueVector();
+    if (nullReader == null) {
+      return filteredDocIds;
+    }
+    ImmutableRoaringBitmap nullBitmap = nullReader.getNullBitmap();
+    if (nullBitmap == null || nullBitmap.isEmpty()) {
+      return filteredDocIds;
+    }
+    if (filteredDocIds == null) {
+      // Match-all: build bitmap of all docs minus null docs
+      MutableRoaringBitmap allDocs = new MutableRoaringBitmap();
+      allDocs.add(0L, _indexSegment.getSegmentMetadata().getTotalDocs());
+      return ImmutableRoaringBitmap.andNot(allDocs, nullBitmap);
+    }
+    return ImmutableRoaringBitmap.andNot(filteredDocIds, nullBitmap);

Review Comment:
   When `filteredDocIds == null` (match-all) and null handling is enabled with 
a non-empty null bitmap, this method materializes an “all docs” bitmap and then 
computes `andNot(allDocs, nullBitmap)`. Because the returned bitmap is then 
non-null, the sorted-path fast branch (`nonNullFilteredDocIds == null`) is 
never taken, so `executeSortedIndexPath()` merge-iterates a bitmap that 
effectively contains almost every doc. `executeInvertedIndexPath()` is affected 
the same way: it runs an `intersects()` check for every dictId instead of 
taking its `includeValue = true` shortcut. Consider special-casing match-all to 
avoid building a full non-null filter bitmap (e.g., treat it as no filter and 
only handle the null-placeholder dictId separately).



##########
pinot-core/src/main/java/org/apache/pinot/core/operator/query/InvertedIndexDistinctOperator.java:
##########
@@ -0,0 +1,639 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.core.operator.query;
+
+import com.google.common.base.CaseFormat;
+import it.unimi.dsi.fastutil.ints.IntIterator;
+import java.util.Collections;
+import java.util.List;
+import java.util.NavigableMap;
+import java.util.TreeMap;
+import java.util.stream.Collectors;
+import javax.annotation.Nullable;
+import org.apache.pinot.common.request.context.ExpressionContext;
+import org.apache.pinot.common.request.context.OrderByExpressionContext;
+import org.apache.pinot.common.utils.DataSchema;
+import org.apache.pinot.common.utils.DataSchema.ColumnDataType;
+import org.apache.pinot.common.utils.config.QueryOptionsUtils;
+import org.apache.pinot.core.common.Operator;
+import org.apache.pinot.core.operator.BaseOperator;
+import org.apache.pinot.core.operator.BaseProjectOperator;
+import org.apache.pinot.core.operator.ExecutionStatistics;
+import org.apache.pinot.core.operator.ExplainAttributeBuilder;
+import org.apache.pinot.core.operator.blocks.DocIdSetBlock;
+import org.apache.pinot.core.operator.blocks.ValueBlock;
+import org.apache.pinot.core.operator.blocks.results.DistinctResultsBlock;
+import org.apache.pinot.core.operator.filter.BaseFilterOperator;
+import org.apache.pinot.core.plan.DocIdSetPlanNode;
+import org.apache.pinot.core.plan.ProjectPlanNode;
+import org.apache.pinot.core.query.distinct.DistinctExecutor;
+import org.apache.pinot.core.query.distinct.DistinctExecutorFactory;
+import org.apache.pinot.core.query.distinct.table.BigDecimalDistinctTable;
+import org.apache.pinot.core.query.distinct.table.BytesDistinctTable;
+import org.apache.pinot.core.query.distinct.table.DictIdDistinctTable;
+import org.apache.pinot.core.query.distinct.table.DistinctTable;
+import org.apache.pinot.core.query.distinct.table.DoubleDistinctTable;
+import org.apache.pinot.core.query.distinct.table.FloatDistinctTable;
+import org.apache.pinot.core.query.distinct.table.IntDistinctTable;
+import org.apache.pinot.core.query.distinct.table.LongDistinctTable;
+import org.apache.pinot.core.query.distinct.table.StringDistinctTable;
+import org.apache.pinot.core.query.request.context.QueryContext;
+import org.apache.pinot.segment.spi.IndexSegment;
+import org.apache.pinot.segment.spi.SegmentContext;
+import org.apache.pinot.segment.spi.datasource.DataSource;
+import org.apache.pinot.segment.spi.datasource.DataSourceMetadata;
+import org.apache.pinot.segment.spi.index.reader.Dictionary;
+import org.apache.pinot.segment.spi.index.reader.InvertedIndexReader;
+import org.apache.pinot.segment.spi.index.reader.NullValueVectorReader;
+import org.apache.pinot.segment.spi.index.reader.SortedIndexReader;
+import org.apache.pinot.spi.query.QueryThreadContext;
+import org.apache.pinot.spi.utils.ByteArray;
+import org.apache.pinot.spi.utils.Pairs;
+import org.roaringbitmap.PeekableIntIterator;
+import org.roaringbitmap.buffer.ImmutableRoaringBitmap;
+import org.roaringbitmap.buffer.MutableRoaringBitmap;
+
+
+/**
+ * Inverted-index-based operator for single-column distinct queries on a 
single segment.
+ *
+ * <p>Supports three execution paths, chosen at runtime:
+ * <ul>
+ *   <li><b>Sorted index path</b>: For sorted columns, merge-iterates filter 
bitmap against contiguous doc ranges.
+ *       Cost ~ O(cardinality + filteredDocs). Always chosen when the column 
has a sorted forward index.</li>
+ *   <li><b>Bitmap inverted index path</b>: Iterates dictionary entries and 
uses inverted index bitmap intersections
+ *       to check filter membership. Avoids the projection pipeline entirely. 
Chosen by cost heuristic when dictionary
+ *       cardinality is much smaller than the filtered doc count.</li>
+ *   <li><b>Scan path (fallback)</b>: Uses ProjectOperator + DistinctExecutor 
to scan filtered docs.
+ *       Used when the cost heuristic determines scanning is cheaper.</li>
+ * </ul>
+ *
+ * <p>Enabled via the {@code useIndexBasedDistinctOperator} query option. The 
cost ratio can be tuned
+ * via the {@code invertedIndexDistinctCostRatio} query option.
+ */
+public class InvertedIndexDistinctOperator extends 
BaseOperator<DistinctResultsBlock> {
+  private static final String EXPLAIN_NAME = "DISTINCT_INVERTED_INDEX";
+  private static final String EXPLAIN_NAME_SORTED_INDEX = 
"DISTINCT_SORTED_INDEX";
+  private static final String EXPLAIN_NAME_SCAN_FALLBACK = "DISTINCT";
+
+  private final IndexSegment _indexSegment;
+  private final SegmentContext _segmentContext;
+  private final QueryContext _queryContext;
+  private final BaseFilterOperator _filterOperator;
+  private final DataSource _dataSource;
+  private final Dictionary _dictionary;
+  private final InvertedIndexReader<?> _invertedIndexReader;
+
+  // Scan path: created lazily when scan fallback is chosen
+  private BaseProjectOperator<?> _projectOperator;
+
+  // Cached filter bitmap from the heuristic phase, reused by 
buildFilteredDocIds() to avoid
+  // a redundant getBitmaps().reduce() call.
+  private ImmutableRoaringBitmap _cachedFilterBitmap;
+
+  // Execution tracking
+  private boolean _usedInvertedIndexPath = false;
+  private int _numDocsScanned = 0;
+  private int _numEntriesExamined = 0;
+  private long _numEntriesScannedInFilter = 0;
+
+  /**
+   * Creates an InvertedIndexDistinctOperator. The caller (DistinctPlanNode) 
must verify that the column
+   * has both a dictionary and an inverted index before constructing this 
operator.
+   */
+  public InvertedIndexDistinctOperator(IndexSegment indexSegment, 
SegmentContext segmentContext,
+      QueryContext queryContext, BaseFilterOperator filterOperator, DataSource 
dataSource) {
+    _indexSegment = indexSegment;
+    _segmentContext = segmentContext;
+    _queryContext = queryContext;
+    _filterOperator = filterOperator;
+    _dataSource = dataSource;
+    _dictionary = dataSource.getDictionary();
+    _invertedIndexReader = dataSource.getInvertedIndex();
+  }
+
+  @Override
+  protected DistinctResultsBlock getNextBlock() {
+    // Sorted index: always use the sorted path — O(cardinality + 
filteredDocs) merge iteration
+    if (_invertedIndexReader instanceof SortedIndexReader) {
+      _usedInvertedIndexPath = true;
+      return executeSortedIndexPath((SortedIndexReader<?>) 
_invertedIndexReader);
+    }
+    // Bitmap inverted index: use cost heuristic to decide
+    if (shouldUseBitmapInvertedIndex()) {
+      _usedInvertedIndexPath = true;
+      return executeInvertedIndexPath();
+    }
+    return executeScanPath();
+  }
+
+  // ==================== Cost Heuristic ====================
+
+  /**
+   * Default cost ratios for the inverted-index-based distinct heuristic. Each key is the lowest dictionary
+   * cardinality of a tier and each value is the cost ratio of that tier, so the ratio for a given cardinality is
+   * the value of its {@code floorEntry}. The inverted index path is chosen when
+   * {@code dictionaryCardinality * costRatio <= filteredDocCount}.
+   *
+   * <p>The cost ratio expresses the cost of one per-entry bitmap intersection relative to the cost of scanning one
+   * doc. For low-cardinality dictionaries, each bitmap is dense and {@code intersects()} is fast, but there are few
+   * entries, so any unnecessary intersection is relatively expensive compared to scanning a small filtered doc set.
+   * For high-cardinality dictionaries, bitmaps are sparser and {@code intersects()} is slower per entry, but the
+   * scan path also becomes cheaper (fewer docs per value), so a lower ratio suffices.
+   *
+   * <p>Crossover points measured with BenchmarkInvertedIndexDistinct on 1M docs:
+   * <ul>
+   *   <li>dictCard &le; 1K:  costRatio=30 (inverted index wins when filteredDocs &ge; ~30x dictCard)</li>
+   *   <li>dictCard &le; 10K: costRatio=10 (inverted index wins when filteredDocs &ge; ~10x dictCard)</li>
+   *   <li>dictCard &gt; 10K: costRatio=6 (inverted index wins when filteredDocs &ge; ~6x dictCard)</li>
+   * </ul>
+   *
+   * <p>The query option {@code invertedIndexDistinctCostRatio} overrides these defaults at query time.
+   */
+  static final NavigableMap<Integer, Integer> DEFAULT_COST_RATIO_BY_CARDINALITY;
+
+  static {
+    NavigableMap<Integer, Integer> ratioByMinCardinality = new TreeMap<>();
+    // Tier 1: dictCard 0..1000
+    ratioByMinCardinality.put(0, 30);
+    // Tier 2: dictCard 1001..10000
+    ratioByMinCardinality.put(1_001, 10);
+    // Tier 3: dictCard above 10000
+    ratioByMinCardinality.put(10_001, 6);
+    DEFAULT_COST_RATIO_BY_CARDINALITY = Collections.unmodifiableNavigableMap(ratioByMinCardinality);
+  }
+
+  /**
+   * Looks up the default cost ratio for a dictionary of the given cardinality.
+   *
+   * @param dictionaryCardinality number of entries in the dictionary; the lookup takes the greatest tier key in
+   *     {@link #DEFAULT_COST_RATIO_BY_CARDINALITY} that is less than or equal to this value
+   * @return the cost ratio of the matching tier
+   */
+  static int getDefaultCostRatio(int dictionaryCardinality) {
+    var tier = DEFAULT_COST_RATIO_BY_CARDINALITY.floorEntry(dictionaryCardinality);
+    return tier.getValue();
+  }
+
+  /**
+   * Applies the cost heuristic that picks between the bitmap inverted index path and the scan path.
+   *
+   * <p>The index path is never used when the estimated filtered doc count is 0, and is always used for an empty
+   * dictionary. Otherwise it is used when {@code dictionaryCardinality * costRatio <= filteredDocCount}, where the
+   * cost ratio comes from the {@code invertedIndexDistinctCostRatio} query option if set, else from
+   * {@link #getDefaultCostRatio(int)}.
+   *
+   * @return true to use the bitmap inverted index path, false to scan
+   */
+  private boolean shouldUseBitmapInvertedIndex() {
+    int numDictEntries = _dictionary.length();
+    int numFilteredDocs = estimateFilteredDocCount();
+    if (numFilteredDocs == 0) {
+      return false;
+    }
+    if (numDictEntries == 0) {
+      return true;
+    }
+    Integer overrideRatio = QueryOptionsUtils.getInvertedIndexDistinctCostRatio(_queryContext.getQueryOptions());
+    int ratio;
+    if (overrideRatio == null) {
+      ratio = getDefaultCostRatio(numDictEntries);
+    } else {
+      ratio = overrideRatio;
+    }
+    // Widen to long before multiplying so a large cardinality times the ratio cannot overflow int.
+    long indexPathCost = (long) numDictEntries * ratio;
+    return indexPathCost <= numFilteredDocs;
+  }
+
+  /**
+   * Estimates the number of docs matching the filter, using the cheapest source the filter operator offers.
+   *
+   * <p>Side effect: when the estimate is taken from the filter bitmaps, the reduced bitmap is stored in
+   * {@code _cachedFilterBitmap}.
+   *
+   * @return 0 for an empty result, the segment's total doc count for a match-all filter, the matching doc count
+   *     when the filter can report or materialize it, otherwise the placeholder estimate 1
+   */
+  private int estimateFilteredDocCount() {
+    int estimate;
+    if (_filterOperator.isResultEmpty()) {
+      estimate = 0;
+    } else if (_filterOperator.isResultMatchingAll()) {
+      estimate = _indexSegment.getSegmentMetadata().getTotalDocs();
+    } else if (_filterOperator.canOptimizeCount()) {
+      estimate = _filterOperator.getNumMatchingDocs();
+    } else if (_filterOperator.canProduceBitmaps()) {
+      _cachedFilterBitmap = _filterOperator.getBitmaps().reduce();
+      estimate = _cachedFilterBitmap.getCardinality();
+    } else {
+      // Fallback for expression-based or complex filters that support neither count nor bitmap estimation.
+      // A deliberately small estimate biases the heuristic toward the scan path, because the bitmap inverted
+      // index path performs O(dictionaryCardinality) intersects() checks and can be significantly slower than
+      // scanning for highly selective filters.
+      // NOTE(review): with this estimate the heuristic only passes when dictionaryCardinality * costRatio <= 1,
+      // so an invertedIndexDistinctCostRatio override of 1 selects the index path only for a dictionary with at
+      // most one entry; confirm which override value is meant to force the index path here.
+      estimate = 1;
+    }
+    return estimate;
+  }
+
+  // ==================== Scan Path (Fallback) ====================
+
+  /**
+   * Fallback path: feeds every projected value block to a regular distinct executor.
+   *
+   * @return results block wrapping the executor's result
+   */
+  private DistinctResultsBlock executeScanPath() {
+    BaseProjectOperator<?> projection = getOrCreateProjectOperator();
+    DistinctExecutor distinctExecutor = DistinctExecutorFactory.getDistinctExecutor(projection, _queryContext);
+    for (ValueBlock block = projection.nextBlock(); block != null; block = projection.nextBlock()) {
+      _numDocsScanned += block.getNumDocs();
+      // process() returning true stops the scan early.
+      if (distinctExecutor.process(block)) {
+        break;
+      }
+    }
+    return new DistinctResultsBlock(distinctExecutor.getResult(), _queryContext);
+  }
+
+  /**
+   * Returns the project operator over the select expressions, building it on first use and reusing it afterwards.
+   */
+  private BaseProjectOperator<?> getOrCreateProjectOperator() {
+    if (_projectOperator != null) {
+      return _projectOperator;
+    }
+    _projectOperator = new ProjectPlanNode(_segmentContext, _queryContext, _queryContext.getSelectExpressions(),
+        DocIdSetPlanNode.MAX_DOC_PER_CALL, _filterOperator).run();
+    return _projectOperator;
+  }
+
+  // ==================== Sorted Index Path ====================
+
+  /**
+   * Distinct computation for a sorted column, where each dictId maps to one contiguous doc id range
+   * {@code [start, end]} (end inclusive).
+   *
+   * <p>When there is no bitmap to consult (no filter and no null exclusion), every dictionary entry is added
+   * directly. Otherwise the filtered non-null doc ids and the per-dictId ranges are walked together in ascending
+   * order, and a dictId is added when a filtered doc id lies inside its range. Iteration stops early once the
+   * table reports it is full or a LIMIT without ORDER BY has been reached. {@code _numEntriesExamined} records
+   * how many dictIds were visited.
+   *
+   * @param sortedReader sorted index reader supplying the doc id range of each dictId
+   * @return results block built from the collected dictIds, with null added when a filtered doc is null
+   */
+  private DistinctResultsBlock executeSortedIndexPath(SortedIndexReader<?> sortedReader) {
+    DictIdDistinctTable dictIdTable = createDictIdTable();
+    OrderByExpressionContext orderBy =
+        _queryContext.getOrderByExpressions() != null ? _queryContext.getOrderByExpressions().get(0) : null;
+    int numDictIds = _dictionary.length();
+    int limit = _queryContext.getLimit();
+
+    ImmutableRoaringBitmap matchingDocIds = buildFilteredDocIds();
+    // Drop null docs so the null placeholder value is not reported as a regular distinct value.
+    // Null itself is detected separately through hasFilteredNulls().
+    ImmutableRoaringBitmap matchingNonNullDocIds = excludeNullDocs(matchingDocIds);
+
+    // Number of dictIds visited so far; the dictId being visited is always numExamined - 1.
+    int numExamined = 0;
+    if (matchingNonNullDocIds == null) {
+      // No filter and no null exclusion: every dictionary value is present.
+      while (numExamined < numDictIds) {
+        int dictId = numExamined++;
+        QueryThreadContext.checkTerminationAndSampleUsagePeriodically(dictId, EXPLAIN_NAME_SORTED_INDEX);
+        if (addDictId(dictIdTable, dictId, orderBy)
+            || (orderBy == null && dictIdTable.hasLimit() && dictIdTable.size() >= limit)) {
+          break;
+        }
+      }
+      _numEntriesExamined = numExamined;
+    } else if (!matchingNonNullDocIds.isEmpty()) {
+      // Merge-iterate the filter bitmap and the sorted ranges. Both are in ascending doc id order, so the walk is
+      // O(cardinality + filteredDocs).
+      PeekableIntIterator docIdIterator = matchingNonNullDocIds.getIntIterator();
+      while (numExamined < numDictIds && docIdIterator.hasNext()) {
+        int dictId = numExamined++;
+        QueryThreadContext.checkTerminationAndSampleUsagePeriodically(dictId, EXPLAIN_NAME_SORTED_INDEX);
+        Pairs.IntPair docIdRange = sortedReader.getDocIds(dictId);
+        int firstDocId = docIdRange.getLeft();
+        int lastDocId = docIdRange.getRight(); // inclusive
+
+        // Skip filtered docs that precede this range.
+        docIdIterator.advanceIfNeeded(firstDocId);
+        if (!docIdIterator.hasNext() || docIdIterator.peekNext() > lastDocId) {
+          // No filtered non-null doc carries this value.
+          continue;
+        }
+
+        if (addDictId(dictIdTable, dictId, orderBy)
+            || (orderBy == null && dictIdTable.hasLimit() && dictIdTable.size() >= limit)) {
+          break;
+        }
+        // Move past this range before looking at the next dictId.
+        docIdIterator.advanceIfNeeded(lastDocId + 1);
+      }
+      _numEntriesExamined = numExamined;
+    }
+
+    boolean hasNull = hasFilteredNulls(matchingDocIds);
+    return new DistinctResultsBlock(convertDictIdTable(dictIdTable, hasNull), _queryContext);
+  }
+
+  // ==================== Bitmap Inverted Index Path ====================
+
+  /**
+   * Distinct computation over a bitmap inverted index: walks the dictionary and keeps every dictId whose posting
+   * bitmap shares at least one doc with the filtered non-null docs. {@code _numEntriesExamined} records how many
+   * dictIds were visited.
+   *
+   * @return results block built from the collected dictIds, with null added when a filtered doc is null
+   */
+  private DistinctResultsBlock executeInvertedIndexPath() {
+    ImmutableRoaringBitmap matchingDocIds = buildFilteredDocIds();
+    // Drop null docs so the null placeholder value is not reported as a regular distinct value.
+    // Null itself is detected separately through hasFilteredNulls().
+    ImmutableRoaringBitmap matchingNonNullDocIds = excludeNullDocs(matchingDocIds);
+    DictIdDistinctTable dictIdTable = createDictIdTable();
+    OrderByExpressionContext orderBy =
+        _queryContext.getOrderByExpressions() != null ? _queryContext.getOrderByExpressions().get(0) : null;
+    int numDictIds = _dictionary.length();
+    int limit = _queryContext.getLimit();
+
+    // Number of dictIds visited so far; the dictId being visited is always numExamined - 1.
+    int numExamined = 0;
+    while (numExamined < numDictIds) {
+      int dictId = numExamined++;
+      QueryThreadContext.checkTerminationAndSampleUsagePeriodically(dictId, EXPLAIN_NAME);
+
+      // getNextBlock() routes SortedIndexReader to its own path, so only bitmap inverted indexes, whose
+      // getDocIds() returns ImmutableRoaringBitmap, reach this cast.
+      ImmutableRoaringBitmap postings = (ImmutableRoaringBitmap) _invertedIndexReader.getDocIds(dictId);
+      if (postings.isEmpty()) {
+        continue;
+      }
+
+      // intersects() stops at the first common element instead of computing the full intersection, which is
+      // orders of magnitude faster than RoaringBitmap.and(), especially for low-cardinality (dense) bitmaps.
+      // Testing against the non-null filter bitmap skips the null placeholder value.
+      boolean valuePresent =
+          matchingNonNullDocIds == null || ImmutableRoaringBitmap.intersects(postings, matchingNonNullDocIds);
+      if (valuePresent && addDictId(dictIdTable, dictId, orderBy)) {
+        break;
+      }
+
+      if (orderBy == null && dictIdTable.hasLimit() && dictIdTable.size() >= limit) {
+        break;
+      }
+    }
+    _numEntriesExamined = numExamined;
+
+    boolean hasNull = hasFilteredNulls(matchingDocIds);
+    return new DistinctResultsBlock(convertDictIdTable(dictIdTable, hasNull), _queryContext);
+  }
+
+  /**
+   * Materializes the doc ids matching the filter as a bitmap.
+   *
+   * <p>Sources are tried in this order: the bitmap cached in {@code _cachedFilterBitmap}, the filter operator's
+   * own bitmaps, and finally a full doc id scan. The scan also records {@code _numEntriesScannedInFilter}.
+   *
+   * @return the matching doc ids, or {@code null} when the filter matches every doc
+   */
+  @Nullable
+  private ImmutableRoaringBitmap buildFilteredDocIds() {
+    if (_filterOperator.isResultMatchingAll()) {
+      // null is the match-all marker
+      return null;
+    }
+    if (_cachedFilterBitmap != null) {
+      return _cachedFilterBitmap;
+    }
+    if (_filterOperator.canProduceBitmaps()) {
+      return _filterOperator.getBitmaps().reduce();
+    }
+
+    MutableRoaringBitmap collectedDocIds = new MutableRoaringBitmap();
+    if (_filterOperator.isResultEmpty()) {
+      return collectedDocIds;
+    }
+
+    // No cheaper source is available: run the filter and collect every matching doc id.
+    var docIdSetOperator = new DocIdSetPlanNode(_segmentContext, _queryContext, DocIdSetPlanNode.MAX_DOC_PER_CALL,
+        _filterOperator).run();
+    for (DocIdSetBlock docIdBlock = docIdSetOperator.nextBlock(); docIdBlock != null;
+        docIdBlock = docIdSetOperator.nextBlock()) {
+      int[] blockDocIds = docIdBlock.getDocIds();
+      // Only the first getLength() entries of the array are read.
+      int numDocIds = docIdBlock.getLength();
+      for (int i = 0; i < numDocIds; i++) {
+        collectedDocIds.add(blockDocIds[i]);
+      }
+    }
+    _numEntriesScannedInFilter = docIdSetOperator.getExecutionStatistics().getNumEntriesScannedInFilter();
+    return collectedDocIds;
+  }
+
+  /**
+   * Creates an empty dictId-based distinct table for the first select expression, carrying the query's limit,
+   * null handling flag and first ORDER BY expression (if any).
+   */
+  private DictIdDistinctTable createDictIdTable() {
+    String columnName = _queryContext.getSelectExpressions().get(0).getIdentifier();
+    ColumnDataType columnDataType = ColumnDataType.fromDataTypeSV(_dataSource.getDataSourceMetadata().getDataType());
+    DataSchema singleColumnSchema =
+        new DataSchema(new String[]{columnName}, new ColumnDataType[]{columnDataType});
+    OrderByExpressionContext orderBy = null;
+    if (_queryContext.getOrderByExpressions() != null) {
+      orderBy = _queryContext.getOrderByExpressions().get(0);
+    }
+    return new DictIdDistinctTable(singleColumnSchema, _queryContext.getLimit(),
+        _queryContext.isNullHandlingEnabled(), orderBy);
+  }
+
+  /**
+   * Adds a dictId to the table using the insertion mode that matches the query shape.
+   *
+   * @param table distinct table to add to
+   * @param dictId dictionary id to add
+   * @param orderByExpression ORDER BY expression of the query, or null when there is none
+   * @return the result of {@code addWithoutOrderBy} for a limited table without ORDER BY; always false for an
+   *     unbounded table or a table with ORDER BY
+   */
+  private static boolean addDictId(DictIdDistinctTable table, int dictId,
+      @Nullable OrderByExpressionContext orderByExpression) {
+    if (!table.hasLimit()) {
+      table.addUnbounded(dictId);
+      return false;
+    }
+    if (orderByExpression == null) {
+      return table.addWithoutOrderBy(dictId);
+    }
+    table.addWithOrderBy(dictId);
+    return false;
+  }
+
+  /**
+   * Tells whether at least one filtered doc holds a null value for this column. Nulls are not in the dictionary,
+   * so they are looked up in the null value vector instead.
+   *
+   * @param filteredDocIds doc ids matching the filter, or null when the filter matches every doc
+   * @return true when null handling is enabled and a filtered doc is null
+   */
+  private boolean hasFilteredNulls(@Nullable ImmutableRoaringBitmap filteredDocIds) {
+    if (!_queryContext.isNullHandlingEnabled()) {
+      return false;
+    }
+    NullValueVectorReader nullVector = _dataSource.getNullValueVector();
+    ImmutableRoaringBitmap nullDocIds = nullVector != null ? nullVector.getNullBitmap() : null;
+    if (nullDocIds == null || nullDocIds.isEmpty()) {
+      return false;
+    }
+    if (filteredDocIds == null) {
+      // Match-all filter: any null doc counts.
+      return true;
+    }
+    return ImmutableRoaringBitmap.intersects(nullDocIds, filteredDocIds);
+  }
+
+  /**
+   * Removes null docs from the filter bitmap, so that the null placeholder value (e.g., Integer.MIN_VALUE) is not
+   * reported as a regular distinct value during dictionary iteration. Null is handled separately via
+   * {@link #hasFilteredNulls(ImmutableRoaringBitmap)}.
+   *
+   * @param filteredDocIds doc ids matching the filter, or null when the filter matches every doc
+   * @return the input unchanged when null handling is disabled or the column has no null docs; otherwise the
+   *     filtered docs (all docs of the segment for a match-all filter) minus the null docs
+   */
+  @Nullable
+  private ImmutableRoaringBitmap excludeNullDocs(@Nullable ImmutableRoaringBitmap filteredDocIds) {
+    if (!_queryContext.isNullHandlingEnabled()) {
+      return filteredDocIds;
+    }
+    NullValueVectorReader nullVector = _dataSource.getNullValueVector();
+    ImmutableRoaringBitmap nullDocIds = nullVector != null ? nullVector.getNullBitmap() : null;
+    if (nullDocIds == null || nullDocIds.isEmpty()) {
+      return filteredDocIds;
+    }
+    ImmutableRoaringBitmap candidateDocIds = filteredDocIds;
+    if (candidateDocIds == null) {
+      // Match-all: start from the half-open range [0, totalDocs) covering every doc of the segment.
+      MutableRoaringBitmap everyDoc = new MutableRoaringBitmap();
+      everyDoc.add(0L, _indexSegment.getSegmentMetadata().getTotalDocs());
+      candidateDocIds = everyDoc;
+    }
+    return ImmutableRoaringBitmap.andNot(candidateDocIds, nullDocIds);
+  }
+
+  /**
+   * Converts dictId-based distinct table to a typed distinct table by 
resolving dictionary values.
+   * Similar to {@link
+   * 
org.apache.pinot.core.query.distinct.dictionary.DictionaryBasedSingleColumnDistinctExecutor#getResult()}.
+   */
+  private DistinctTable convertDictIdTable(DictIdDistinctTable dictIdTable, 
boolean hasNull) {
+    DataSchema dataSchema = dictIdTable.getDataSchema();
+    int limit = _queryContext.getLimit();
+    boolean nullHandlingEnabled = _queryContext.isNullHandlingEnabled();
+    OrderByExpressionContext orderByExpression = 
dictIdTable.getOrderByExpression();
+    IntIterator dictIdIterator = dictIdTable.getValueSet().iterator();
+    switch (_dictionary.getValueType()) {
+      case INT: {
+        IntDistinctTable table = new IntDistinctTable(dataSchema, limit, 
nullHandlingEnabled, orderByExpression);
+        while (dictIdIterator.hasNext()) {
+          
table.addUnbounded(_dictionary.getIntValue(dictIdIterator.nextInt()));
+        }
+        if (hasNull) {
+          table.addNull();
+        }
+        return table;
+      }
+      case LONG: {
+        LongDistinctTable table = new LongDistinctTable(dataSchema, limit, 
nullHandlingEnabled, orderByExpression);
+        while (dictIdIterator.hasNext()) {
+          
table.addUnbounded(_dictionary.getLongValue(dictIdIterator.nextInt()));
+        }
+        if (hasNull) {
+          table.addNull();
+        }
+        return table;
+      }
+      case FLOAT: {
+        FloatDistinctTable table = new FloatDistinctTable(dataSchema, limit, 
nullHandlingEnabled, orderByExpression);
+        while (dictIdIterator.hasNext()) {
+          
table.addUnbounded(_dictionary.getFloatValue(dictIdIterator.nextInt()));
+        }
+        if (hasNull) {
+          table.addNull();
+        }
+        return table;
+      }
+      case DOUBLE: {
+        DoubleDistinctTable table = new DoubleDistinctTable(dataSchema, limit, 
nullHandlingEnabled, orderByExpression);
+        while (dictIdIterator.hasNext()) {
+          
table.addUnbounded(_dictionary.getDoubleValue(dictIdIterator.nextInt()));
+        }
+        if (hasNull) {
+          table.addNull();
+        }
+        return table;
+      }
+      case BIG_DECIMAL: {
+        BigDecimalDistinctTable table =
+            new BigDecimalDistinctTable(dataSchema, limit, 
nullHandlingEnabled, orderByExpression);
+        while (dictIdIterator.hasNext()) {
+          
table.addUnbounded(_dictionary.getBigDecimalValue(dictIdIterator.nextInt()));
+        }
+        if (hasNull) {
+          table.addNull();
+        }
+        return table;
+      }
+      case STRING: {
+        StringDistinctTable table = new StringDistinctTable(dataSchema, limit, 
nullHandlingEnabled, orderByExpression);
+        while (dictIdIterator.hasNext()) {
+          
table.addUnbounded(_dictionary.getStringValue(dictIdIterator.nextInt()));
+        }
+        if (hasNull) {
+          table.addNull();
+        }
+        return table;
+      }
+      case BYTES: {
+        BytesDistinctTable table = new BytesDistinctTable(dataSchema, limit, 
nullHandlingEnabled, orderByExpression);
+        while (dictIdIterator.hasNext()) {
+          table.addUnbounded(new 
ByteArray(_dictionary.getBytesValue(dictIdIterator.nextInt())));
+        }
+        if (hasNull) {
+          table.addNull();
+        }

Review Comment:
   Null handling + LIMIT semantics can break here: `hasNull` is determined only
after collecting dictIds, and `convertDictIdTable()` calls `table.addNull()`
after adding all non-null values. Because `addNull()` sets
`_limitWithoutNull = _limit - 1`, doing it after the value set is already full
can cause the final result to drop null for `SELECT DISTINCT ... LIMIT N`
without `ORDER BY` (e.g., `_valueSet.size() == N` ⇒
`toResultTableWithoutOrderBy()` omits null). To match existing scan distinct
behavior (which reserves a null slot via `addNull()` before adding values),
determine whether null is present up front and reserve the slot before
populating the table / applying the LIMIT.
   ```suggestion
           if (hasNull) {
             table.addNull();
           }
           while (dictIdIterator.hasNext()) {
             
table.addUnbounded(_dictionary.getIntValue(dictIdIterator.nextInt()));
           }
           return table;
         }
         case LONG: {
           LongDistinctTable table = new LongDistinctTable(dataSchema, limit, 
nullHandlingEnabled, orderByExpression);
           if (hasNull) {
             table.addNull();
           }
           while (dictIdIterator.hasNext()) {
             
table.addUnbounded(_dictionary.getLongValue(dictIdIterator.nextInt()));
           }
           return table;
         }
         case FLOAT: {
           FloatDistinctTable table = new FloatDistinctTable(dataSchema, limit, 
nullHandlingEnabled, orderByExpression);
           if (hasNull) {
             table.addNull();
           }
           while (dictIdIterator.hasNext()) {
             
table.addUnbounded(_dictionary.getFloatValue(dictIdIterator.nextInt()));
           }
           return table;
         }
         case DOUBLE: {
           DoubleDistinctTable table = new DoubleDistinctTable(dataSchema, 
limit, nullHandlingEnabled, orderByExpression);
           if (hasNull) {
             table.addNull();
           }
           while (dictIdIterator.hasNext()) {
             
table.addUnbounded(_dictionary.getDoubleValue(dictIdIterator.nextInt()));
           }
           return table;
         }
         case BIG_DECIMAL: {
           BigDecimalDistinctTable table =
               new BigDecimalDistinctTable(dataSchema, limit, 
nullHandlingEnabled, orderByExpression);
           if (hasNull) {
             table.addNull();
           }
           while (dictIdIterator.hasNext()) {
             
table.addUnbounded(_dictionary.getBigDecimalValue(dictIdIterator.nextInt()));
           }
           return table;
         }
         case STRING: {
           StringDistinctTable table = new StringDistinctTable(dataSchema, 
limit, nullHandlingEnabled, orderByExpression);
           if (hasNull) {
             table.addNull();
           }
           while (dictIdIterator.hasNext()) {
             
table.addUnbounded(_dictionary.getStringValue(dictIdIterator.nextInt()));
           }
           return table;
         }
         case BYTES: {
           BytesDistinctTable table = new BytesDistinctTable(dataSchema, limit, 
nullHandlingEnabled, orderByExpression);
           if (hasNull) {
             table.addNull();
           }
           while (dictIdIterator.hasNext()) {
             table.addUnbounded(new 
ByteArray(_dictionary.getBytesValue(dictIdIterator.nextInt())));
           }
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Re: [PR] Add inverted-index-based distinct operator with runtime cost heuristic [pinot]

Reply via email to