Copilot commented on code in PR #17247:
URL: https://github.com/apache/pinot/pull/17247#discussion_r2605037608


##########
pinot-core/src/main/java/org/apache/pinot/core/operator/query/DistinctOperator.java:
##########
@@ -40,31 +43,81 @@
  */
 public class DistinctOperator extends BaseOperator<DistinctResultsBlock> {
   private static final String EXPLAIN_NAME = "DISTINCT";
+  private static final int UNLIMITED_ROWS = Integer.MAX_VALUE;
 
   private final IndexSegment _indexSegment;
   private final QueryContext _queryContext;
   private final BaseProjectOperator<?> _projectOperator;
 
   private int _numDocsScanned = 0;
+  private final int _maxRowsInDistinct;
+  private final int _numRowsWithoutChangeInDistinct;
+  private int _numRowsWithoutNewDistinct = 0;
+  private boolean _hitMaxRowsLimit = false;
+  private boolean _hitNoChangeLimit = false;
 
   public DistinctOperator(IndexSegment indexSegment, QueryContext queryContext,
       BaseProjectOperator<?> projectOperator) {
     _indexSegment = indexSegment;
     _queryContext = queryContext;
     _projectOperator = projectOperator;
+    Map<String, String> queryOptions = queryContext.getQueryOptions();
+    if (queryOptions != null) {
+      Integer maxRowsInDistinct = 
QueryOptionsUtils.getMaxRowsInDistinct(queryOptions);
+      _maxRowsInDistinct = maxRowsInDistinct != null ? maxRowsInDistinct : 
UNLIMITED_ROWS;
+      Integer numRowsWithoutChange = 
QueryOptionsUtils.getNumRowsWithoutChangeInDistinct(queryOptions);
+      _numRowsWithoutChangeInDistinct =
+          numRowsWithoutChange != null ? numRowsWithoutChange : UNLIMITED_ROWS;
+    } else {
+      _maxRowsInDistinct = UNLIMITED_ROWS;
+      _numRowsWithoutChangeInDistinct = UNLIMITED_ROWS;
+    }
   }
 
   @Override
   protected DistinctResultsBlock getNextBlock() {
     DistinctExecutor executor = 
DistinctExecutorFactory.getDistinctExecutor(_projectOperator, _queryContext);
+    executor.setMaxRowsToProcess(_maxRowsInDistinct);
     ValueBlock valueBlock;
+    boolean enforceRowLimit = _maxRowsInDistinct != UNLIMITED_ROWS;
+    boolean enforceNoChangeLimit = _numRowsWithoutChangeInDistinct != 
UNLIMITED_ROWS;
     while ((valueBlock = _projectOperator.nextBlock()) != null) {
-      _numDocsScanned += valueBlock.getNumDocs();
-      if (executor.process(valueBlock)) {
+      if (enforceRowLimit && executor.getRemainingRowsToProcess() <= 0) {
+        _hitMaxRowsLimit = true;
+        break;
+      }
+      int rowsRemainingBefore = executor.getRemainingRowsToProcess();
+      int distinctCountBeforeBlock = enforceNoChangeLimit ? 
executor.getNumDistinctRowsCollected() : -1;
+      boolean satisfied = executor.process(valueBlock);
+      int rowsRemainingAfter = executor.getRemainingRowsToProcess();
+      int docsProcessedForLimit =
+          enforceRowLimit ? Math.max(0, rowsRemainingBefore - 
rowsRemainingAfter) : valueBlock.getNumDocs();

Review Comment:
   The calculation of `docsProcessedForLimit` uses `valueBlock.getNumDocs()` 
when row limits are not enforced, but when limits are active, it computes the 
difference in remaining rows. This creates inconsistent semantics: with limits, 
it counts rows consumed by the executor (which may be less than the block 
size), while without limits, it counts the full block. Consider unifying the 
logic to always derive processed rows from the executor's state or documenting 
why this asymmetry is intentional.
   ```suggestion
         int docsProcessedForLimit = Math.max(0, rowsRemainingBefore - 
rowsRemainingAfter);
   ```



##########
pinot-core/src/main/java/org/apache/pinot/core/query/distinct/DistinctExecutorFactory.java:
##########
@@ -55,7 +55,18 @@ private DistinctExecutorFactory() {
   public static DistinctExecutor getDistinctExecutor(BaseProjectOperator<?> 
projectOperator,
       QueryContext queryContext) {
     List<ExpressionContext> expressions = queryContext.getSelectExpressions();
-    int limit = queryContext.getLimit();
+    int queryLimit = queryContext.getLimit();
+    // The distinct table capacity is independent from the early-termination 
row budgets enforced by DistinctOperator.
+    // Keep a dedicated variable so it is clear we are only constraining how 
many unique rows we can store.
+    int tableCapacity = queryLimit;
+    if (queryContext.getQueryOptions() != null) {
+      Integer maxRowsInDistinct =
+          
org.apache.pinot.common.utils.config.QueryOptionsUtils.getMaxRowsInDistinct(queryContext.getQueryOptions());

Review Comment:
   The fully qualified class name 
`org.apache.pinot.common.utils.config.QueryOptionsUtils` should be imported at 
the top of the file instead of being used inline. This improves readability and 
follows standard Java conventions.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to