This is an automated email from the ASF dual-hosted git repository.

jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new e74087dea6 [SSE] Segment limit when order-by on group-by keys (#16297)
e74087dea6 is described below

commit e74087dea671a233658d5c4aaeae3e827b37b832
Author: Song Fu <[email protected]>
AuthorDate: Tue Jul 8 13:32:17 2025 -0700

    [SSE] Segment limit when order-by on group-by keys (#16297)
---
 .../pinot/core/operator/query/GroupByOperator.java       | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/query/GroupByOperator.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/query/GroupByOperator.java
index d28de96107..5b704ceb47 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/operator/query/GroupByOperator.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/query/GroupByOperator.java
@@ -27,6 +27,7 @@ import java.util.stream.Collectors;
 import org.apache.pinot.common.metrics.ServerMeter;
 import org.apache.pinot.common.metrics.ServerMetrics;
 import org.apache.pinot.common.request.context.ExpressionContext;
+import org.apache.pinot.common.request.context.OrderByExpressionContext;
 import org.apache.pinot.common.utils.DataSchema;
 import org.apache.pinot.core.common.Operator;
 import org.apache.pinot.core.data.table.IntermediateRecord;
@@ -106,12 +107,14 @@ public class GroupByOperator extends BaseOperator<GroupByResultsBlock> {
   protected GroupByResultsBlock getNextBlock() {
     // Perform aggregation group-by on all the blocks
     GroupByExecutor groupByExecutor;
+    // TODO: pass trimGroupSize to executor, who creates the result holder
     if (_useStarTree) {
       groupByExecutor = new StarTreeGroupByExecutor(_queryContext, 
_groupByExpressions, _projectOperator);
     } else {
       groupByExecutor = new DefaultGroupByExecutor(_queryContext, 
_groupByExpressions, _projectOperator);
     }
     ValueBlock valueBlock;
+
     while ((valueBlock = _projectOperator.nextBlock()) != null) {
       _numDocsScanned += valueBlock.getNumDocs();
       groupByExecutor.process(valueBlock);
@@ -138,8 +141,17 @@ public class GroupByOperator extends BaseOperator<GroupByResultsBlock> {
     // TODO: Currently the groups are not trimmed if there is no ordering specified. Consider ordering on group-by
     //       columns if no ordering is specified.
     int minGroupTrimSize = _queryContext.getMinSegmentGroupTrimSize();
-    if (_queryContext.getOrderByExpressions() != null && minGroupTrimSize > 0) 
{
-      int trimSize = GroupByUtils.getTableCapacity(_queryContext.getLimit(), 
minGroupTrimSize);
+    int trimSize = -1;
+    List<OrderByExpressionContext> orderByExpressions = 
_queryContext.getOrderByExpressions();
+    if (!_queryContext.isUnsafeTrim()) {
+      // if orderby key is groupby key, and there's no having clause
+      // keep at most `limit` rows only
+      trimSize = _queryContext.getLimit();
+    } else if (orderByExpressions != null && minGroupTrimSize > 0) {
+      // max(minSegmentGroupTrimSize, 5 * LIMIT)
+      trimSize = GroupByUtils.getTableCapacity(_queryContext.getLimit(), 
minGroupTrimSize);
+    }
+    if (trimSize > 0) {
       if (groupByExecutor.getNumGroups() > trimSize) {
         TableResizer tableResizer = new TableResizer(_dataSchema, 
_queryContext);
         Collection<IntermediateRecord> intermediateRecords = 
groupByExecutor.trimGroupByResult(trimSize, tableResizer);


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to