This is an automated email from the ASF dual-hosted git repository.
jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new e74087dea6 [SSE] Segment limit when order-by on group-by keys (#16297)
e74087dea6 is described below
commit e74087dea671a233658d5c4aaeae3e827b37b832
Author: Song Fu <[email protected]>
AuthorDate: Tue Jul 8 13:32:17 2025 -0700
[SSE] Segment limit when order-by on group-by keys (#16297)
---
.../pinot/core/operator/query/GroupByOperator.java | 16 ++++++++++++++--
1 file changed, 14 insertions(+), 2 deletions(-)
diff --git
a/pinot-core/src/main/java/org/apache/pinot/core/operator/query/GroupByOperator.java
b/pinot-core/src/main/java/org/apache/pinot/core/operator/query/GroupByOperator.java
index d28de96107..5b704ceb47 100644
---
a/pinot-core/src/main/java/org/apache/pinot/core/operator/query/GroupByOperator.java
+++
b/pinot-core/src/main/java/org/apache/pinot/core/operator/query/GroupByOperator.java
@@ -27,6 +27,7 @@ import java.util.stream.Collectors;
import org.apache.pinot.common.metrics.ServerMeter;
import org.apache.pinot.common.metrics.ServerMetrics;
import org.apache.pinot.common.request.context.ExpressionContext;
+import org.apache.pinot.common.request.context.OrderByExpressionContext;
import org.apache.pinot.common.utils.DataSchema;
import org.apache.pinot.core.common.Operator;
import org.apache.pinot.core.data.table.IntermediateRecord;
@@ -106,12 +107,14 @@ public class GroupByOperator extends
BaseOperator<GroupByResultsBlock> {
protected GroupByResultsBlock getNextBlock() {
// Perform aggregation group-by on all the blocks
GroupByExecutor groupByExecutor;
+ // TODO: pass trimGroupSize to the executor, which creates the result holder
if (_useStarTree) {
groupByExecutor = new StarTreeGroupByExecutor(_queryContext,
_groupByExpressions, _projectOperator);
} else {
groupByExecutor = new DefaultGroupByExecutor(_queryContext,
_groupByExpressions, _projectOperator);
}
ValueBlock valueBlock;
+
while ((valueBlock = _projectOperator.nextBlock()) != null) {
_numDocsScanned += valueBlock.getNumDocs();
groupByExecutor.process(valueBlock);
@@ -138,8 +141,17 @@ public class GroupByOperator extends
BaseOperator<GroupByResultsBlock> {
// TODO: Currently the groups are not trimmed if there is no ordering
specified. Consider ordering on group-by
// columns if no ordering is specified.
int minGroupTrimSize = _queryContext.getMinSegmentGroupTrimSize();
- if (_queryContext.getOrderByExpressions() != null && minGroupTrimSize > 0)
{
- int trimSize = GroupByUtils.getTableCapacity(_queryContext.getLimit(),
minGroupTrimSize);
+ int trimSize = -1;
+ List<OrderByExpressionContext> orderByExpressions =
_queryContext.getOrderByExpressions();
+ if (!_queryContext.isUnsafeTrim()) {
+ // When the order-by keys are the group-by keys and there is no HAVING clause,
+ // keep at most `limit` rows.
+ trimSize = _queryContext.getLimit();
+ } else if (orderByExpressions != null && minGroupTrimSize > 0) {
+ // max(minSegmentGroupTrimSize, 5 * LIMIT)
+ trimSize = GroupByUtils.getTableCapacity(_queryContext.getLimit(),
minGroupTrimSize);
+ }
+ if (trimSize > 0) {
if (groupByExecutor.getNumGroups() > trimSize) {
TableResizer tableResizer = new TableResizer(_dataSchema,
_queryContext);
Collection<IntermediateRecord> intermediateRecords =
groupByExecutor.trimGroupByResult(trimSize, tableResizer);
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]