cryptoe commented on code in PR #15470:
URL: https://github.com/apache/druid/pull/15470#discussion_r1542239088
##########
extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/groupby/GroupByQueryKit.java:
##########
@@ -164,39 +168,96 @@ public QueryDefinition makeQueryDefinition(
partitionBoost
);
- queryDefBuilder.add(
- StageDefinition.builder(firstStageNumber + 1)
- .inputs(new StageInputSpec(firstStageNumber))
- .signature(resultSignature)
- .maxWorkerCount(maxWorkerCount)
- .shuffleSpec(
- shuffleSpecFactoryPostAggregation != null
- ?
shuffleSpecFactoryPostAggregation.build(resultClusterBy, false)
- : null
- )
- .processorFactory(new
GroupByPostShuffleFrameProcessorFactory(queryToRun))
- );
+ final ShuffleSpec nextShuffleWindowSpec =
getShuffleSpecForNextWindow(originalQuery, maxWorkerCount);
- if (doLimitOrOffset) {
- final DefaultLimitSpec limitSpec = (DefaultLimitSpec)
queryToRun.getLimitSpec();
+ if (nextShuffleWindowSpec == null) {
queryDefBuilder.add(
- StageDefinition.builder(firstStageNumber + 2)
- .inputs(new StageInputSpec(firstStageNumber + 1))
+ StageDefinition.builder(firstStageNumber + 1)
+ .inputs(new StageInputSpec(firstStageNumber))
.signature(resultSignature)
- .maxWorkerCount(1)
- .shuffleSpec(null) // no shuffling should be required
after a limit processor.
- .processorFactory(
- new OffsetLimitFrameProcessorFactory(
- limitSpec.getOffset(),
- limitSpec.isLimited() ? (long)
limitSpec.getLimit() : null
- )
+ .maxWorkerCount(maxWorkerCount)
+ .shuffleSpec(
+ shuffleSpecFactoryPostAggregation != null
+ ?
shuffleSpecFactoryPostAggregation.build(resultClusterBy, false)
+ : null
)
+ .processorFactory(new
GroupByPostShuffleFrameProcessorFactory(queryToRun))
+ );
+
+ if (doLimitOrOffset) {
+ final DefaultLimitSpec limitSpec = (DefaultLimitSpec)
queryToRun.getLimitSpec();
+ queryDefBuilder.add(
+ StageDefinition.builder(firstStageNumber + 2)
+ .inputs(new StageInputSpec(firstStageNumber + 1))
+ .signature(resultSignature)
+ .maxWorkerCount(1)
+ .shuffleSpec(null) // no shuffling should be
required after a limit processor.
+ .processorFactory(
+ new OffsetLimitFrameProcessorFactory(
+ limitSpec.getOffset(),
+ limitSpec.isLimited() ? (long)
limitSpec.getLimit() : null
+ )
+ )
+ );
+ }
+ } else {
+ final RowSignature stageSignature;
+ // sort the signature to make sure the prefix is aligned
+ stageSignature = QueryKitUtils.sortableSignature(
+ resultSignature,
+ nextShuffleWindowSpec.clusterBy().getColumns()
+ );
+
+ queryDefBuilder.add(
+ StageDefinition.builder(firstStageNumber + 1)
+ .inputs(new StageInputSpec(firstStageNumber))
+ .signature(stageSignature)
+ .maxWorkerCount(maxWorkerCount)
+ .shuffleSpec(nextShuffleWindowSpec)
Review Comment:
In the case of a limit, this should not be `nextShuffleWindowSpec`, should it?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]