This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push:
new acbfcf7ad92 [fix](Nereids) fix four phase aggregation compute wrong
result (#36131)
acbfcf7ad92 is described below
commit acbfcf7ad92433e3fcbabe8f5fd59aa11d385392
Author: 924060929 <[email protected]>
AuthorDate: Tue Jun 11 20:40:18 2024 +0800
[fix](Nereids) fix four phase aggregation compute wrong result (#36131)
cherry pick from #36128
---
.../rules/implementation/AggregateStrategies.java | 46 ++++++++++++++++++----
.../data/nereids_syntax_p0/agg_4_phase.out | 12 +++++-
.../suites/nereids_syntax_p0/agg_4_phase.groovy | 27 ++++++++++++-
3 files changed, 75 insertions(+), 10 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java
index 9cb7b4d84a4..96b7507f3dc 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java
@@ -360,6 +360,12 @@ public class AggregateStrategies implements
ImplementationRuleFactory {
.addAll(agg.getDistinctArguments())
.build().size() >
agg.getGroupByExpressions().size()
)
+ .when(agg -> {
+ if (agg.getDistinctArguments().size() == 1) {
+ return true;
+ }
+ return couldConvertToMulti(agg);
+ })
.thenApplyMulti(ctx -> {
Function<List<Expression>, RequireProperties>
secondPhaseRequireGroupByAndDistinctHash =
groupByAndDistinct -> RequireProperties.of(
@@ -1808,6 +1814,8 @@ public class AggregateStrategies implements
ImplementationRuleFactory {
bufferToBufferParam, false, logicalAgg.getLogicalProperties(),
secondPhaseRequire, anyLocalAgg);
+ boolean shouldDistinctAfterPhase2 = distinctArguments.size() > 1;
+
// phase 3
AggregateParam distinctLocalParam = new AggregateParam(
AggPhase.DISTINCT_LOCAL, AggMode.INPUT_TO_BUFFER, couldBanned);
@@ -1826,11 +1834,25 @@ public class AggregateStrategies implements
ImplementationRuleFactory {
||
aggregateFunction.getDistinctArguments().size() == 1,
"cannot process more than one child in
aggregate distinct function: "
+ aggregateFunction);
- AggregateFunction nonDistinct =
aggregateFunction
- .withDistinctAndChildren(false,
ImmutableList.copyOf(aggChild));
- AggregateExpression nonDistinctAggExpr = new
AggregateExpression(nonDistinct,
- distinctLocalParam, aggregateFunction);
- return nonDistinctAggExpr;
+
+ AggregateFunction newDistinct;
+ if (shouldDistinctAfterPhase2) {
+ // we use aggregate function to process
distinct,
+ // so need to change to multi distinct
function
+ newDistinct = tryConvertToMultiDistinct(
+
aggregateFunction.withDistinctAndChildren(
+ true,
ImmutableList.copyOf(aggChild))
+ );
+ } else {
+ // we use group by to process distinct,
+ // so no distinct param in the aggregate
function
+ newDistinct =
aggregateFunction.withDistinctAndChildren(
+ false,
ImmutableList.copyOf(aggChild));
+ }
+
+ AggregateExpression newDistinctAggExpr = new
AggregateExpression(
+ newDistinct, distinctLocalParam,
newDistinct);
+ return newDistinctAggExpr;
} else {
needUpdateSlot.add(aggregateFunction);
Alias alias =
nonDistinctAggFunctionToAliasPhase2.get(expr);
@@ -1867,11 +1889,19 @@ public class AggregateStrategies implements
ImplementationRuleFactory {
||
aggregateFunction.getDistinctArguments().size() == 1,
"cannot process more than one child in
aggregate distinct function: "
+ aggregateFunction);
- AggregateFunction nonDistinct = aggregateFunction
- .withDistinctAndChildren(false,
ImmutableList.copyOf(aggChild));
+ AggregateFunction newDistinct;
+ if (shouldDistinctAfterPhase2) {
+ newDistinct = tryConvertToMultiDistinct(
+ aggregateFunction.withDistinctAndChildren(
+ true,
ImmutableList.copyOf(aggChild))
+ );
+ } else {
+ newDistinct = aggregateFunction
+ .withDistinctAndChildren(false,
ImmutableList.copyOf(aggChild));
+ }
int idx =
logicalAgg.getOutputExpressions().indexOf(outputExpr);
Alias localDistinctAlias = (Alias)
(localDistinctOutput.get(idx));
- return new AggregateExpression(nonDistinct,
+ return new AggregateExpression(newDistinct,
distinctGlobalParam,
localDistinctAlias.toSlot());
} else {
Alias alias =
nonDistinctAggFunctionToAliasPhase3.get(expr);
diff --git a/regression-test/data/nereids_syntax_p0/agg_4_phase.out
b/regression-test/data/nereids_syntax_p0/agg_4_phase.out
index 5c5dde6f855..a939738ea18 100644
--- a/regression-test/data/nereids_syntax_p0/agg_4_phase.out
+++ b/regression-test/data/nereids_syntax_p0/agg_4_phase.out
@@ -1,4 +1,14 @@
-- This file is automatically generated. You should know what you did if you
want to edit this
-- !4phase --
-3 160
+3
+
+-- !phase4_multi_distinct --
+1 -10,-10 1 a 1
+2 -4,-4 1 b 1
+3 -4 1 f 1
+
+-- !phase4_single_distinct --
+1 -10,-10 1
+2 -4,-4 1
+3 -4 1
diff --git a/regression-test/suites/nereids_syntax_p0/agg_4_phase.groovy
b/regression-test/suites/nereids_syntax_p0/agg_4_phase.groovy
index d3b418660ab..ed69d0018c9 100644
--- a/regression-test/suites/nereids_syntax_p0/agg_4_phase.groovy
+++ b/regression-test/suites/nereids_syntax_p0/agg_4_phase.groovy
@@ -47,7 +47,7 @@ suite("agg_4_phase") {
count(distinct id)
from agg_4_phase_tbl;
"""
- explain{
+ explain {
sql(test_sql)
contains ":VAGGREGATE (merge finalize)"
contains ":VEXCHANGE"
@@ -60,4 +60,29 @@ suite("agg_4_phase") {
sql """select GROUP_CONCAT(distinct name, " ") from agg_4_phase_tbl;"""
sql """select
/*+SET_VAR(disable_nereids_rules='TWO_PHASE_AGGREGATE_SINGLE_DISTINCT_TO_MULTI,THREE_PHASE_AGGREGATE_WITH_DISTINCT,FOUR_PHASE_AGGREGATE_WITH_DISTINCT')*/
GROUP_CONCAT(distinct name, " ") from agg_4_phase_tbl group by gender;"""
+
+
+ sql "drop table if exists agg_4_phase_tbl2"
+ sql "create table agg_4_phase_tbl2(id int, field1 int, field2
varchar(255)) properties('replication_num'='1');"
+ sql "insert into agg_4_phase_tbl2 values(1, -10, null), (1, -10, 'a'), (2,
-4, null), (2, -4, 'b'), (3, -4, 'f');\n"
+
+ qt_phase4_multi_distinct """
+ select
/*+SET_VAR(disable_nereids_rules='TWO_PHASE_AGGREGATE_SINGLE_DISTINCT_TO_MULTI,TWO_PHASE_AGGREGATE_WITH_MULTI_DISTINCT,THREE_PHASE_AGGREGATE_WITH_DISTINCT,THREE_PHASE_AGGREGATE_WITH_COUNT_DISTINCT_MULTI')*/
+ id,
+ group_concat(cast(field1 as varchar), ','),
+ count(distinct field1),
+ group_concat(cast(field2 as varchar), ','),
+ count(distinct field2)
+ from agg_4_phase_tbl2
+ group by id
+ order by id"""
+
+ qt_phase4_single_distinct """
+ select
/*+SET_VAR(disable_nereids_rules='TWO_PHASE_AGGREGATE_SINGLE_DISTINCT_TO_MULTI,TWO_PHASE_AGGREGATE_WITH_MULTI_DISTINCT,THREE_PHASE_AGGREGATE_WITH_DISTINCT,THREE_PHASE_AGGREGATE_WITH_COUNT_DISTINCT_MULTI')*/
+ id,
+ group_concat(cast(field1 as varchar), ','),
+ count(distinct field1)
+ from agg_4_phase_tbl2
+ group by id
+ order by id"""
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]