This is an automated email from the ASF dual-hosted git repository.
huajianlan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 2950631f075 [fix](Nereids) fix four phase aggregation compute wrong
result (#36128)
2950631f075 is described below
commit 2950631f0750e8134415d07373c299aa2ae68f30
Author: 924060929 <[email protected]>
AuthorDate: Tue Jun 11 20:38:20 2024 +0800
[fix](Nereids) fix four phase aggregation compute wrong result (#36128)
fix a bug introduced by #35871
Backend can process distinct aggregate by two method:
1. use group by key to process distinct, I call it: external distinct of
aggregate function
2. use `multi_distinct_xxx` to process distinct, in the corresponding
aggregate function, I call it: internal distinct of aggregate function
This sql should process distinct by the multi distinct function when use
four phase aggregation:
The first two phase use group by key: id, field1 and field2.
The last two phase use group by key: id. This group by key can not process
distinct in the aggregate function, so we should use internal distinct method
in four phase aggregate function, if contains multi distinct column(e.g. field1
and field2)
```sql
select
/*+SET_VAR(disable_nereids_rules='TWO_PHASE_AGGREGATE_SINGLE_DISTINCT_TO_MULTI,TWO_PHASE_AGGREGATE_WITH_MULTI_DISTINCT,THREE_PHASE_AGGREGATE_WITH_DISTINCT,THREE_PHASE_AGGREGATE_WITH_COUNT_DISTINCT_MULTI')*/
id,
count(distinct field1),
count(distinct field2)
from agg_4_phase_tbl2
group by id
```
---
.../rules/implementation/AggregateStrategies.java | 46 ++++++++++++++++++----
.../data/nereids_syntax_p0/agg_4_phase.out | 12 +++++-
.../suites/nereids_syntax_p0/agg_4_phase.groovy | 27 ++++++++++++-
3 files changed, 75 insertions(+), 10 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java
index eae17b1a99e..d15de080913 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java
@@ -370,6 +370,12 @@ public class AggregateStrategies implements
ImplementationRuleFactory {
.addAll(agg.getDistinctArguments())
.build().size() >
agg.getGroupByExpressions().size()
)
+ .when(agg -> {
+ if (agg.getDistinctArguments().size() == 1) {
+ return true;
+ }
+ return couldConvertToMulti(agg);
+ })
.thenApplyMulti(ctx -> {
Function<List<Expression>, RequireProperties>
secondPhaseRequireGroupByAndDistinctHash =
groupByAndDistinct -> RequireProperties.of(
@@ -1849,6 +1855,8 @@ public class AggregateStrategies implements
ImplementationRuleFactory {
bufferToBufferParam, false, logicalAgg.getLogicalProperties(),
secondPhaseRequire, anyLocalAgg);
+ boolean shouldDistinctAfterPhase2 = distinctArguments.size() > 1;
+
// phase 3
AggregateParam distinctLocalParam = new AggregateParam(
AggPhase.DISTINCT_LOCAL, AggMode.INPUT_TO_BUFFER, couldBanned);
@@ -1867,11 +1875,25 @@ public class AggregateStrategies implements
ImplementationRuleFactory {
||
aggregateFunction.getDistinctArguments().size() == 1,
"cannot process more than one child in
aggregate distinct function: "
+ aggregateFunction);
- AggregateFunction nonDistinct =
aggregateFunction
- .withDistinctAndChildren(false,
ImmutableList.copyOf(aggChild));
- AggregateExpression nonDistinctAggExpr = new
AggregateExpression(nonDistinct,
- distinctLocalParam, aggregateFunction);
- return nonDistinctAggExpr;
+
+ AggregateFunction newDistinct;
+ if (shouldDistinctAfterPhase2) {
+ // we use aggregate function to process
distinct,
+ // so need to change to multi distinct
function
+ newDistinct = tryConvertToMultiDistinct(
+
aggregateFunction.withDistinctAndChildren(
+ true,
ImmutableList.copyOf(aggChild))
+ );
+ } else {
+ // we use group by to process distinct,
+ // so no distinct param in the aggregate
function
+ newDistinct =
aggregateFunction.withDistinctAndChildren(
+ false,
ImmutableList.copyOf(aggChild));
+ }
+
+ AggregateExpression newDistinctAggExpr = new
AggregateExpression(
+ newDistinct, distinctLocalParam,
newDistinct);
+ return newDistinctAggExpr;
} else {
needUpdateSlot.add(aggregateFunction);
Alias alias =
nonDistinctAggFunctionToAliasPhase2.get(expr);
@@ -1908,11 +1930,19 @@ public class AggregateStrategies implements
ImplementationRuleFactory {
||
aggregateFunction.getDistinctArguments().size() == 1,
"cannot process more than one child in
aggregate distinct function: "
+ aggregateFunction);
- AggregateFunction nonDistinct = aggregateFunction
- .withDistinctAndChildren(false,
ImmutableList.copyOf(aggChild));
+ AggregateFunction newDistinct;
+ if (shouldDistinctAfterPhase2) {
+ newDistinct = tryConvertToMultiDistinct(
+ aggregateFunction.withDistinctAndChildren(
+ true,
ImmutableList.copyOf(aggChild))
+ );
+ } else {
+ newDistinct = aggregateFunction
+ .withDistinctAndChildren(false,
ImmutableList.copyOf(aggChild));
+ }
int idx =
logicalAgg.getOutputExpressions().indexOf(outputExpr);
Alias localDistinctAlias = (Alias)
(localDistinctOutput.get(idx));
- return new AggregateExpression(nonDistinct,
+ return new AggregateExpression(newDistinct,
distinctGlobalParam,
localDistinctAlias.toSlot());
} else {
Alias alias =
nonDistinctAggFunctionToAliasPhase3.get(expr);
diff --git a/regression-test/data/nereids_syntax_p0/agg_4_phase.out
b/regression-test/data/nereids_syntax_p0/agg_4_phase.out
index 5c5dde6f855..a939738ea18 100644
--- a/regression-test/data/nereids_syntax_p0/agg_4_phase.out
+++ b/regression-test/data/nereids_syntax_p0/agg_4_phase.out
@@ -1,4 +1,14 @@
-- This file is automatically generated. You should know what you did if you
want to edit this
-- !4phase --
-3 160
+3
+
+-- !phase4_multi_distinct --
+1 -10,-10 1 a 1
+2 -4,-4 1 b 1
+3 -4 1 f 1
+
+-- !phase4_single_distinct --
+1 -10,-10 1
+2 -4,-4 1
+3 -4 1
diff --git a/regression-test/suites/nereids_syntax_p0/agg_4_phase.groovy
b/regression-test/suites/nereids_syntax_p0/agg_4_phase.groovy
index d3b418660ab..ed69d0018c9 100644
--- a/regression-test/suites/nereids_syntax_p0/agg_4_phase.groovy
+++ b/regression-test/suites/nereids_syntax_p0/agg_4_phase.groovy
@@ -47,7 +47,7 @@ suite("agg_4_phase") {
count(distinct id)
from agg_4_phase_tbl;
"""
- explain{
+ explain {
sql(test_sql)
contains ":VAGGREGATE (merge finalize)"
contains ":VEXCHANGE"
@@ -60,4 +60,29 @@ suite("agg_4_phase") {
sql """select GROUP_CONCAT(distinct name, " ") from agg_4_phase_tbl;"""
sql """select
/*+SET_VAR(disable_nereids_rules='TWO_PHASE_AGGREGATE_SINGLE_DISTINCT_TO_MULTI,THREE_PHASE_AGGREGATE_WITH_DISTINCT,FOUR_PHASE_AGGREGATE_WITH_DISTINCT')*/
GROUP_CONCAT(distinct name, " ") from agg_4_phase_tbl group by gender;"""
+
+
+ sql "drop table if exists agg_4_phase_tbl2"
+ sql "create table agg_4_phase_tbl2(id int, field1 int, field2
varchar(255)) properties('replication_num'='1');"
+ sql "insert into agg_4_phase_tbl2 values(1, -10, null), (1, -10, 'a'), (2,
-4, null), (2, -4, 'b'), (3, -4, 'f');\n"
+
+ qt_phase4_multi_distinct """
+ select
/*+SET_VAR(disable_nereids_rules='TWO_PHASE_AGGREGATE_SINGLE_DISTINCT_TO_MULTI,TWO_PHASE_AGGREGATE_WITH_MULTI_DISTINCT,THREE_PHASE_AGGREGATE_WITH_DISTINCT,THREE_PHASE_AGGREGATE_WITH_COUNT_DISTINCT_MULTI')*/
+ id,
+ group_concat(cast(field1 as varchar), ','),
+ count(distinct field1),
+ group_concat(cast(field2 as varchar), ','),
+ count(distinct field2)
+ from agg_4_phase_tbl2
+ group by id
+ order by id"""
+
+ qt_phase4_single_distinct """
+ select
/*+SET_VAR(disable_nereids_rules='TWO_PHASE_AGGREGATE_SINGLE_DISTINCT_TO_MULTI,TWO_PHASE_AGGREGATE_WITH_MULTI_DISTINCT,THREE_PHASE_AGGREGATE_WITH_DISTINCT,THREE_PHASE_AGGREGATE_WITH_COUNT_DISTINCT_MULTI')*/
+ id,
+ group_concat(cast(field1 as varchar), ','),
+ count(distinct field1)
+ from agg_4_phase_tbl2
+ group by id
+ order by id"""
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]