This is an automated email from the ASF dual-hosted git repository.
starocean999 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 9a125d35666 [Fix](nereids)make agg output unchanged after normalized
repeat (#36207)
9a125d35666 is described below
commit 9a125d35666a109dd245cab166f9d5bcba66993f
Author: feiniaofeiafei <[email protected]>
AuthorDate: Thu Jun 13 18:20:30 2024 +0800
[Fix](nereids)make agg output unchanged after normalized repeat (#36207)
The NormalizeRepeat rule can change the ExprIds of the agg output.
For example:
```sql
SELECT
col_int_undef_signed2 AS C1 ,
col_int_undef_signed2
FROM
normalize_repeat_name_unchanged
GROUP BY
GROUPING SETS (
(col_int_undef_signed2),
(col_int_undef_signed2))
```
Before fixing the bug, the plan is:
```text
LogicalResultSink[97] ( outputExprs=[C1#7, col_int_undef_signed2#1] )
+--LogicalProject[94] ( distinct=false, projects=[C1#7, C1#7],
excepts=[] )
+--LogicalAggregate[93] ( groupByExpr=[C1#7, GROUPING_ID#8],
outputExpr=[C1#7, GROUPING_ID#8], hasRepeat=true )
+--LogicalRepeat ( groupingSets=[[C1#7], [C1#7]],
outputExpressions=[C1#7, GROUPING_ID#8] )
+--LogicalProject[91] ( distinct=false,
projects=[col_int_undef_signed2#1 AS `C1`#7], excepts=[] )
+--LogicalOlapScan ( )
```
This can cause a column-not-found failure in LogicalResultSink, which reports:
Input slot(s) not in childs output: col_int_undef_signed2#1 in plan:
LogicalResultSink[97] ( outputExprs=[C1#7, col_int_undef_signed2#1] )
child output is: [C1#7]
This PR keeps the agg output unchanged after NormalizeRepeat is applied.
After the fix, the plan is:
```text
LogicalResultSink[97] ( outputExprs=[C1#7, col_int_undef_signed2#1] )
+--LogicalProject[94] ( distinct=false, projects=[C1#7, C1#7 as
`col_int_undef_signed2`#1], excepts=[] )
+--LogicalAggregate[93] ( groupByExpr=[C1#7, GROUPING_ID#8],
outputExpr=[C1#7, GROUPING_ID#8], hasRepeat=true )
+--LogicalRepeat ( groupingSets=[[C1#7], [C1#7]],
outputExpressions=[C1#7, GROUPING_ID#8] )
+--LogicalProject[91] ( distinct=false,
projects=[col_int_undef_signed2#1 AS `C1`#7], excepts=[] )
+--LogicalOlapScan ( )
```
---------
Co-authored-by: feiniaofeiafei <[email protected]>
---
.../nereids/rules/analysis/NormalizeRepeat.java | 24 +++++++++++++++
.../grouping_sets/grouping_normalize_test.out | 28 +++++++++++++++++
.../grouping_sets/grouping_normalize_test.groovy | 35 ++++++++++++++++++++++
3 files changed, 87 insertions(+)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/NormalizeRepeat.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/NormalizeRepeat.java
index 0ff15ac7ecf..6465b81da30 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/NormalizeRepeat.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/NormalizeRepeat.java
@@ -42,6 +42,7 @@ import org.apache.doris.nereids.util.ExpressionUtils;
import org.apache.doris.nereids.util.PlanUtils.CollectNonWindowedAggFuncs;
import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableList.Builder;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
@@ -198,6 +199,8 @@ public class NormalizeRepeat extends OneAnalysisRuleFactory
{
.addAll(groupingSetsUsedSlot)
.addAll(allVirtualSlots)
.build();
+
+ normalizedAggOutput =
getExprIdUnchangedNormalizedAggOutput(normalizedAggOutput,
repeat.getOutputExpressions());
return new LogicalAggregate<>(normalizedAggGroupBy, (List)
normalizedAggOutput,
Optional.of(normalizedRepeat), normalizedRepeat);
}
@@ -460,4 +463,25 @@ public class NormalizeRepeat extends
OneAnalysisRuleFactory {
return hasNewChildren ? windowExpression.withChildren(newChildren)
: windowExpression;
}
}
+
+ private static List<NamedExpression> getExprIdUnchangedNormalizedAggOutput(
+ List<NamedExpression> normalizedAggOutput, List<NamedExpression>
originalAggOutput) {
+ Builder<NamedExpression> builder = new ImmutableList.Builder<>();
+ for (int i = 0; i < originalAggOutput.size(); i++) {
+ NamedExpression e = normalizedAggOutput.get(i);
+ // process Expression like Alias(SlotReference#0)#0
+ if (e instanceof Alias && e.child(0) instanceof SlotReference) {
+ SlotReference slotReference = (SlotReference) e.child(0);
+ if (slotReference.getExprId().equals(e.getExprId())) {
+ e = slotReference;
+ }
+ }
+ // Make the output ExprId unchanged
+ if (!e.getExprId().equals(originalAggOutput.get(i).getExprId())) {
+ e = new Alias(originalAggOutput.get(i).getExprId(), e,
originalAggOutput.get(i).getName());
+ }
+ builder.add(e);
+ }
+ return builder.build();
+ }
}
diff --git
a/regression-test/data/nereids_rules_p0/grouping_sets/grouping_normalize_test.out
b/regression-test/data/nereids_rules_p0/grouping_sets/grouping_normalize_test.out
index 41e0576ecab..af071e01159 100644
---
a/regression-test/data/nereids_rules_p0/grouping_sets/grouping_normalize_test.out
+++
b/regression-test/data/nereids_rules_p0/grouping_sets/grouping_normalize_test.out
@@ -7,3 +7,31 @@
29 -3 32
41 1 \N
+-- !test_name_unchange --
+\N \N
+\N \N
+-2169155 -2169155
+-2169155 -2169155
+-1760025 -1760025
+-1760025 -1760025
+-27328 -27328
+-27328 -27328
+-23380 -23380
+-23380 -23380
+-23025 -23025
+-23025 -23025
+-127 -127
+-127 -127
+-88 -88
+-88 -88
+-73 -73
+-73 -73
+25 25
+25 25
+5694 5694
+5694 5694
+29932 29932
+29932 29932
+5907087 5907087
+5907087 5907087
+
diff --git
a/regression-test/suites/nereids_rules_p0/grouping_sets/grouping_normalize_test.groovy
b/regression-test/suites/nereids_rules_p0/grouping_sets/grouping_normalize_test.groovy
index 93821452f2f..091ddc7a071 100644
---
a/regression-test/suites/nereids_rules_p0/grouping_sets/grouping_normalize_test.groovy
+++
b/regression-test/suites/nereids_rules_p0/grouping_sets/grouping_normalize_test.groovy
@@ -54,4 +54,39 @@ suite("grouping_normalize_test"){
sql("SELECT col_int_undef_signed, col_int_undef_signed2, SUM(pk)
FROM grouping_normalize_test GROUP BY GROUPING SETS ((col_int_undef_signed,
col_int_undef_signed2));")
notContains("VREPEAT_NODE")
}
+
+
+ sql "drop table if exists normalize_repeat_name_unchanged"
+ sql """create table normalize_repeat_name_unchanged (
+ col_int_undef_signed int/*agg_type_placeholder*/ ,
+ col_int_undef_signed2 int/*agg_type_placeholder*/ ,
+ col_float_undef_signed float/*agg_type_placeholder*/ ,
+ col_int_undef_signed3 int/*agg_type_placeholder*/ ,
+ col_int_undef_signed4 int/*agg_type_placeholder*/ ,
+ col_int_undef_signed5 int/*agg_type_placeholder*/ ,
+ pk int/*agg_type_placeholder*/
+ ) engine=olap
+ distributed by hash(pk) buckets 10
+ properties("replication_num" = "1");"""
+ sql """
+ insert into
normalize_repeat_name_unchanged(pk,col_int_undef_signed,col_int_undef_signed2,col_float_undef_signed,
+ col_int_undef_signed3,col_int_undef_signed4,col_int_undef_signed5) values
(0,null,-27328,5595590,null,null,5767077),(1,3831,null,87,-14582,21,null),
+
(2,10131,5907087,28248,2473748,88,-18315),(3,2352090,5694,5173440,null,null,-31126),(4,-26805,29932,null,-55,3148,-6705245),(5,null,null,41,57,-3060427,null),
+
(6,118,25,3472000,-123,null,-2934940),(7,null,null,-109,112,-7344754,4326526),(8,null,-2169155,-19402,null,null,26943),(9,46,null,1736620,30084,13838,null),
+
(10,24708,null,null,-806832,-116,676),(11,2232,-23025,null,9665,-27413,13457),(12,-6,-127,-5007917,20521,-48,2709),(13,-72,-127,3258,null,-6394361,-5580),
+
(14,4494439,-1760025,-16580,66,6562396,-280256),(15,6099281,-73,-5376852,-303421,null,-1843),(16,122,-23380,null,7350221,111,null),
+
(17,null,null,11356,null,11799,108),(18,-91,-88,39,-29582,null,121),(19,4991662,null,-220,7593505,-54,4086882);"""
+
+ qt_test_name_unchange """
+ SELECT
+ col_int_undef_signed2 AS C1 ,
+ col_int_undef_signed2
+ FROM
+ normalize_repeat_name_unchanged
+ GROUP BY
+ GROUPING SETS (
+ (col_int_undef_signed2),
+ (col_int_undef_signed2))
+ order by 1,2
+ """
}
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]