This is an automated email from the ASF dual-hosted git repository.
morrysnow pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new b0555522a1c [Fix](nereids) modify the binding aggregate function in
order by (#34606)
b0555522a1c is described below
commit b0555522a1c3b9b62057a695e292e075f7da21ef
Author: feiniaofeiafei <[email protected]>
AuthorDate: Thu May 16 14:10:50 2024 +0800
[Fix](nereids) modify the binding aggregate function in order by (#34606)
Do same job with pr #32758 and pr #33843.
Because the master branch2.0 differ in binding order by expressions. So
this pr is not a cherry-pick, but actually does same job.
For each order by expression:
If it does not have aggregate functions, firstly bind expression from
aggregate outputs. If not found, then bind expression from aggregate child
outputs.
If it has aggregate functions, firstly bind expression from aggregate
outputs which do not have aggregate functions. If not found, then bind
expression from aggregate child outputs.
Co-authored-by: feiniaofeiafei <[email protected]>
---
.../nereids/rules/analysis/BindExpression.java | 45 ++++++++++++-
.../nereids_syntax_p0/order_by_bind_priority.out | 47 ++++++++++++++
.../order_by_bind_priority.groovy | 74 ++++++++++++++++++++++
3 files changed, 164 insertions(+), 2 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindExpression.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindExpression.java
index 72af439f452..275fe2281b0 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindExpression.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindExpression.java
@@ -81,6 +81,7 @@ import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableList.Builder;
import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import org.apache.commons.lang3.StringUtils;
@@ -430,14 +431,14 @@ public class BindExpression implements
AnalysisRuleFactory {
logicalSort(aggregate()).thenApply(ctx -> {
LogicalSort<Aggregate<Plan>> sort = ctx.root;
Aggregate<Plan> aggregate = sort.child();
- return bindSort(sort, aggregate, ctx.cascadesContext);
+ return bindSortAggregate(sort, aggregate,
ctx.cascadesContext, ctx.connectContext);
})
),
RuleType.BINDING_SORT_SLOT.build(
logicalSort(logicalHaving(aggregate())).thenApply(ctx -> {
LogicalSort<LogicalHaving<Aggregate<Plan>>> sort =
ctx.root;
Aggregate<Plan> aggregate = sort.child().child();
- return bindSort(sort, aggregate, ctx.cascadesContext);
+ return bindSortAggregate(sort, aggregate,
ctx.cascadesContext, ctx.connectContext);
})
),
RuleType.BINDING_SORT_SLOT.build(
@@ -698,6 +699,46 @@ public class BindExpression implements AnalysisRuleFactory
{
).stream().map(ruleCondition).collect(ImmutableList.toImmutableList());
}
+ private Plan bindSortAggregate(LogicalSort<? extends Plan> sort,
Aggregate<Plan> aggregate, CascadesContext ctx,
+ ConnectContext connectContext) {
+ List<Slot> aggOutput = aggregate.getOutput();
+ Plan aggChild = aggregate.child();
+ List<Slot> aggChildOutput = aggChild.getOutput();
+ List<Slot> aggregateOutputWithoutAggFunc = Lists.newArrayList();
+ List<NamedExpression> outputExpressions =
aggregate.getOutputExpressions();
+ for (NamedExpression outputExpr : outputExpressions) {
+ if (!outputExpr.anyMatch(expr -> expr instanceof
AggregateFunction)) {
+ aggregateOutputWithoutAggFunc.add(outputExpr.toSlot());
+ }
+ }
+
+ // find in agg outputs
+ Scope aggOutputScope = toScope(ctx, aggOutput);
+ SlotBinder bindByAggOutput = new SlotBinder(aggOutputScope, ctx, true,
false, true);
+ // find in agg child outputs
+ Scope aggChildOutputScope = toScope(ctx, aggChildOutput);
+ SlotBinder bindByAggChildOutput = new SlotBinder(aggChildOutputScope,
ctx, true, false, true);
+ // Find in the outputs of aggregate without aggregate function output
+ Scope aggregateOutputWithoutAggFuncScope = toScope(ctx,
aggregateOutputWithoutAggFunc);
+ SlotBinder bindByAggregateOutputWithoutAggFunc = new
SlotBinder(aggregateOutputWithoutAggFuncScope,
+ ctx, true, false, true);
+
+ FunctionRegistry functionRegistry =
connectContext.getEnv().getFunctionRegistry();
+ Builder<OrderKey> boundOrderKeys =
ImmutableList.builderWithExpectedSize(sort.getOrderKeys().size());
+ for (OrderKey orderKey : sort.getOrderKeys()) {
+ Expression boundKey;
+ if (hasAggregateFunction(orderKey.getExpr(), functionRegistry)) {
+ boundKey =
bindByAggregateOutputWithoutAggFunc.bind(orderKey.getExpr());
+ } else {
+ boundKey = bindByAggOutput.bind(orderKey.getExpr());
+ }
+ boundKey = bindByAggChildOutput.bind(boundKey);
+ boundKey = bindFunction(boundKey, sort, ctx);
+ boundOrderKeys.add(orderKey.withExpression(boundKey));
+ }
+ return new LogicalSort<>(boundOrderKeys.build(), sort.child());
+ }
+
private Plan bindSort(LogicalSort<? extends Plan> sort, Plan plan,
CascadesContext ctx) {
// 1. We should deduplicate the slots, otherwise the binding process
will fail due to the
// ambiguous slots exist.
diff --git a/regression-test/data/nereids_syntax_p0/order_by_bind_priority.out
b/regression-test/data/nereids_syntax_p0/order_by_bind_priority.out
new file mode 100644
index 00000000000..4fd63bc7f99
--- /dev/null
+++ b/regression-test/data/nereids_syntax_p0/order_by_bind_priority.out
@@ -0,0 +1,47 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !test_bind_order_by_with_aggfun1 --
+10 -5 -10
+4 -2 -4
+4 1 3
+6 3 6
+
+-- !test_bind_order_by_with_aggfun2 --
+4 -2 -4
+10 -5 0
+4 1 4
+6 3 6
+
+-- !test_bind_order_by_with_aggfun3 --
+5 -5 5
+2 -2 -2
+2 1 4
+3 3 3
+
+-- !test_bind_order_by_in_no_agg_func_output --
+1 4
+2 -2
+3 3
+5 5
+
+-- !test_multi_slots_in_agg_func_bind_first --
+\N -3 2 \N \N \N
+\N 0 5 \N 16155 16155
+\N 1 6 \N \N \N
+\N 2 7 \N \N \N
+\N 6 11 \N \N \N
+\N 8 13 \N \N \N
+\N 13 18 \N \N \N
+2 -5 0 8777 \N \N
+2 -4 1 127 30240 30240
+2 -2 3 10008 -54 -54
+2 3 8 29433 -4654 -4654
+2 4 9 13909 29600 29600
+2 12 17 22950 99 99
+4 -1 4 3618 2881 2881
+4 5 10 10450 8105 8105
+4 7 12 88 88 88
+4 9 14 74 14138 14138
+4 10 15 23 63 63
+4 11 16 4418 -24598 -24598
+4 14 19 2 \N \N
+
diff --git
a/regression-test/suites/nereids_syntax_p0/order_by_bind_priority.groovy
b/regression-test/suites/nereids_syntax_p0/order_by_bind_priority.groovy
new file mode 100644
index 00000000000..44c8b5fa734
--- /dev/null
+++ b/regression-test/suites/nereids_syntax_p0/order_by_bind_priority.groovy
@@ -0,0 +1,74 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("order_by_bind_priority") {
+ sql "SET enable_nereids_planner=true;"
+ sql "SET enable_fallback_to_original_planner=false;"
+
+ sql "drop table if exists t_order_by_bind_priority"
+ sql """create table t_order_by_bind_priority (c1 int, c2 int) distributed
by hash(c1) properties("replication_num"="1");"""
+ sql "insert into t_order_by_bind_priority values(-2,
-2),(1,2),(1,2),(3,3),(-5,5);"
+ sql "sync"
+
+
+ qt_test_bind_order_by_with_aggfun1 "select 2*abs(sum(c1)) as c1,
c1,sum(c1)+c1 from t_order_by_bind_priority group by c1 order by sum(c1)+c1
asc;"
+ qt_test_bind_order_by_with_aggfun2 "select 2*abs(sum(c1)) as c2,
c1,sum(c1)+c2 from t_order_by_bind_priority group by c1,c2 order by sum(c1)+c2
asc;"
+ qt_test_bind_order_by_with_aggfun3 "select abs(sum(c1)) as c1, c1,sum(c2)
as c2 from t_order_by_bind_priority group by c1 order by sum(c1) asc;"
+ qt_test_bind_order_by_in_no_agg_func_output "select abs(c1) xx, sum(c2)
from t_order_by_bind_priority group by xx order by min(xx)"
+ test {
+ sql "select abs(sum(c1)) as c1, c1,sum(c2) as c2 from
t_order_by_bind_priority group by c1 order by sum(c1)+c2 asc;"
+ exception "c2 should be grouped by."
+ }
+ sql """drop table if exists
table_20_undef_partitions2_keys3_properties4_distributed_by58"""
+ sql """
+ create table
table_20_undef_partitions2_keys3_properties4_distributed_by58 (
+ pk int,
+ col_int_undef_signed int ,
+ col_int_undef_signed2 int
+ ) engine=olap
+ DUPLICATE KEY(pk, col_int_undef_signed)
+ distributed by hash(pk) buckets 10
+ properties("replication_num" = "1");
+ """
+ sql """
+ insert into
table_20_undef_partitions2_keys3_properties4_distributed_by58(pk,col_int_undef_signed,col_int_undef_signed2)
values (0,-8777,null),
+
(1,-127,30240),(2,null,null),(3,-10008,-54),(4,3618,2881),(5,null,16155),(6,null,null),(7,null,null),(8,-29433,-4654),(9,-13909,29600),(10,10450,8105),
+
(11,null,null),(12,88,88),(13,null,null),(14,74,14138),(15,23,63),(16,4418,-24598),(17,-22950,99),(18,null,null),(19,2,null);
+ """
+ sql "sync;"
+
+ qt_test_multi_slots_in_agg_func_bind_first """
+ SELECT
+ SIGN( SUM(col_int_undef_signed) ) + 3 AS col_int_undef_signed,
+ pk - 5 pk ,
+ pk pk ,
+ ABS( MIN(col_int_undef_signed) ) AS col_int_undef_signed,
+ MAX(col_int_undef_signed2) col_int_undef_signed2,
+ col_int_undef_signed2 col_int_undef_signed2
+ FROM
+ table_20_undef_partitions2_keys3_properties4_distributed_by58
tbl_alias1
+ GROUP BY
+ pk,
+ col_int_undef_signed2
+ ORDER BY
+ col_int_undef_signed,
+ pk - 5,
+ pk,
+ col_int_undef_signed2 ;
+ """
+
+}
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]