This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push:
new 80ea98b371a [fix](nereids)subquery unnesting get wrong result if
correlated conjuncts is not slot_a = slot_b (#37683)
80ea98b371a is described below
commit 80ea98b371a5aa55efa049f501ca305ba136f7a7
Author: starocean999 <[email protected]>
AuthorDate: Tue Jul 16 15:06:40 2024 +0800
[fix](nereids)subquery unnesting get wrong result if correlated conjuncts
is not slot_a = slot_b (#37683)
pick from master https://github.com/apache/doris/pull/37644
## Proposed changes
Issue Number: close #xxx
<!--Describe your changes.-->
---
.../doris/nereids/jobs/executor/Rewriter.java | 6 +-
.../nereids/rules/analysis/SubExprAnalyzer.java | 10 +++
.../rewrite/UnCorrelatedApplyAggregateFilter.java | 18 +++++-
.../trees/plans/logical/LogicalProject.java | 4 ++
.../java/org/apache/doris/nereids/util/Utils.java | 56 ++++++++++++++---
.../nereids_syntax_p0/test_subquery_conjunct.out | 54 ++++++++++++++++
.../sub_query_diff_old_optimize.groovy | 2 +-
.../test_subquery_conjunct.groovy | 71 ++++++++++++++++++++++
8 files changed, 206 insertions(+), 15 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java
index a13e784c7a8..9df26fce31f 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java
@@ -200,7 +200,11 @@ public class Rewriter extends AbstractBatchJobExecutor {
* TODO: group these rules to make sure the
result plan is what we expected.
*/
new CorrelateApplyToUnCorrelateApply(),
- new ApplyToJoin()
+ new ApplyToJoin(),
+ // UnCorrelatedApplyAggregateFilter rule will
create new aggregate outputs,
+ // The later rule CheckPrivileges which
inherent from ColumnPruning only works
+ // if the aggregation node is normalized, so
we need call NormalizeAggregate here
+ new NormalizeAggregate()
)
),
// before `Subquery unnesting` topic, some correlate slots
should have appeared at LogicalApply.left,
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubExprAnalyzer.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubExprAnalyzer.java
index d52fc96ff91..7bfd5256f6a 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubExprAnalyzer.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubExprAnalyzer.java
@@ -75,6 +75,11 @@ class SubExprAnalyzer<T> extends
DefaultExpressionRewriter<T> {
@Override
public Expression visitExistsSubquery(Exists exists, T context) {
+ LogicalPlan queryPlan = exists.getQueryPlan();
+ // distinct is useless, remove it
+ if (queryPlan instanceof LogicalProject && ((LogicalProject)
queryPlan).isDistinct()) {
+ exists = exists.withSubquery(((LogicalProject)
queryPlan).withDistinct(false));
+ }
AnalyzedResult analyzedResult = analyzeSubquery(exists);
if (analyzedResult.rootIsLimitZero()) {
return BooleanLiteral.of(exists.isNot());
@@ -89,6 +94,11 @@ class SubExprAnalyzer<T> extends
DefaultExpressionRewriter<T> {
@Override
public Expression visitInSubquery(InSubquery expr, T context) {
+ LogicalPlan queryPlan = expr.getQueryPlan();
+ // distinct is useless, remove it
+ if (queryPlan instanceof LogicalProject && ((LogicalProject)
queryPlan).isDistinct()) {
+ expr = expr.withSubquery(((LogicalProject)
queryPlan).withDistinct(false));
+ }
AnalyzedResult analyzedResult = analyzeSubquery(expr);
checkOutputColumn(analyzedResult.getLogicalPlan());
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/UnCorrelatedApplyAggregateFilter.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/UnCorrelatedApplyAggregateFilter.java
index 211e76710c5..f4acd847652 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/UnCorrelatedApplyAggregateFilter.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/UnCorrelatedApplyAggregateFilter.java
@@ -19,8 +19,10 @@ package org.apache.doris.nereids.rules.rewrite;
import org.apache.doris.nereids.rules.Rule;
import org.apache.doris.nereids.rules.RuleType;
+import org.apache.doris.nereids.trees.expressions.Alias;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.NamedExpression;
+import org.apache.doris.nereids.trees.expressions.Slot;
import org.apache.doris.nereids.trees.plans.Plan;
import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate;
import org.apache.doris.nereids.trees.plans.logical.LogicalApply;
@@ -31,6 +33,7 @@ import org.apache.doris.nereids.util.Utils;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.Maps;
import java.util.ArrayList;
import java.util.List;
@@ -97,10 +100,19 @@ public class UnCorrelatedApplyAggregateFilter implements
RewriteRuleFactory {
// pull up correlated filter into apply node
List<NamedExpression> newAggOutput = new
ArrayList<>(agg.getOutputExpressions());
List<Expression> newGroupby =
- Utils.getCorrelatedSlots(correlatedPredicate,
apply.getCorrelationSlot());
+ Utils.getUnCorrelatedExprs(correlatedPredicate,
apply.getCorrelationSlot());
newGroupby.addAll(agg.getGroupByExpressions());
-
newAggOutput.addAll(newGroupby.stream().map(NamedExpression.class::cast)
- .collect(ImmutableList.toImmutableList()));
+ Map<Expression, Slot> unCorrelatedExprToSlot = Maps.newHashMap();
+ for (Expression expression : newGroupby) {
+ if (expression instanceof Slot) {
+ newAggOutput.add((NamedExpression) expression);
+ } else {
+ Alias alias = new Alias(expression);
+ unCorrelatedExprToSlot.put(expression, alias.toSlot());
+ newAggOutput.add(alias);
+ }
+ }
+ correlatedPredicate = ExpressionUtils.replace(correlatedPredicate,
unCorrelatedExprToSlot);
LogicalAggregate newAgg = new LogicalAggregate<>(newGroupby,
newAggOutput,
PlanUtils.filterOrSelf(ImmutableSet.copyOf(unCorrelatedPredicate),
filter.child()));
return new LogicalApply<>(apply.getCorrelationSlot(),
apply.getSubqueryExpr(),
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalProject.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalProject.java
index 24d938636aa..4174d625936 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalProject.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalProject.java
@@ -199,6 +199,10 @@ public class LogicalProject<CHILD_TYPE extends Plan>
extends LogicalUnary<CHILD_
return new LogicalProject<>(projects, excepts, isDistinct,
canEliminate, ImmutableList.of(child));
}
+ public LogicalProject<Plan> withDistinct(boolean isDistinct) {
+ return new LogicalProject<>(projects, excepts, isDistinct,
canEliminate, children);
+ }
+
public boolean isDistinct() {
return isDistinct;
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/util/Utils.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/util/Utils.java
index 852e148ef1d..0928833c105 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/util/Utils.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/util/Utils.java
@@ -17,8 +17,11 @@
package org.apache.doris.nereids.util;
+import org.apache.doris.nereids.exceptions.AnalysisException;
import org.apache.doris.nereids.trees.expressions.Cast;
import org.apache.doris.nereids.trees.expressions.Expression;
+import org.apache.doris.nereids.trees.expressions.Not;
+import org.apache.doris.nereids.trees.expressions.Slot;
import org.apache.doris.nereids.trees.expressions.SlotReference;
import org.apache.doris.nereids.trees.expressions.shape.BinaryExpression;
@@ -178,18 +181,51 @@ public class Utils {
}
/**
- * Get the correlated columns that belong to the subquery,
- * that is, the correlated columns that can be resolved within the
subquery.
+ * Get the unCorrelated exprs that belong to the subquery,
+ * that is, the unCorrelated exprs that can be resolved within the
subquery.
* eg:
- * select * from t1 where t1.a = (select sum(t2.b) from t2 where t1.c =
t2.d));
- * correlatedPredicates : t1.c = t2.d
- * correlatedSlots : t1.c
- * return t2.d
+ * select * from t1 where t1.a = (select sum(t2.b) from t2 where t1.c =
abs(t2.d));
+ * correlatedPredicates : t1.c = abs(t2.d)
+ * unCorrelatedExprs : abs(t2.d)
+ * return abs(t2.d)
*/
- public static List<Expression> getCorrelatedSlots(List<Expression>
correlatedPredicates,
- List<Expression> correlatedSlots) {
- return ExpressionUtils.getInputSlotSet(correlatedPredicates).stream()
- .filter(slot ->
!correlatedSlots.contains(slot)).collect(Collectors.toList());
+ public static List<Expression> getUnCorrelatedExprs(List<Expression>
correlatedPredicates,
+ List<Expression>
correlatedSlots) {
+ List<Expression> unCorrelatedExprs = new ArrayList<>();
+ correlatedPredicates.forEach(predicate -> {
+ if (!(predicate instanceof BinaryExpression) && (!(predicate
instanceof Not)
+ || !(predicate.child(0) instanceof BinaryExpression))) {
+ throw new AnalysisException(
+ "Unsupported correlated subquery with correlated
predicate "
+ + predicate.toString());
+ }
+
+ BinaryExpression binaryExpression;
+ if (predicate instanceof Not) {
+ binaryExpression = (BinaryExpression) ((Not)
predicate).child();
+ } else {
+ binaryExpression = (BinaryExpression) predicate;
+ }
+ Expression left = binaryExpression.left();
+ Expression right = binaryExpression.right();
+ Set<Slot> leftInputSlots = left.getInputSlots();
+ Set<Slot> rightInputSlots = right.getInputSlots();
+ boolean correlatedToLeft = !leftInputSlots.isEmpty()
+ &&
leftInputSlots.stream().allMatch(correlatedSlots::contains)
+ &&
rightInputSlots.stream().noneMatch(correlatedSlots::contains);
+ boolean correlatedToRight = !rightInputSlots.isEmpty()
+ &&
rightInputSlots.stream().allMatch(correlatedSlots::contains)
+ &&
leftInputSlots.stream().noneMatch(correlatedSlots::contains);
+ if (!correlatedToLeft && !correlatedToRight) {
+ throw new AnalysisException(
+ "Unsupported correlated subquery with correlated
predicate " + predicate);
+ } else if (correlatedToLeft && !rightInputSlots.isEmpty()) {
+ unCorrelatedExprs.add(right);
+ } else if (correlatedToRight && !leftInputSlots.isEmpty()) {
+ unCorrelatedExprs.add(left);
+ }
+ });
+ return unCorrelatedExprs;
}
private static List<Expression> collectCorrelatedSlotsFromChildren(
diff --git a/regression-test/data/nereids_syntax_p0/test_subquery_conjunct.out
b/regression-test/data/nereids_syntax_p0/test_subquery_conjunct.out
new file mode 100644
index 00000000000..bd1ac12b8ea
--- /dev/null
+++ b/regression-test/data/nereids_syntax_p0/test_subquery_conjunct.out
@@ -0,0 +1,54 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !select_simple_scalar --
+-2 -2
+2 2
+3 2
+
+-- !select_complex_scalar --
+2 2
+3 2
+
+-- !select_simple_in --
+1 1
+2 1
+
+-- !select_complex_in --
+1 1
+2 1
+
+-- !select_simple_not_in --
+-2 -2
+-1 -1
+1 1
+2 1
+2 2
+3 2
+
+-- !select_complex_not_in --
+-2 -2
+-1 -1
+1 1
+2 1
+2 2
+3 2
+
+-- !select_simple_exists --
+-2 -2
+2 2
+3 2
+
+-- !select_complex_exists --
+2 2
+3 2
+
+-- !select_simple_not_exists --
+-1 -1
+1 1
+2 1
+
+-- !select_complex_not_exists --
+-2 -2
+-1 -1
+1 1
+2 1
+
diff --git
a/regression-test/suites/nereids_syntax_p0/sub_query_diff_old_optimize.groovy
b/regression-test/suites/nereids_syntax_p0/sub_query_diff_old_optimize.groovy
index 248b89a01ef..3c6b013a275 100644
---
a/regression-test/suites/nereids_syntax_p0/sub_query_diff_old_optimize.groovy
+++
b/regression-test/suites/nereids_syntax_p0/sub_query_diff_old_optimize.groovy
@@ -193,7 +193,7 @@ suite ("sub_query_diff_old_optimize") {
sql """
SELECT DISTINCT k1 FROM sub_query_diff_old_optimize_subquery1 i1
WHERE ((SELECT count(*) FROM sub_query_diff_old_optimize_subquery1 WHERE ((k1 =
i1.k1) AND (k2 = 2)) or ((k2 = i1.k1) AND (k2 = 1)) ) > 0);
"""
- exception "scalar subquery's correlatedPredicates's operator must be
EQ"
+ exception "Unsupported correlated subquery with correlated predicate"
}
}
diff --git
a/regression-test/suites/nereids_syntax_p0/test_subquery_conjunct.groovy
b/regression-test/suites/nereids_syntax_p0/test_subquery_conjunct.groovy
new file mode 100644
index 00000000000..43b47fc2ca4
--- /dev/null
+++ b/regression-test/suites/nereids_syntax_p0/test_subquery_conjunct.groovy
@@ -0,0 +1,71 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_subquery_conjunct") {
+ sql "set enable_nereids_planner=true"
+ sql "set enable_fallback_to_original_planner=false"
+ sql """drop table if exists subquery_conjunct_table;"""
+ sql """CREATE TABLE `subquery_conjunct_table` (
+ `id` INT NOT NULL,
+ `c1` INT NOT NULL
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`id`, `c1`)
+ DISTRIBUTED BY RANDOM BUCKETS AUTO
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1"
+ );"""
+
+ sql """insert into subquery_conjunct_table values(1,
1),(2,2),(-1,-1),(-2,-2),(2,1),(3,2);"""
+ qt_select_simple_scalar """select * from subquery_conjunct_table t1 where
abs(t1.c1) != (select sum(c1) from subquery_conjunct_table t2 where t2.c1 +
t2.id = t1.c1) order by t1.id, t1.c1;"""
+ qt_select_complex_scalar """select * from subquery_conjunct_table t1 where
abs(t1.c1) != (select sum(c1) from subquery_conjunct_table t2 where abs(t2.c1 +
t2.id) = t1.c1) order by t1.id, t1.c1;"""
+ qt_select_simple_in """select * from subquery_conjunct_table t1 where
abs(t1.c1) in (select c1 from subquery_conjunct_table t2 where t2.c1 + t2.id -1
= t1.c1) order by t1.id, t1.c1;"""
+ qt_select_complex_in """select * from subquery_conjunct_table t1 where
abs(t1.c1) in (select c1 from subquery_conjunct_table t2 where abs(t2.c1+ t2.id
-1) = t1.c1) order by t1.id, t1.c1;"""
+ qt_select_simple_not_in """select * from subquery_conjunct_table t1 where
abs(t1.c1) not in (select c1 from subquery_conjunct_table t2 where t2.c1 +
t2.id = t1.c1) order by t1.id, t1.c1;"""
+ qt_select_complex_not_in """select * from subquery_conjunct_table t1 where
abs(t1.c1) not in (select c1 from subquery_conjunct_table t2 where abs(t2.c1 +
t2.id) = t1.c1) order by t1.id, t1.c1;"""
+ qt_select_simple_exists """select * from subquery_conjunct_table t1 where
exists (select c1 from subquery_conjunct_table t2 where t2.c1 + t2.id = t1.c1)
order by t1.id, t1.c1;"""
+ qt_select_complex_exists """select * from subquery_conjunct_table t1 where
exists (select c1 from subquery_conjunct_table t2 where abs(t2.c1 + t2.id) =
t1.c1) order by t1.id, t1.c1;"""
+ qt_select_simple_not_exists """select * from subquery_conjunct_table t1
where not exists (select c1 from subquery_conjunct_table t2 where t2.c1 + t2.id
= t1.c1) order by t1.id, t1.c1;"""
+ qt_select_complex_not_exists """select * from subquery_conjunct_table t1
where not exists (select c1 from subquery_conjunct_table t2 where abs(t2.c1 +
t2.id) = t1.c1) order by t1.id, t1.c1;"""
+ test {
+ sql """ select * from subquery_conjunct_table t1 where abs(t1.c1) !=
(select sum(c1) from subquery_conjunct_table t2 where abs(t2.c1) - t1.c1 = 0)
order by t1.id; """
+ exception "Unsupported correlated subquery with correlated predicate"
+ }
+ test {
+ sql """ select * from subquery_conjunct_table t1 where abs(t1.c1) != (
select sum(c1) from subquery_conjunct_table t2 where abs(t2.c1 -1) + t1.id =
t1.c1) order by t1.id, t1.c1; """
+ exception "Unsupported correlated subquery with correlated predicate"
+ }
+ test {
+ sql """ select * from subquery_conjunct_table t1 where abs(t1.c1) !=
(select sum(c1) from subquery_conjunct_table t2 where abs(t2.c1) > t1.c1) order
by t1.id; """
+ exception "scalar subquery's correlatedPredicates's operator must be
EQ"
+ }
+ test {
+ sql """ select * from subquery_conjunct_table t1 where abs(t1.c1) in
(select sum(c1) from subquery_conjunct_table t2 where t2.c1 + 1 = t1.c1) order
by t1.id, t1.c1; """
+ exception "Unsupported correlated subquery with grouping and/or
aggregation"
+ }
+ test {
+ sql """ select * from subquery_conjunct_table t1 where abs(t1.c1) in
(select sum(c1) from subquery_conjunct_table t2 where abs(t2.c1) = t1.c1) order
by t1.id, t1.c1; """
+ exception "Unsupported correlated subquery with grouping and/or
aggregation"
+ }
+ test {
+ sql """ select * from subquery_conjunct_table t1 where abs(t1.c1) not
in (select sum(c1) from subquery_conjunct_table t2 where t2.c1 + 1= t1.c1)
order by t1.id, t1.c1; """
+ exception "Unsupported correlated subquery with grouping and/or
aggregation"
+ }
+ test {
+ sql """ select * from subquery_conjunct_table t1 where abs(t1.c1) not
in (select sum(c1) from subquery_conjunct_table t2 where abs(t2.c1 -1) = t1.c1)
order by t1.id, t1.c1; """
+ exception "Unsupported correlated subquery with grouping and/or
aggregation"
+ }
+}
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]