This is an automated email from the ASF dual-hosted git repository.
starocean999 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 17b11086357 [fix](nereids)support uncorrelated subquery in join
condition (#26672)
17b11086357 is described below
commit 17b110863578a805a73205093b745f030c588fbf
Author: starocean999 <[email protected]>
AuthorDate: Mon Nov 13 11:49:11 2023 +0800
[fix](nereids)support uncorrelated subquery in join condition (#26672)
sql select * from t1 a join t1 b on b.id in (select 1) and a.id = b.id;
will report an error.
This pr support uncorrelated subquery in join condition to fix it
---
.../nereids/rules/analysis/SubqueryToApply.java | 123 ++++++++++++++++-----
.../nereids_syntax_p0/sub_query_correlated.out | 3 +
.../nereids_p0/subquery/test_subquery.groovy | 56 ++++++++++
.../nereids_syntax_p0/sub_query_correlated.groovy | 28 +++++
.../suites/query_p0/join/test_join.groovy | 2 +-
5 files changed, 183 insertions(+), 29 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubqueryToApply.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubqueryToApply.java
index 4c6a8fdb8ad..2852db11089 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubqueryToApply.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubqueryToApply.java
@@ -23,7 +23,6 @@ import org.apache.doris.nereids.rules.Rule;
import org.apache.doris.nereids.rules.RuleType;
import org.apache.doris.nereids.trees.expressions.Alias;
import org.apache.doris.nereids.trees.expressions.BinaryOperator;
-import org.apache.doris.nereids.trees.expressions.ComparisonPredicate;
import org.apache.doris.nereids.trees.expressions.Exists;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.InSubquery;
@@ -45,6 +44,7 @@ import
org.apache.doris.nereids.trees.plans.logical.LogicalJoin;
import org.apache.doris.nereids.trees.plans.logical.LogicalOneRowRelation;
import org.apache.doris.nereids.trees.plans.logical.LogicalPlan;
import org.apache.doris.nereids.trees.plans.logical.LogicalProject;
+import org.apache.doris.nereids.trees.plans.logical.LogicalSort;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
@@ -180,7 +180,13 @@ public class SubqueryToApply implements
AnalysisRuleFactory {
Collectors.toList()));
List<Expression> subqueryConjuncts =
joinConjuncts.get(true);
if (subqueryConjuncts == null || subqueryConjuncts.stream()
- .anyMatch(expr -> !isValidSubqueryConjunct(expr,
join.left()))) {
+ .anyMatch(expr -> !isValidSubqueryConjunct(expr)))
{
+ return join;
+ }
+
+ List<RelatedInfo> relatedInfoList = collectRelatedInfo(
+ subqueryConjuncts, join.left(), join.right());
+ if (relatedInfoList.stream().anyMatch(info -> info ==
RelatedInfo.UnSupported)) {
return join;
}
@@ -188,8 +194,9 @@ public class SubqueryToApply implements AnalysisRuleFactory
{
.<Set<SubqueryExpr>>map(e ->
e.collect(SubqueryExpr.class::isInstance))
.collect(ImmutableList.toImmutableList());
ImmutableList.Builder<Expression> newConjuncts = new
ImmutableList.Builder<>();
- LogicalPlan applyPlan = null;
+ LogicalPlan applyPlan;
LogicalPlan leftChildPlan = (LogicalPlan) join.left();
+ LogicalPlan rightChildPlan = (LogicalPlan) join.right();
// Subquery traversal with the conjunct of and as the
granularity.
for (int i = 0; i < subqueryExprsList.size(); ++i) {
@@ -207,9 +214,14 @@ public class SubqueryToApply implements
AnalysisRuleFactory {
applyPlan = subqueryToApply(
subqueryExprs.stream().collect(ImmutableList.toImmutableList()),
- leftChildPlan,
context.getSubqueryToMarkJoinSlot(),
+ relatedInfoList.get(i) ==
RelatedInfo.RelatedToLeft ? leftChildPlan : rightChildPlan,
+ context.getSubqueryToMarkJoinSlot(),
ctx.cascadesContext, Optional.of(conjunct),
false);
- leftChildPlan = applyPlan;
+ if (relatedInfoList.get(i) ==
RelatedInfo.RelatedToLeft) {
+ leftChildPlan = applyPlan;
+ } else {
+ rightChildPlan = applyPlan;
+ }
newConjuncts.add(conjunct);
}
List<Expression> simpleConjuncts =
joinConjuncts.get(false);
@@ -217,34 +229,82 @@ public class SubqueryToApply implements
AnalysisRuleFactory {
newConjuncts.addAll(simpleConjuncts);
}
Plan newJoin =
join.withConjunctsChildren(join.getHashJoinConjuncts(),
- newConjuncts.build(), applyPlan, join.right());
+ newConjuncts.build(), leftChildPlan,
rightChildPlan);
return newJoin;
}))
);
}
- private static boolean isValidSubqueryConjunct(Expression expression, Plan
leftChild) {
- // the subquery must be uncorrelated subquery or only correlated to
the left child
- // currently only support the following 4 simple scenarios
- // 1. col ComparisonPredicate subquery
- // 2. col in (subquery)
- // 3. exists (subquery)
- // 4. col1 ComparisonPredicate subquery or xxx (no more subquery)
- List<Slot> slots = leftChild.getOutput();
- if (expression instanceof ComparisonPredicate && expression.child(1)
instanceof ScalarSubquery) {
- ScalarSubquery subquery = (ScalarSubquery) expression.child(1);
- return slots.containsAll(subquery.getCorrelateSlots());
- } else if (expression instanceof InSubquery) {
- return slots.containsAll(((InSubquery)
expression).getCorrelateSlots());
- } else if (expression instanceof Exists) {
- return slots.containsAll(((Exists)
expression).getCorrelateSlots());
- } else {
+ private static boolean isValidSubqueryConjunct(Expression expression) {
+ // only support 1 subquery expr in the expression
+ // don't support expression like subquery1 or subquery2
+ return expression.collectToList(SubqueryExpr.class::isInstance).size()
== 1;
+ }
+
+ private enum RelatedInfo {
+ // both subquery and its output don't related to any child. like
(select sum(t.a) from t) > 1
+ Unrelated,
+ // either subquery or its output only related to left child. like
bellow:
+ // tableLeft.a in (select t.a from t)
+ // 3 in (select t.b from t where t.a = tableLeft.a)
+ // tableLeft.a > (select sum(t.a) from t where tableLeft.b = t.b)
+ RelatedToLeft,
+ // like above, but related to right child
+ RelatedToRight,
+ // subquery related to both left and child is not supported:
+ // tableLeft.a > (select sum(t.a) from t where t.b = tableRight.b)
+ UnSupported
+ }
+
+ private ImmutableList<RelatedInfo> collectRelatedInfo(List<Expression>
subqueryConjuncts,
+ Plan leftChild, Plan rightChild) {
+ int size = subqueryConjuncts.size();
+ ImmutableList.Builder<RelatedInfo> correlatedInfoList = new
ImmutableList.Builder<>();
+ Set<Slot> leftOutputSlots = leftChild.getOutputSet();
+ Set<Slot> rightOutputSlots = rightChild.getOutputSet();
+ for (int i = 0; i < size; ++i) {
+ Expression expression = subqueryConjuncts.get(i);
List<SubqueryExpr> subqueryExprs =
expression.collectToList(SubqueryExpr.class::isInstance);
+ RelatedInfo relatedInfo = RelatedInfo.UnSupported;
if (subqueryExprs.size() == 1) {
- return
slots.containsAll(subqueryExprs.get(0).getCorrelateSlots());
+ SubqueryExpr subqueryExpr = subqueryExprs.get(0);
+ List<Slot> correlatedSlots = subqueryExpr.getCorrelateSlots();
+ if (subqueryExpr instanceof ScalarSubquery) {
+ Set<Slot> inputSlots = expression.getInputSlots();
+ if (correlatedSlots.isEmpty() && inputSlots.isEmpty()) {
+ relatedInfo = RelatedInfo.Unrelated;
+ } else if (leftOutputSlots.containsAll(inputSlots)
+ && leftOutputSlots.containsAll(correlatedSlots)) {
+ relatedInfo = RelatedInfo.RelatedToLeft;
+ } else if (rightOutputSlots.containsAll(inputSlots)
+ && rightOutputSlots.containsAll(correlatedSlots)) {
+ relatedInfo = RelatedInfo.RelatedToRight;
+ }
+ } else if (subqueryExpr instanceof InSubquery) {
+ InSubquery inSubquery = (InSubquery) subqueryExpr;
+ Set<Slot> compareSlots =
inSubquery.getCompareExpr().getInputSlots();
+ if (compareSlots.isEmpty()) {
+ relatedInfo = RelatedInfo.UnSupported;
+ } else if (leftOutputSlots.containsAll(compareSlots)
+ && leftOutputSlots.containsAll(correlatedSlots)) {
+ relatedInfo = RelatedInfo.RelatedToLeft;
+ } else if (rightOutputSlots.containsAll(compareSlots)
+ && rightOutputSlots.containsAll(correlatedSlots)) {
+ relatedInfo = RelatedInfo.RelatedToRight;
+ }
+ } else if (subqueryExpr instanceof Exists) {
+ if (correlatedSlots.isEmpty()) {
+ relatedInfo = RelatedInfo.Unrelated;
+ } else if (leftOutputSlots.containsAll(correlatedSlots)) {
+ relatedInfo = RelatedInfo.RelatedToLeft;
+ } else if (rightOutputSlots.containsAll(correlatedSlots)) {
+ relatedInfo = RelatedInfo.RelatedToRight;
+ }
+ }
}
+ correlatedInfoList.add(relatedInfo);
}
- return false;
+ return correlatedInfoList.build();
}
private LogicalPlan subqueryToApply(List<SubqueryExpr> subqueryExprs,
LogicalPlan childPlan,
@@ -270,10 +330,17 @@ public class SubqueryToApply implements
AnalysisRuleFactory {
private boolean nonMarkJoinExistsWithAgg(SubqueryExpr exists,
Map<SubqueryExpr, Optional<MarkJoinSlotReference>>
subqueryToMarkJoinSlot) {
return exists instanceof Exists
- && exists.getQueryPlan()
- .anyMatch(planTreeNode -> planTreeNode instanceof
LogicalAggregate
- && ((LogicalAggregate<?>)
planTreeNode).getGroupByExpressions().isEmpty())
- && !subqueryToMarkJoinSlot.get(exists).isPresent();
+ && !subqueryToMarkJoinSlot.get(exists).isPresent()
+ && hasTopLevelAggWithoutGroupBy(exists.getQueryPlan());
+ }
+
+ private boolean hasTopLevelAggWithoutGroupBy(Plan plan) {
+ if (plan instanceof LogicalAggregate) {
+ return ((LogicalAggregate) plan).getGroupByExpressions().isEmpty();
+ } else if (plan instanceof LogicalProject || plan instanceof
LogicalSort) {
+ return hasTopLevelAggWithoutGroupBy(plan.child(0));
+ }
+ return false;
}
private LogicalPlan addApply(SubqueryExpr subquery, LogicalPlan childPlan,
diff --git a/regression-test/data/nereids_syntax_p0/sub_query_correlated.out
b/regression-test/data/nereids_syntax_p0/sub_query_correlated.out
index c8051946673..0564d1884d9 100644
--- a/regression-test/data/nereids_syntax_p0/sub_query_correlated.out
+++ b/regression-test/data/nereids_syntax_p0/sub_query_correlated.out
@@ -486,3 +486,6 @@ true
-- !cir_5218_exists_ok_6 --
0
+-- !doris_7643 --
+3 3
+
diff --git a/regression-test/suites/nereids_p0/subquery/test_subquery.groovy
b/regression-test/suites/nereids_p0/subquery/test_subquery.groovy
index 1c2dd55676d..01e347031f8 100644
--- a/regression-test/suites/nereids_p0/subquery/test_subquery.groovy
+++ b/regression-test/suites/nereids_p0/subquery/test_subquery.groovy
@@ -65,6 +65,62 @@ suite("test_subquery") {
select * from nereids_test_query_db.baseall where k1 = (select k1
from nereids_test_query_db.baseall order by k1 desc limit 1)
"""
+ sql """DROP TABLE IF EXISTS table_1000_undef_undef"""
+ sql """DROP TABLE IF EXISTS table_1000_undef_undef2"""
+ sql """CREATE TABLE `table_1000_undef_undef` (
+ `pk` int(11) NULL,
+ `col_bigint_undef_signed` bigint(20) NULL,
+ `col_bigint_undef_signed2` bigint(20) NULL
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`pk`, `col_bigint_undef_signed`,
`col_bigint_undef_signed2`)
+ COMMENT 'OLAP'
+ DISTRIBUTED BY HASH(`pk`) BUCKETS 10
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1",
+ "is_being_synced" = "false",
+ "storage_format" = "V2",
+ "light_schema_change" = "true",
+ "disable_auto_compaction" = "false",
+ "enable_single_replica_compaction" = "false"
+ ); """
+
+ sql """ CREATE TABLE `table_1000_undef_undef2` (
+ `pk` int(11) NULL,
+ `col_bigint_undef_signed` bigint(20) NULL,
+ `col_bigint_undef_signed2` bigint(20) NULL
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`pk`, `col_bigint_undef_signed`,
`col_bigint_undef_signed2`)
+ COMMENT 'OLAP'
+ DISTRIBUTED BY HASH(`pk`) BUCKETS 10
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1",
+ "is_being_synced" = "false",
+ "storage_format" = "V2",
+ "light_schema_change" = "true",
+ "disable_auto_compaction" = "false",
+ "enable_single_replica_compaction" = "false"
+ );"""
+ explain {
+ sql """
+ SELECT `col_bigint_undef_signed` '00:39:36' ,
`col_bigint_undef_signed` '11:19:45', `col_bigint_undef_signed` '11:55:37',
`col_bigint_undef_signed2` '19:01:23'
+ FROM table_1000_undef_undef2
+ WHERE EXISTS
+ (SELECT `col_bigint_undef_signed` '17:38:13' ,
`col_bigint_undef_signed2` '17:36:21'
+ FROM table_1000_undef_undef2
+ WHERE `col_bigint_undef_signed2` NOT IN
+ (SELECT `col_bigint_undef_signed`
+ FROM table_1000_undef_undef2
+ WHERE `col_bigint_undef_signed2` <
+ (SELECT AVG(`col_bigint_undef_signed`)
+ FROM table_1000_undef_undef2
+ WHERE `col_bigint_undef_signed2` < 2)) ) ;
+ """
+ contains("VAGGREGATE")
+ }
+
+ sql """DROP TABLE IF EXISTS table_1000_undef_undef"""
+ sql """DROP TABLE IF EXISTS table_1000_undef_undef2"""
+
sql """drop table if exists test_one_row_relation;"""
sql """
CREATE TABLE `test_one_row_relation` (
diff --git
a/regression-test/suites/nereids_syntax_p0/sub_query_correlated.groovy
b/regression-test/suites/nereids_syntax_p0/sub_query_correlated.groovy
index f64cba11097..0e98510e96f 100644
--- a/regression-test/suites/nereids_syntax_p0/sub_query_correlated.groovy
+++ b/regression-test/suites/nereids_syntax_p0/sub_query_correlated.groovy
@@ -648,6 +648,34 @@ suite ("sub_query_correlated") {
exception "Unsupported correlated subquery with grouping and/or
aggregation";
}
+ qt_doris_7643 """
+ SELECT sub_query_correlated_subquery6.*
+ FROM sub_query_correlated_subquery6
+ JOIN sub_query_correlated_subquery7
+ ON sub_query_correlated_subquery6.k2 =
sub_query_correlated_subquery7.k3
+ AND EXISTS
+ (SELECT sub_query_correlated_subquery8.k1
+ FROM sub_query_correlated_subquery8 )
+ AND sub_query_correlated_subquery6.k2 IN
+ (SELECT sub_query_correlated_subquery8.k2
+ FROM sub_query_correlated_subquery8 )
+ AND sub_query_correlated_subquery6.k1 IN
+ (SELECT sub_query_correlated_subquery8.k2
+ FROM sub_query_correlated_subquery8
+ WHERE sub_query_correlated_subquery6.k2 =
sub_query_correlated_subquery8.k2 )
+ AND sub_query_correlated_subquery7.k3 IN
+ (SELECT sub_query_correlated_subquery8.k1
+ FROM sub_query_correlated_subquery8 )
+ AND 10 >
+ (SELECT min(sub_query_correlated_subquery8.k2)
+ FROM sub_query_correlated_subquery8 )
+ AND sub_query_correlated_subquery7.k3 IN
+ (SELECT sub_query_correlated_subquery8.k2
+ FROM sub_query_correlated_subquery8
+ WHERE sub_query_correlated_subquery7.v1 =
sub_query_correlated_subquery8.k2 )
+ ORDER BY sub_query_correlated_subquery6.k1,
sub_query_correlated_subquery6.k2;
+ """
+
// order_qt_doris_6937_2 """
// select * from sub_query_correlated_subquery1 where
sub_query_correlated_subquery1.k1 not in (select
sub_query_correlated_subquery3.k3 from sub_query_correlated_subquery3 where
sub_query_correlated_subquery3.v2 > sub_query_correlated_subquery1.k2) or k1 <
10 order by k1, k2;
// """
diff --git a/regression-test/suites/query_p0/join/test_join.groovy
b/regression-test/suites/query_p0/join/test_join.groovy
index e6d2906c0d3..15b010fdfae 100644
--- a/regression-test/suites/query_p0/join/test_join.groovy
+++ b/regression-test/suites/query_p0/join/test_join.groovy
@@ -1107,7 +1107,7 @@ suite("test_join", "query,p0") {
qt_join_on_predicate7"""SELECT t2.k1,t2.k2,t3.k1,t3.k2 FROM baseall t2
LEFT JOIN test t3 ON t2.k2=t3.k2 WHERE t2.k1 = 4 OR (t2.k1 > 4 AND t3.k1 IS
NULL) order by 1, 2, 3, 4"""
test {
- sql "select a.k1 from baseall a join test b on b.k2 in (select 49) and
a.k1 = b.k1 order by k1;"
+ sql "select /*+ SET_VAR(enable_nereids_planner=false) */ a.k1 from
baseall a join test b on b.k2 in (select 49) and a.k1 = b.k1 order by k1;"
exception "Not support OnClause contain Subquery"
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]