This is an automated email from the ASF dual-hosted git repository.
starocean999 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new d2dc12bed5f [fix](nereids)exists subquery should handle top level
scarlar agg correctly (#29135)
d2dc12bed5f is described below
commit d2dc12bed5f78bc29da1fd4ba5be72fd2c3ef00b
Author: starocean999 <[email protected]>
AuthorDate: Fri Dec 29 09:45:20 2023 +0800
[fix](nereids)exists subquery should handle top level scarlar agg correctly
(#29135)
---
.../nereids/rules/analysis/SubqueryToApply.java | 51 +++++++------
.../nereids_syntax_p0/sub_query_correlated.out | 81 +++++++++++++++++++++
.../nereids_syntax_p0/sub_query_correlated.groovy | 83 ++++++++++++++++++++++
3 files changed, 193 insertions(+), 22 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubqueryToApply.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubqueryToApply.java
index 5f55f86cc91..675575ad298 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubqueryToApply.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubqueryToApply.java
@@ -325,7 +325,10 @@ public class SubqueryToApply implements
AnalysisRuleFactory {
LogicalPlan tmpPlan = childPlan;
for (int i = 0; i < subqueryExprs.size(); ++i) {
SubqueryExpr subqueryExpr = subqueryExprs.get(i);
- if (nonMarkJoinExistsWithAgg(subqueryExpr,
subqueryToMarkJoinSlot)) {
+ if (subqueryExpr instanceof Exists &&
hasTopLevelScalarAgg(subqueryExpr.getQueryPlan())) {
+ // because top level scalar agg always returns a value or
null(for empty input)
+ // so Exists and Not Exists conjunct are always evaluated to
True and false literals respectively
+ // we don't create apply node for it
continue;
}
@@ -338,18 +341,11 @@ public class SubqueryToApply implements
AnalysisRuleFactory {
return tmpPlan;
}
- private boolean nonMarkJoinExistsWithAgg(SubqueryExpr exists,
- Map<SubqueryExpr, Optional<MarkJoinSlotReference>>
subqueryToMarkJoinSlot) {
- return exists instanceof Exists
- && !subqueryToMarkJoinSlot.get(exists).isPresent()
- && hasTopLevelAggWithoutGroupBy(exists.getQueryPlan());
- }
-
- private static boolean hasTopLevelAggWithoutGroupBy(Plan plan) {
+ private static boolean hasTopLevelScalarAgg(Plan plan) {
if (plan instanceof LogicalAggregate) {
return ((LogicalAggregate) plan).getGroupByExpressions().isEmpty();
} else if (plan instanceof LogicalProject || plan instanceof
LogicalSort) {
- return hasTopLevelAggWithoutGroupBy(plan.child(0));
+ return hasTopLevelScalarAgg(plan.child(0));
}
return false;
}
@@ -427,19 +423,30 @@ public class SubqueryToApply implements
AnalysisRuleFactory {
// The result set when NULL is specified in the subquery and still
evaluates to TRUE by using EXISTS
// When the number of rows returned is empty, agg will return
null, so if there is more agg,
// it will always consider the returned result to be true
- boolean needCreateMarkJoinSlot = isMarkJoin ||
shouldOutputMarkJoinSlot;
- MarkJoinSlotReference markJoinSlotReference = null;
- if (hasTopLevelAggWithoutGroupBy(exists.getQueryPlan()) &&
needCreateMarkJoinSlot) {
- markJoinSlotReference =
- new
MarkJoinSlotReference(statementContext.generateColumnName(), true);
- } else if (needCreateMarkJoinSlot) {
- markJoinSlotReference =
- new
MarkJoinSlotReference(statementContext.generateColumnName());
- }
- if (needCreateMarkJoinSlot) {
- context.setSubqueryToMarkJoinSlot(exists,
Optional.of(markJoinSlotReference));
+ if (hasTopLevelScalarAgg(exists.getQueryPlan())) {
+ /*
+ top level scalar agg and always return a value or null for
empty input
+ so Exists and Not Exists conjunct are always evaluated to True
and False literals respectively
+ SELECT *
+ FROM t1
+ WHERE EXISTS (
+ SELECT SUM(a)
+ FROM t2
+ WHERE t1.a = t2.b and t1.a = 1;
+ );
+ */
+ return exists.isNot() ? BooleanLiteral.FALSE :
BooleanLiteral.TRUE;
+ } else {
+ boolean needCreateMarkJoinSlot = isMarkJoin ||
shouldOutputMarkJoinSlot;
+ if (needCreateMarkJoinSlot) {
+ MarkJoinSlotReference markJoinSlotReference =
+ new
MarkJoinSlotReference(statementContext.generateColumnName());
+ context.setSubqueryToMarkJoinSlot(exists,
Optional.of(markJoinSlotReference));
+ return markJoinSlotReference;
+ } else {
+ return BooleanLiteral.TRUE;
+ }
}
- return needCreateMarkJoinSlot ? markJoinSlotReference :
BooleanLiteral.TRUE;
}
@Override
diff --git a/regression-test/data/nereids_syntax_p0/sub_query_correlated.out
b/regression-test/data/nereids_syntax_p0/sub_query_correlated.out
index 0564d1884d9..e7758d02a5f 100644
--- a/regression-test/data/nereids_syntax_p0/sub_query_correlated.out
+++ b/regression-test/data/nereids_syntax_p0/sub_query_correlated.out
@@ -489,3 +489,84 @@ true
-- !doris_7643 --
3 3
+-- !select_exists1 --
+\N \N 0
+\N \N 100
+
+-- !select_exists2 --
+
+-- !select_exists3 --
+\N \N 0
+\N \N 100
+
+-- !select_exists4 --
+\N \N 100
+
+-- !select_exists5 --
+true
+true
+
+-- !select_exists6 --
+false
+false
+
+-- !select_exists7 --
+true
+true
+
+-- !select_exists8 --
+false
+true
+
+-- !select_exists9 --
+\N \N 100
+\N \N 100
+\N \N 100
+\N \N 100
+\N \N 100
+\N \N 100
+\N \N 100
+\N \N 100
+\N \N 100
+\N \N 100
+\N \N 100
+\N \N 100
+\N \N 100
+\N \N 100
+\N \N 100
+\N \N 100
+\N \N 100
+\N \N 100
+\N \N 100
+\N \N 100
+\N \N 100
+
+-- !select_exists10 --
+
+-- !select_exists11 --
+\N \N 0
+\N \N 100
+\N \N 100
+\N \N 100
+\N \N 100
+\N \N 100
+\N \N 100
+\N \N 100
+\N \N 100
+\N \N 100
+\N \N 100
+\N \N 100
+\N \N 100
+\N \N 100
+\N \N 100
+\N \N 100
+\N \N 100
+\N \N 100
+\N \N 100
+\N \N 100
+\N \N 100
+\N \N 100
+
+-- !select_exists12 --
+\N \N 0
+
diff --git
a/regression-test/suites/nereids_syntax_p0/sub_query_correlated.groovy
b/regression-test/suites/nereids_syntax_p0/sub_query_correlated.groovy
index ac87bbc813f..09ba6c31bec 100644
--- a/regression-test/suites/nereids_syntax_p0/sub_query_correlated.groovy
+++ b/regression-test/suites/nereids_syntax_p0/sub_query_correlated.groovy
@@ -679,4 +679,87 @@ suite ("sub_query_correlated") {
// order_qt_doris_6937_2 """
// select * from sub_query_correlated_subquery1 where
sub_query_correlated_subquery1.k1 not in (select
sub_query_correlated_subquery3.k3 from sub_query_correlated_subquery3 where
sub_query_correlated_subquery3.v2 > sub_query_correlated_subquery1.k2) or k1 <
10 order by k1, k2;
// """
+ sql """drop table if exists table_21_undef_partitions2_keys3;"""
+ sql """drop table if exists table_1_undef_partitions2_keys3;"""
+ sql """create table table_21_undef_partitions2_keys3 (
+ `col_int_undef_signed` int ,
+ `col_varchar_10__undef_signed` varchar(10) ,
+ `pk` int
+ ) engine=olap
+ distributed by hash(pk) buckets 10
+ properties('replication_num' = '1');"""
+ sql """create table table_1_undef_partitions2_keys3 (
+ `col_int_undef_signed` int ,
+ `col_varchar_10__undef_signed` varchar(10) ,
+ `pk` int
+ ) engine=olap
+ distributed by hash(pk) buckets 10
+ properties('replication_num' = '1');"""
+ sql """insert into
table_21_undef_partitions2_keys3(pk,col_int_undef_signed,col_varchar_10__undef_signed)
values
(0,null,null),(1,6,'c'),(2,7,'m'),(3,null,null),(4,null,'b'),(5,4,null),(6,3,null),(7,0,'z'),(8,null,"me"),(9,6,null),(10,0,""),(11,null,'e'),(12,null,"up"),(13,null,""),(14,7,'s'),(15,null,""),(16,3,null),(17,null,""),(18,null,""),(19,4,""),(20,7,null);"""
+ sql """insert into
table_1_undef_partitions2_keys3(pk,col_int_undef_signed,col_varchar_10__undef_signed)
values (0,null,null),(100,null,null);"""
+
+ qt_select_exists1 """SELECT *
+ FROM table_1_undef_partitions2_keys3 AS t1
+ WHERE EXISTS (
+ SELECT SUM(`pk`)
+ FROM table_21_undef_partitions2_keys3 AS t2
+ WHERE t1.pk = t2.pk and t1.pk = 9
+ ) order by t1.pk;"""
+ qt_select_exists2 """SELECT *
+ FROM table_1_undef_partitions2_keys3 AS t1
+ WHERE not EXISTS (
+ SELECT SUM(`pk`)
+ FROM table_21_undef_partitions2_keys3 AS t2
+ WHERE t1.pk = t2.pk and t1.pk = 9
+ ) order by t1.pk;"""
+ qt_select_exists3 """SELECT *
+ FROM table_1_undef_partitions2_keys3 AS t1
+ WHERE EXISTS (
+ SELECT SUM(`pk`)
+ FROM table_21_undef_partitions2_keys3 AS t2
+ WHERE t1.pk = 9
+ ) or t1.pk = 100 order by t1.pk;"""
+ qt_select_exists4 """SELECT *
+ FROM table_1_undef_partitions2_keys3 AS t1
+ WHERE not EXISTS (
+ SELECT SUM(`pk`)
+ FROM table_21_undef_partitions2_keys3 AS t2
+ WHERE t1.pk = 9
+ ) or t1.pk = 100 order by t1.pk;"""
+ qt_select_exists5 """select EXISTS (
+ SELECT SUM(`pk`)
+ FROM table_21_undef_partitions2_keys3 AS t2
+ WHERE t1.pk = t2.pk and t1.pk = 9
+ ) from table_1_undef_partitions2_keys3 AS
t1;"""
+ qt_select_exists6 """select not EXISTS (
+ SELECT SUM(`pk`)
+ FROM table_21_undef_partitions2_keys3 AS t2
+ WHERE t1.pk = t2.pk and t1.pk = 9
+ ) from table_1_undef_partitions2_keys3 AS t1
order by t1.pk;"""
+ qt_select_exists7 """select EXISTS (
+ SELECT SUM(`pk`)
+ FROM table_21_undef_partitions2_keys3 AS t2
+ WHERE t1.pk = t2.pk and t1.pk = 9
+ ) or t1.pk = 100 from
table_1_undef_partitions2_keys3 AS t1 order by t1.pk;"""
+ qt_select_exists8 """select EXISTS (
+ SELECT SUM(`pk`)
+ FROM table_21_undef_partitions2_keys3 AS t2
+ WHERE t1.pk = t2.pk and t1.pk = 9
+ ) and t1.pk = 100 from
table_1_undef_partitions2_keys3 AS t1 order by t1.pk;"""
+ qt_select_exists9 """select t1.* from table_1_undef_partitions2_keys3 AS
t1 join table_21_undef_partitions2_keys3 AS t2
+ on t1.pk = t2.pk and not exists ( SELECT
SUM(`pk`)
+ FROM table_21_undef_partitions2_keys3 AS t2
+ WHERE t1.pk = t2.pk and t1.pk = 9 ) or
t1.pk = 100 order by t1.pk;"""
+ qt_select_exists10 """select t1.* from table_1_undef_partitions2_keys3 AS
t1 join table_21_undef_partitions2_keys3 AS t2
+ on t1.pk = t2.pk and not exists ( SELECT
SUM(`pk`)
+ FROM table_21_undef_partitions2_keys3 AS t2
+ WHERE t1.pk = t2.pk and t1.pk = 9 ) or
t1.pk > 100 order by t1.pk;"""
+ qt_select_exists11 """select t1.* from table_1_undef_partitions2_keys3 AS
t1 join table_21_undef_partitions2_keys3 AS t2
+ on t1.pk = t2.pk and exists ( SELECT SUM(`pk`)
+ FROM table_21_undef_partitions2_keys3 AS t2
+ WHERE t1.pk = t2.pk and t1.pk = 9 ) or
t1.pk = 100 order by t1.pk;"""
+ qt_select_exists12 """select t1.* from table_1_undef_partitions2_keys3 AS
t1 join table_21_undef_partitions2_keys3 AS t2
+ on t1.pk = t2.pk and exists ( SELECT SUM(`pk`)
+ FROM table_21_undef_partitions2_keys3 AS t2
+ WHERE t1.pk = t2.pk and t1.pk = 9 ) or
t1.pk > 100 order by t1.pk;"""
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]