This is an automated email from the ASF dual-hosted git repository.

starocean999 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new d2dc12bed5f [fix](nereids)exists subquery should handle top level 
scarlar agg correctly (#29135)
d2dc12bed5f is described below

commit d2dc12bed5f78bc29da1fd4ba5be72fd2c3ef00b
Author: starocean999 <[email protected]>
AuthorDate: Fri Dec 29 09:45:20 2023 +0800

    [fix](nereids)exists subquery should handle top level scarlar agg correctly 
(#29135)
---
 .../nereids/rules/analysis/SubqueryToApply.java    | 51 +++++++------
 .../nereids_syntax_p0/sub_query_correlated.out     | 81 +++++++++++++++++++++
 .../nereids_syntax_p0/sub_query_correlated.groovy  | 83 ++++++++++++++++++++++
 3 files changed, 193 insertions(+), 22 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubqueryToApply.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubqueryToApply.java
index 5f55f86cc91..675575ad298 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubqueryToApply.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubqueryToApply.java
@@ -325,7 +325,10 @@ public class SubqueryToApply implements 
AnalysisRuleFactory {
         LogicalPlan tmpPlan = childPlan;
         for (int i = 0; i < subqueryExprs.size(); ++i) {
             SubqueryExpr subqueryExpr = subqueryExprs.get(i);
-            if (nonMarkJoinExistsWithAgg(subqueryExpr, 
subqueryToMarkJoinSlot)) {
+            if (subqueryExpr instanceof Exists && 
hasTopLevelScalarAgg(subqueryExpr.getQueryPlan())) {
+                // because top level scalar agg always returns a value or 
null(for empty input)
+                // so Exists and Not Exists conjunct are always evaluated to 
True and false literals respectively
+                // we don't create apply node for it
                 continue;
             }
 
@@ -338,18 +341,11 @@ public class SubqueryToApply implements 
AnalysisRuleFactory {
         return tmpPlan;
     }
 
-    private boolean nonMarkJoinExistsWithAgg(SubqueryExpr exists,
-                 Map<SubqueryExpr, Optional<MarkJoinSlotReference>> 
subqueryToMarkJoinSlot) {
-        return exists instanceof Exists
-                && !subqueryToMarkJoinSlot.get(exists).isPresent()
-                && hasTopLevelAggWithoutGroupBy(exists.getQueryPlan());
-    }
-
-    private static boolean hasTopLevelAggWithoutGroupBy(Plan plan) {
+    private static boolean hasTopLevelScalarAgg(Plan plan) {
         if (plan instanceof LogicalAggregate) {
             return ((LogicalAggregate) plan).getGroupByExpressions().isEmpty();
         } else if (plan instanceof LogicalProject || plan instanceof 
LogicalSort) {
-            return hasTopLevelAggWithoutGroupBy(plan.child(0));
+            return hasTopLevelScalarAgg(plan.child(0));
         }
         return false;
     }
@@ -427,19 +423,30 @@ public class SubqueryToApply implements 
AnalysisRuleFactory {
             // The result set when NULL is specified in the subquery and still 
evaluates to TRUE by using EXISTS
             // When the number of rows returned is empty, agg will return 
null, so if there is more agg,
             // it will always consider the returned result to be true
-            boolean needCreateMarkJoinSlot = isMarkJoin || 
shouldOutputMarkJoinSlot;
-            MarkJoinSlotReference markJoinSlotReference = null;
-            if (hasTopLevelAggWithoutGroupBy(exists.getQueryPlan()) && 
needCreateMarkJoinSlot) {
-                markJoinSlotReference =
-                        new 
MarkJoinSlotReference(statementContext.generateColumnName(), true);
-            } else if (needCreateMarkJoinSlot) {
-                markJoinSlotReference =
-                        new 
MarkJoinSlotReference(statementContext.generateColumnName());
-            }
-            if (needCreateMarkJoinSlot) {
-                context.setSubqueryToMarkJoinSlot(exists, 
Optional.of(markJoinSlotReference));
+            if (hasTopLevelScalarAgg(exists.getQueryPlan())) {
+                /*
+                top level scalar agg and always return a value or null for 
empty input
+                so Exists and Not Exists conjunct are always evaluated to True 
and False literals respectively
+                    SELECT *
+                    FROM t1
+                    WHERE EXISTS (
+                            SELECT SUM(a)
+                            FROM t2
+                            WHERE t1.a = t2.b and t1.a = 1;
+                        );
+                 */
+                return exists.isNot() ? BooleanLiteral.FALSE : 
BooleanLiteral.TRUE;
+            } else {
+                boolean needCreateMarkJoinSlot = isMarkJoin || 
shouldOutputMarkJoinSlot;
+                if (needCreateMarkJoinSlot) {
+                    MarkJoinSlotReference markJoinSlotReference =
+                            new 
MarkJoinSlotReference(statementContext.generateColumnName());
+                    context.setSubqueryToMarkJoinSlot(exists, 
Optional.of(markJoinSlotReference));
+                    return markJoinSlotReference;
+                } else {
+                    return BooleanLiteral.TRUE;
+                }
             }
-            return needCreateMarkJoinSlot ? markJoinSlotReference : 
BooleanLiteral.TRUE;
         }
 
         @Override
diff --git a/regression-test/data/nereids_syntax_p0/sub_query_correlated.out 
b/regression-test/data/nereids_syntax_p0/sub_query_correlated.out
index 0564d1884d9..e7758d02a5f 100644
--- a/regression-test/data/nereids_syntax_p0/sub_query_correlated.out
+++ b/regression-test/data/nereids_syntax_p0/sub_query_correlated.out
@@ -489,3 +489,84 @@ true
 -- !doris_7643 --
 3      3
 
+-- !select_exists1 --
+\N     \N      0
+\N     \N      100
+
+-- !select_exists2 --
+
+-- !select_exists3 --
+\N     \N      0
+\N     \N      100
+
+-- !select_exists4 --
+\N     \N      100
+
+-- !select_exists5 --
+true
+true
+
+-- !select_exists6 --
+false
+false
+
+-- !select_exists7 --
+true
+true
+
+-- !select_exists8 --
+false
+true
+
+-- !select_exists9 --
+\N     \N      100
+\N     \N      100
+\N     \N      100
+\N     \N      100
+\N     \N      100
+\N     \N      100
+\N     \N      100
+\N     \N      100
+\N     \N      100
+\N     \N      100
+\N     \N      100
+\N     \N      100
+\N     \N      100
+\N     \N      100
+\N     \N      100
+\N     \N      100
+\N     \N      100
+\N     \N      100
+\N     \N      100
+\N     \N      100
+\N     \N      100
+
+-- !select_exists10 --
+
+-- !select_exists11 --
+\N     \N      0
+\N     \N      100
+\N     \N      100
+\N     \N      100
+\N     \N      100
+\N     \N      100
+\N     \N      100
+\N     \N      100
+\N     \N      100
+\N     \N      100
+\N     \N      100
+\N     \N      100
+\N     \N      100
+\N     \N      100
+\N     \N      100
+\N     \N      100
+\N     \N      100
+\N     \N      100
+\N     \N      100
+\N     \N      100
+\N     \N      100
+\N     \N      100
+
+-- !select_exists12 --
+\N     \N      0
+
diff --git 
a/regression-test/suites/nereids_syntax_p0/sub_query_correlated.groovy 
b/regression-test/suites/nereids_syntax_p0/sub_query_correlated.groovy
index ac87bbc813f..09ba6c31bec 100644
--- a/regression-test/suites/nereids_syntax_p0/sub_query_correlated.groovy
+++ b/regression-test/suites/nereids_syntax_p0/sub_query_correlated.groovy
@@ -679,4 +679,87 @@ suite ("sub_query_correlated") {
     // order_qt_doris_6937_2 """
     //     select * from sub_query_correlated_subquery1 where 
sub_query_correlated_subquery1.k1 not in (select 
sub_query_correlated_subquery3.k3 from sub_query_correlated_subquery3 where 
sub_query_correlated_subquery3.v2 > sub_query_correlated_subquery1.k2) or k1 < 
10 order by k1, k2;
     // """
+    sql """drop table if exists table_21_undef_partitions2_keys3;"""
+    sql """drop table if exists table_1_undef_partitions2_keys3;"""
+    sql """create table table_21_undef_partitions2_keys3 (
+            `col_int_undef_signed` int   ,
+            `col_varchar_10__undef_signed` varchar(10)   ,
+            `pk` int
+            ) engine=olap
+            distributed by hash(pk) buckets 10
+            properties('replication_num' = '1');"""
+    sql """create table table_1_undef_partitions2_keys3 (
+            `col_int_undef_signed` int   ,
+            `col_varchar_10__undef_signed` varchar(10)   ,
+            `pk` int
+            ) engine=olap
+            distributed by hash(pk) buckets 10
+            properties('replication_num' = '1');"""
+    sql """insert into 
table_21_undef_partitions2_keys3(pk,col_int_undef_signed,col_varchar_10__undef_signed)
 values 
(0,null,null),(1,6,'c'),(2,7,'m'),(3,null,null),(4,null,'b'),(5,4,null),(6,3,null),(7,0,'z'),(8,null,"me"),(9,6,null),(10,0,""),(11,null,'e'),(12,null,"up"),(13,null,""),(14,7,'s'),(15,null,""),(16,3,null),(17,null,""),(18,null,""),(19,4,""),(20,7,null);"""
+    sql """insert into 
table_1_undef_partitions2_keys3(pk,col_int_undef_signed,col_varchar_10__undef_signed)
 values (0,null,null),(100,null,null);"""
+
+    qt_select_exists1 """SELECT *
+                            FROM table_1_undef_partitions2_keys3 AS t1
+                            WHERE EXISTS (
+                                    SELECT SUM(`pk`)
+                                    FROM table_21_undef_partitions2_keys3 AS t2
+                                    WHERE t1.pk = t2.pk and t1.pk = 9
+                                ) order by t1.pk;"""
+    qt_select_exists2 """SELECT *
+                            FROM table_1_undef_partitions2_keys3 AS t1
+                            WHERE not EXISTS (
+                                    SELECT SUM(`pk`)
+                                    FROM table_21_undef_partitions2_keys3 AS t2
+                                    WHERE t1.pk = t2.pk and t1.pk = 9
+                                ) order by t1.pk;"""
+    qt_select_exists3 """SELECT *
+                            FROM table_1_undef_partitions2_keys3 AS t1
+                            WHERE EXISTS (
+                                    SELECT SUM(`pk`)
+                                    FROM table_21_undef_partitions2_keys3 AS t2
+                                    WHERE t1.pk = 9
+                                ) or t1.pk = 100 order by t1.pk;"""
+    qt_select_exists4 """SELECT *
+                            FROM table_1_undef_partitions2_keys3 AS t1
+                            WHERE not EXISTS (
+                                    SELECT SUM(`pk`)
+                                    FROM table_21_undef_partitions2_keys3 AS t2
+                                    WHERE t1.pk = 9
+                                ) or t1.pk = 100 order by t1.pk;"""
+    qt_select_exists5 """select EXISTS (
+                                    SELECT SUM(`pk`)
+                                    FROM table_21_undef_partitions2_keys3 AS t2
+                                    WHERE t1.pk = t2.pk and t1.pk = 9
+                                ) from table_1_undef_partitions2_keys3 AS 
t1;"""
+    qt_select_exists6 """select not EXISTS (
+                                    SELECT SUM(`pk`)
+                                    FROM table_21_undef_partitions2_keys3 AS t2
+                                    WHERE t1.pk = t2.pk and t1.pk = 9
+                                ) from table_1_undef_partitions2_keys3 AS t1 
order by t1.pk;"""
+    qt_select_exists7 """select EXISTS (
+                                    SELECT SUM(`pk`)
+                                    FROM table_21_undef_partitions2_keys3 AS t2
+                                    WHERE t1.pk = t2.pk and t1.pk = 9
+                                ) or t1.pk = 100 from 
table_1_undef_partitions2_keys3 AS t1 order by t1.pk;"""
+    qt_select_exists8 """select EXISTS (
+                                    SELECT SUM(`pk`)
+                                    FROM table_21_undef_partitions2_keys3 AS t2
+                                    WHERE t1.pk = t2.pk and t1.pk = 9
+                                ) and t1.pk = 100 from 
table_1_undef_partitions2_keys3 AS t1 order by t1.pk;"""
+    qt_select_exists9 """select t1.* from table_1_undef_partitions2_keys3 AS 
t1 join table_21_undef_partitions2_keys3 AS t2 
+                                on t1.pk = t2.pk and not exists ( SELECT 
SUM(`pk`)
+                                    FROM table_21_undef_partitions2_keys3 AS t2
+                                    WHERE t1.pk = t2.pk and t1.pk = 9 ) or 
t1.pk = 100 order by t1.pk;"""
+    qt_select_exists10 """select t1.* from table_1_undef_partitions2_keys3 AS 
t1 join table_21_undef_partitions2_keys3 AS t2 
+                                on t1.pk = t2.pk and not exists ( SELECT 
SUM(`pk`)
+                                    FROM table_21_undef_partitions2_keys3 AS t2
+                                    WHERE t1.pk = t2.pk and t1.pk = 9 ) or 
t1.pk > 100 order by t1.pk;"""
+    qt_select_exists11 """select t1.* from table_1_undef_partitions2_keys3 AS 
t1  join table_21_undef_partitions2_keys3 AS t2 
+                                on t1.pk = t2.pk and exists ( SELECT SUM(`pk`)
+                                    FROM table_21_undef_partitions2_keys3 AS t2
+                                    WHERE t1.pk = t2.pk and t1.pk = 9 ) or 
t1.pk = 100 order by t1.pk;"""
+    qt_select_exists12 """select t1.* from table_1_undef_partitions2_keys3 AS 
t1  join table_21_undef_partitions2_keys3 AS t2 
+                                on t1.pk = t2.pk and exists ( SELECT SUM(`pk`)
+                                    FROM table_21_undef_partitions2_keys3 AS t2
+                                    WHERE t1.pk = t2.pk and t1.pk = 9 ) or 
t1.pk > 100 order by t1.pk;"""                                                  
                                                                                
          
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to