This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-3.4
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.4 by this push:
     new 47c674dd71c [SPARK-43156][SQL][3.4] Fix `COUNT(*) is null` bug in 
correlated scalar subquery
47c674dd71c is described below

commit 47c674dd71c71062354b0eb51fb8e0e9b69ef2a8
Author: Hisoka <fanjiaemi...@qq.com>
AuthorDate: Tue May 2 08:46:46 2023 -0700

    [SPARK-43156][SQL][3.4] Fix `COUNT(*) is null` bug in correlated scalar 
subquery
    
    ### What changes were proposed in this pull request?
    Cherry pick fix COUNT(*) is null bug in correlated scalar subquery
    
    cherry pick from #40865 and #40946
    
    ### Why are the changes needed?
    Fix COUNT(*) is null bug in correlated scalar subquery in branch 3.4
    
    ### Does this PR introduce _any_ user-facing change?
    No
    
    ### How was this patch tested?
    add new test.
    
    Closes #40977 from Hisoka-X/count_bug.
    
    Lead-authored-by: Hisoka <fanjiaemi...@qq.com>
    Co-authored-by: Jack Chen <jack.c...@databricks.com>
    Signed-off-by: Dongjoon Hyun <dongj...@apache.org>
---
 .../apache/spark/sql/catalyst/optimizer/subquery.scala |  6 +++++-
 .../scalar-subquery/scalar-subquery-count-bug.sql      |  3 +++
 .../scalar-subquery/scalar-subquery-select.sql         |  5 +++++
 .../scalar-subquery/scalar-subquery-count-bug.sql.out  |  8 ++++----
 .../scalar-subquery/scalar-subquery-select.sql.out     | 18 ++++++++++++++++++
 5 files changed, 35 insertions(+), 5 deletions(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala
index 83ff5e39739..52164512028 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala
@@ -459,7 +459,11 @@ object RewriteCorrelatedScalarSubquery extends 
Rule[LogicalPlan] with AliasHelpe
       case alias @ Alias(_: AttributeReference, _) =>
         (alias.exprId, Literal.create(null, alias.dataType))
       case alias @ Alias(l: Literal, _) =>
-        (alias.exprId, l.copy(value = null))
+        // SPARK-43156: return literal real value, count bug does not apply to 
the count
+        // function only, but all expressions in Aggregate that return 
non-null value for
+        // empty input. So we return the real value of the literal here, then 
the literal
+        // can be used for aggregate query result.
+        (alias.exprId, l)
       case ne => (ne.exprId, evalAggExprOnZeroTups(ne))
     }.toMap
   }
diff --git 
a/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-count-bug.sql
 
b/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-count-bug.sql
index 0ca2f07b301..df88b8eb74d 100644
--- 
a/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-count-bug.sql
+++ 
b/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-count-bug.sql
@@ -1,3 +1,6 @@
+--CONFIG_DIM1 spark.sql.optimizer.decorrelateInnerQuery.enabled=true
+--CONFIG_DIM1 spark.sql.optimizer.decorrelateInnerQuery.enabled=false
+
 create temp view l (a, b)
 as values
     (1, 2.0),
diff --git 
a/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-select.sql
 
b/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-select.sql
index 10f8f190fd8..b62cd4b68a1 100644
--- 
a/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-select.sql
+++ 
b/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-select.sql
@@ -236,6 +236,11 @@ SELECT c, (
     WHERE a + b = c
 ) FROM (VALUES (6)) t2(c);
 
+-- SPARK-43156: scalar subquery with Literal result like `COUNT(1) is null`
+SELECT *, (SELECT count(1) is null FROM t2 WHERE t1.c1 = t2.c1) FROM t1;
+
+select (select f from (select false as f, max(c2) from t1 where t1.c1 = 
t1.c1)) from t2;
+
 -- Set operations in correlation path
 
 CREATE OR REPLACE TEMP VIEW t0(t0a, t0b) AS VALUES (1, 1), (2, 0);
diff --git 
a/sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-count-bug.sql.out
 
b/sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-count-bug.sql.out
index 3012b67cf8c..c0abf569be6 100644
--- 
a/sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-count-bug.sql.out
+++ 
b/sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-count-bug.sql.out
@@ -86,14 +86,14 @@ from l
 -- !query schema
 struct<a:int,b:decimal(2,1),scalarsubquery(a):boolean>
 -- !query output
-1      2.0     NULL
-1      2.0     NULL
+1      2.0     false
+1      2.0     false
 2      1.0     false
 2      1.0     false
 3      3.0     false
 6      NULL    false
-NULL   5.0     NULL
-NULL   NULL    NULL
+NULL   5.0     false
+NULL   NULL    false
 
 
 -- !query
diff --git 
a/sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-select.sql.out
 
b/sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-select.sql.out
index 016017ca770..7e81df1e371 100644
--- 
a/sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-select.sql.out
+++ 
b/sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-select.sql.out
@@ -543,6 +543,24 @@ struct<c:int,scalarsubquery(c):bigint>
 6      4
 
 
+-- !query
+SELECT *, (SELECT count(1) is null FROM t2 WHERE t1.c1 = t2.c1) FROM t1
+-- !query schema
+struct<c1:int,c2:int,scalarsubquery(c1):boolean>
+-- !query output
+0      1       false
+1      2       false
+
+
+-- !query
+select (select f from (select false as f, max(c2) from t1 where t1.c1 = 
t1.c1)) from t2
+-- !query schema
+struct<scalarsubquery():boolean>
+-- !query output
+false
+false
+
+
 -- !query
 CREATE OR REPLACE TEMP VIEW t0(t0a, t0b) AS VALUES (1, 1), (2, 0)
 -- !query schema


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to