This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch branch-3.4
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.4 by this push:
new 47c674dd71c [SPARK-43156][SQL][3.4] Fix `COUNT(*) is null` bug in
correlated scalar subquery
47c674dd71c is described below
commit 47c674dd71c71062354b0eb51fb8e0e9b69ef2a8
Author: Hisoka <[email protected]>
AuthorDate: Tue May 2 08:46:46 2023 -0700
[SPARK-43156][SQL][3.4] Fix `COUNT(*) is null` bug in correlated scalar
subquery
### What changes were proposed in this pull request?
Cherry pick fix COUNT(*) is null bug in correlated scalar subquery
cherry pick from #40865 and #40946
### Why are the changes needed?
Fix COUNT(*) is null bug in correlated scalar subquery in branch 3.4
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
add new test.
Closes #40977 from Hisoka-X/count_bug.
Lead-authored-by: Hisoka <[email protected]>
Co-authored-by: Jack Chen <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
---
.../apache/spark/sql/catalyst/optimizer/subquery.scala | 6 +++++-
.../scalar-subquery/scalar-subquery-count-bug.sql | 3 +++
.../scalar-subquery/scalar-subquery-select.sql | 5 +++++
.../scalar-subquery/scalar-subquery-count-bug.sql.out | 8 ++++----
.../scalar-subquery/scalar-subquery-select.sql.out | 18 ++++++++++++++++++
5 files changed, 35 insertions(+), 5 deletions(-)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala
index 83ff5e39739..52164512028 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala
@@ -459,7 +459,11 @@ object RewriteCorrelatedScalarSubquery extends
Rule[LogicalPlan] with AliasHelpe
case alias @ Alias(_: AttributeReference, _) =>
(alias.exprId, Literal.create(null, alias.dataType))
case alias @ Alias(l: Literal, _) =>
- (alias.exprId, l.copy(value = null))
+ // SPARK-43156: return literal real value, count bug does not apply to
the count
+ // function only, but all expressions in Aggregate that return
non-null value for
+ // empty input. So we return the real value of the literal here, then
the literal
+ // can be used for aggregate query result.
+ (alias.exprId, l)
case ne => (ne.exprId, evalAggExprOnZeroTups(ne))
}.toMap
}
diff --git
a/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-count-bug.sql
b/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-count-bug.sql
index 0ca2f07b301..df88b8eb74d 100644
---
a/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-count-bug.sql
+++
b/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-count-bug.sql
@@ -1,3 +1,6 @@
+--CONFIG_DIM1 spark.sql.optimizer.decorrelateInnerQuery.enabled=true
+--CONFIG_DIM1 spark.sql.optimizer.decorrelateInnerQuery.enabled=false
+
create temp view l (a, b)
as values
(1, 2.0),
diff --git
a/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-select.sql
b/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-select.sql
index 10f8f190fd8..b62cd4b68a1 100644
---
a/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-select.sql
+++
b/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-select.sql
@@ -236,6 +236,11 @@ SELECT c, (
WHERE a + b = c
) FROM (VALUES (6)) t2(c);
+-- SPARK-43156: scalar subquery with Literal result like `COUNT(1) is null`
+SELECT *, (SELECT count(1) is null FROM t2 WHERE t1.c1 = t2.c1) FROM t1;
+
+select (select f from (select false as f, max(c2) from t1 where t1.c1 =
t1.c1)) from t2;
+
-- Set operations in correlation path
CREATE OR REPLACE TEMP VIEW t0(t0a, t0b) AS VALUES (1, 1), (2, 0);
diff --git
a/sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-count-bug.sql.out
b/sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-count-bug.sql.out
index 3012b67cf8c..c0abf569be6 100644
---
a/sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-count-bug.sql.out
+++
b/sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-count-bug.sql.out
@@ -86,14 +86,14 @@ from l
-- !query schema
struct<a:int,b:decimal(2,1),scalarsubquery(a):boolean>
-- !query output
-1 2.0 NULL
-1 2.0 NULL
+1 2.0 false
+1 2.0 false
2 1.0 false
2 1.0 false
3 3.0 false
6 NULL false
-NULL 5.0 NULL
-NULL NULL NULL
+NULL 5.0 false
+NULL NULL false
-- !query
diff --git
a/sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-select.sql.out
b/sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-select.sql.out
index 016017ca770..7e81df1e371 100644
---
a/sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-select.sql.out
+++
b/sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-select.sql.out
@@ -543,6 +543,24 @@ struct<c:int,scalarsubquery(c):bigint>
6 4
+-- !query
+SELECT *, (SELECT count(1) is null FROM t2 WHERE t1.c1 = t2.c1) FROM t1
+-- !query schema
+struct<c1:int,c2:int,scalarsubquery(c1):boolean>
+-- !query output
+0 1 false
+1 2 false
+
+
+-- !query
+select (select f from (select false as f, max(c2) from t1 where t1.c1 =
t1.c1)) from t2
+-- !query schema
+struct<scalarsubquery():boolean>
+-- !query output
+false
+false
+
+
-- !query
CREATE OR REPLACE TEMP VIEW t0(t0a, t0b) AS VALUES (1, 1), (2, 0)
-- !query schema
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]