This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch branch-3.4 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.4 by this push: new 47c674dd71c [SPARK-43156][SQL][3.4] Fix `COUNT(*) is null` bug in correlated scalar subquery 47c674dd71c is described below commit 47c674dd71c71062354b0eb51fb8e0e9b69ef2a8 Author: Hisoka <fanjiaemi...@qq.com> AuthorDate: Tue May 2 08:46:46 2023 -0700 [SPARK-43156][SQL][3.4] Fix `COUNT(*) is null` bug in correlated scalar subquery ### What changes were proposed in this pull request? Cherry pick fix COUNT(*) is null bug in correlated scalar subquery cherry pick from #40865 and #40946 ### Why are the changes needed? Fix COUNT(*) is null bug in correlated scalar subquery in branch 3.4 ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? add new test. Closes #40977 from Hisoka-X/count_bug. Lead-authored-by: Hisoka <fanjiaemi...@qq.com> Co-authored-by: Jack Chen <jack.c...@databricks.com> Signed-off-by: Dongjoon Hyun <dongj...@apache.org> --- .../apache/spark/sql/catalyst/optimizer/subquery.scala | 6 +++++- .../scalar-subquery/scalar-subquery-count-bug.sql | 3 +++ .../scalar-subquery/scalar-subquery-select.sql | 5 +++++ .../scalar-subquery/scalar-subquery-count-bug.sql.out | 8 ++++---- .../scalar-subquery/scalar-subquery-select.sql.out | 18 ++++++++++++++++++ 5 files changed, 35 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala index 83ff5e39739..52164512028 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala @@ -459,7 +459,11 @@ object RewriteCorrelatedScalarSubquery extends Rule[LogicalPlan] with AliasHelpe case alias @ Alias(_: AttributeReference, _) => (alias.exprId, Literal.create(null, alias.dataType)) case alias @ Alias(l: Literal, _) => - (alias.exprId, l.copy(value = null)) + // SPARK-43156: return literal real value, count bug does not apply to the count + // function only, but all expressions in Aggregate that return non-null value for + // empty input. So we return the real value of the literal here, then the literal + // can be used for aggregate query result. + (alias.exprId, l) case ne => (ne.exprId, evalAggExprOnZeroTups(ne)) }.toMap } diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-count-bug.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-count-bug.sql index 0ca2f07b301..df88b8eb74d 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-count-bug.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-count-bug.sql @@ -1,3 +1,6 @@ +--CONFIG_DIM1 spark.sql.optimizer.decorrelateInnerQuery.enabled=true +--CONFIG_DIM1 spark.sql.optimizer.decorrelateInnerQuery.enabled=false + create temp view l (a, b) as values (1, 2.0), diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-select.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-select.sql index 10f8f190fd8..b62cd4b68a1 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-select.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-select.sql @@ -236,6 +236,11 @@ SELECT c, ( WHERE a + b = c ) FROM (VALUES (6)) t2(c); +-- SPARK-43156: scalar subquery with Literal result like `COUNT(1) is null` +SELECT *, (SELECT count(1) is null FROM t2 WHERE t1.c1 = t2.c1) FROM t1; + +select (select f from (select false as f, max(c2) from t1 where t1.c1 = t1.c1)) from t2; + -- Set operations in correlation path CREATE OR REPLACE TEMP VIEW t0(t0a, t0b) AS VALUES (1, 1), (2, 0); diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-count-bug.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-count-bug.sql.out index 3012b67cf8c..c0abf569be6 100644 --- a/sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-count-bug.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-count-bug.sql.out @@ -86,14 +86,14 @@ from l -- !query schema struct<a:int,b:decimal(2,1),scalarsubquery(a):boolean> -- !query output -1 2.0 NULL -1 2.0 NULL +1 2.0 false +1 2.0 false 2 1.0 false 2 1.0 false 3 3.0 false 6 NULL false -NULL 5.0 NULL -NULL NULL NULL +NULL 5.0 false +NULL NULL false -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-select.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-select.sql.out index 016017ca770..7e81df1e371 100644 --- a/sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-select.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-select.sql.out @@ -543,6 +543,24 @@ struct<c:int,scalarsubquery(c):bigint> 6 4 +-- !query +SELECT *, (SELECT count(1) is null FROM t2 WHERE t1.c1 = t2.c1) FROM t1 +-- !query schema +struct<c1:int,c2:int,scalarsubquery(c1):boolean> +-- !query output +0 1 false +1 2 false + + +-- !query +select (select f from (select false as f, max(c2) from t1 where t1.c1 = t1.c1)) from t2 +-- !query schema +struct<scalarsubquery():boolean> +-- !query output +false +false + + -- !query CREATE OR REPLACE TEMP VIEW t0(t0a, t0b) AS VALUES (1, 1), (2, 0) -- !query schema --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org