This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch branch-3.4
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.4 by this push: new a2c915db50c [SPARK-43760][SQL][3.4] Nullability of scalar subquery results a2c915db50c is described below commit a2c915db50c76fee1290d8a6a6aab9f41100a60b Author: Andrey Gubichev <andrey.gubic...@databricks.com> AuthorDate: Thu Jun 1 15:10:11 2023 +0800 [SPARK-43760][SQL][3.4] Nullability of scalar subquery results ### What changes were proposed in this pull request? Backport of https://github.com/apache/spark/pull/41287. Makes sure that the results of scalar subqueries are declared as nullable. ### Why are the changes needed? This is an existing correctness bug, see https://issues.apache.org/jira/browse/SPARK-43760 ### Does this PR introduce _any_ user-facing change? Fixes a correctness issue, so it is user-facing. ### How was this patch tested? Query tests. Closes #41408 from agubichev/spark-43760-nullability-branch-3.4. Authored-by: Andrey Gubichev <andrey.gubic...@databricks.com> Signed-off-by: Wenchen Fan <wenc...@databricks.com> --- .../spark/sql/catalyst/optimizer/subquery.scala | 2 +- .../scalar-subquery/scalar-subquery-predicate.sql | 10 ++++++++++ .../scalar-subquery/scalar-subquery-select.sql | 18 +++++++++++++++++- .../scalar-subquery-predicate.sql.out | 15 +++++++++++++++ .../scalar-subquery/scalar-subquery-select.sql.out | 21 +++++++++++++++++++++ 5 files changed, 64 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala index 52164512028..1d2f5602630 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala @@ -393,7 +393,7 @@ object RewriteCorrelatedScalarSubquery extends Rule[LogicalPlan] with AliasHelpe val newExpression = expression.transformWithPruning(_.containsPattern(SCALAR_SUBQUERY)) { 
case s: ScalarSubquery if s.children.nonEmpty => subqueries += s - s.plan.output.head + s.plan.output.head.withNullability(true) } newExpression.asInstanceOf[E] } diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-predicate.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-predicate.sql index c8fe4bea642..e5551250dfe 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-predicate.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-predicate.sql @@ -398,3 +398,13 @@ HAVING t0a < FROM t2 WHERE t2b <= t0b) ); + +-- SPARK-43760: the result of the subquery can be NULL. +select * +from range(1, 3) t1 +where (select sum(c) from ( + select t2.id * t2.id c + from range (1, 2) t2 where t1.id = t2.id + group by t2.id + ) +) is not null; diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-select.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-select.sql index b62cd4b68a1..48d1594fa51 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-select.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-select.sql @@ -344,4 +344,20 @@ SELECT t0a, (SELECT sum(d) FROM SELECT sum(t2a) + t0a as d FROM t2) ) -FROM t0; \ No newline at end of file +FROM t0; + +-- SPARK-43760: the result of the subquery can be NULL. 
+select * +from +( + select t1.id c1, ( + select sum(c) + from ( + select t2.id * t2.id c + from range (1, 2) t2 where t1.id = t2.id + group by t2.id + ) + ) c2 + from range (1, 3) t1 +) t +where t.c2 is not null; diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-predicate.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-predicate.sql.out index d5dc0f83ef4..46c430d5ba7 100644 --- a/sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-predicate.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-predicate.sql.out @@ -648,3 +648,18 @@ HAVING t0a < struct<t0a:int,t0b:int> -- !query output 1 1 + + +-- !query +select * +from range(1, 3) t1 +where (select sum(c) from ( + select t2.id * t2.id c + from range (1, 2) t2 where t1.id = t2.id + group by t2.id + ) +) is not null +-- !query schema +struct<id:bigint> +-- !query output +1 diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-select.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-select.sql.out index 7e81df1e371..d92a32d2463 100644 --- a/sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-select.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-select.sql.out @@ -754,3 +754,24 @@ org.apache.spark.sql.AnalysisException "fragment" : "SELECT sum(t0a) as d\n FROM t1" } ] } + + +-- !query +select * +from +( + select t1.id c1, ( + select sum(c) + from ( + select t2.id * t2.id c + from range (1, 2) t2 where t1.id = t2.id + group by t2.id + ) + ) c2 + from range (1, 3) t1 +) t +where t.c2 is not null +-- !query schema +struct<c1:bigint,c2:bigint> +-- !query output +1 1 --------------------------------------------------------------------- To unsubscribe, e-mail: 
commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org