This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch branch-3.3
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.3 by this push:
new 669fc1b2c1c [SPARK-39216][SQL] Do not collapse projects in
CombineUnions if it hasCorrelatedSubquery
669fc1b2c1c is described below
commit 669fc1b2c1cce7049a9f10e386ed1af050de3909
Author: Yuming Wang <[email protected]>
AuthorDate: Wed May 18 23:37:25 2022 -0700
[SPARK-39216][SQL] Do not collapse projects in CombineUnions if it
hasCorrelatedSubquery
### What changes were proposed in this pull request?
Make `CombineUnions` skip collapsing adjacent projects when either project list contains a correlated subquery (`SubqueryExpression.hasCorrelatedSubquery`).
For example:
```sql
SELECT (SELECT IF(x, 1, 0)) AS a
FROM (SELECT true) t(x)
UNION
SELECT 1 AS a
```
Before this change, the query above throws an exception:
```
java.lang.IllegalStateException: Couldn't find x#4 in []
```
### Why are the changes needed?
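Fix the bug shown above: the example query fails with `IllegalStateException` instead of returning a result.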
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Added a unit test to `SQLQuerySuite`.
Closes #36595 from wangyum/SPARK-39216.
Authored-by: Yuming Wang <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
(cherry picked from commit 85bb7bf008d0346feaedc2aab55857d8f1b19908)
Signed-off-by: Dongjoon Hyun <[email protected]>
---
.../spark/sql/catalyst/optimizer/Optimizer.scala | 4 +++-
.../scala/org/apache/spark/sql/SQLQuerySuite.scala | 25 ++++++++++++++++++++++
2 files changed, 28 insertions(+), 1 deletion(-)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
index 94e9d3cdd14..02f9a9eb01c 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
@@ -1340,7 +1340,9 @@ object CombineUnions extends Rule[LogicalPlan] {
while (stack.nonEmpty) {
stack.pop() match {
case p1 @ Project(_, p2: Project)
- if canCollapseExpressions(p1.projectList, p2.projectList,
alwaysInline = false) =>
+ if canCollapseExpressions(p1.projectList, p2.projectList,
alwaysInline = false) &&
+ !p1.projectList.exists(SubqueryExpression.hasCorrelatedSubquery)
&&
+ !p2.projectList.exists(SubqueryExpression.hasCorrelatedSubquery)
=>
val newProjectList = buildCleanedProjectList(p1.projectList,
p2.projectList)
stack.pushAll(Seq(p2.copy(projectList = newProjectList)))
case Distinct(Union(children, byName, allowMissingCol))
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 919fe88ec4b..0761f8e2749 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -4478,6 +4478,31 @@ class SQLQuerySuite extends QueryTest with
SharedSparkSession with AdaptiveSpark
))
}
}
+
+ test("SPARK-39216: Don't collapse projects in CombineUnions if it
hasCorrelatedSubquery") {
+ checkAnswer(
+ sql(
+ """
+ |SELECT (SELECT IF(x, 1, 0)) AS a
+ |FROM (SELECT true) t(x)
+ |UNION
+ |SELECT 1 AS a
+ """.stripMargin),
+ Seq(Row(1)))
+
+ checkAnswer(
+ sql(
+ """
+ |SELECT x + 1
+ |FROM (SELECT id
+ | + (SELECT Max(id)
+ | FROM range(2)) AS x
+ | FROM range(1)) t
+ |UNION
+ |SELECT 1 AS a
+ """.stripMargin),
+ Seq(Row(2), Row(1)))
+ }
}
case class Foo(bar: Option[String])
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]