This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 2e945d7a069 [SPARK-44527][SQL] Replace ScalarSubquery with null if its
maxRows is 0
2e945d7a069 is described below
commit 2e945d7a0692a1d6911d3f9ec74875092fb2cbc0
Author: Yuming Wang <[email protected]>
AuthorDate: Sun Oct 8 14:11:16 2023 -0700
[SPARK-44527][SQL] Replace ScalarSubquery with null if its maxRows is 0
### What changes were proposed in this pull request?
This PR enhances `ConstantFolding` to replace `ScalarSubquery` with null if
its maxRows is 0.
### Why are the changes needed?
Simplify expression to improve query performance. For example:
```scala
spark.sql("CREATE TABLE t1 using parquet AS SELECT * FROM range(10)")
spark.sql("CREATE TABLE t2 using parquet AS SELECT * FROM range(10)")
spark.sql("SELECT * FROM t1 WHERE t1.id = (SELECT id FROM t2 WHERE
false)").explain(true)
```
Before this PR:
```
== Optimized Logical Plan ==
Filter (isnotnull(id#3L) AND (id#3L = scalar-subquery#2 []))
: +- LocalRelation <empty>, [id#4L]
+- Relation spark_catalog.default.t1[id#3L] parquet
```
After this PR:
```
== Optimized Logical Plan ==
LocalRelation <empty>, [id#3L]
```
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Unit test.
Closes #42129 from wangyum/SPARK-44527.
Authored-by: Yuming Wang <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
---
.../spark/sql/catalyst/optimizer/expressions.scala | 4 ++++
.../catalyst/optimizer/ConstantFoldingSuite.scala | 22 ++++++++++++++++++++++
2 files changed, 26 insertions(+)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala
index 90773a1eb86..cc14789f6f5 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala
@@ -88,6 +88,10 @@ object ConstantFolding extends Rule[LogicalPlan] {
e
}
+ // Replace ScalarSubquery with null if its maxRows is 0
+ case s: ScalarSubquery if s.plan.maxRows.contains(0) =>
+ Literal(null, s.dataType)
+
case other => other.mapChildren(constantFolding(_, isConditionalBranch))
}
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala
index 8734583d3c3..d8d58ea6aa9 100644
---
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala
+++
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala
@@ -415,6 +415,28 @@ class ConstantFoldingSuite extends PlanTest {
}
}
}
+
+ test("SPARK-44527: Replace ScalarSubquery with null if its maxRows is 0") {
+ val emptyRelation = LocalRelation($"a".int)
+ val oneRowRelation = LocalRelation.fromExternalRows(Seq($"a".int),
Seq(Row(1)))
+ val nullIntLit = Literal(null, IntegerType)
+
+ comparePlans(
+
Optimize.execute(testRelation.select(ScalarSubquery(emptyRelation).as("o")).analyze),
+ testRelation.select(nullIntLit.as("o")).analyze)
+
+ Seq(EqualTo, LessThan, GreaterThan).foreach { comparison =>
+ comparePlans(
+ Optimize.execute(testRelation
+ .select(comparison($"a",
ScalarSubquery(emptyRelation)).as("o")).analyze),
+ testRelation.select(comparison($"a", nullIntLit).as("o")).analyze)
+ }
+
+ val oneRowScalarSubquery =
testRelation.select(ScalarSubquery(oneRowRelation).as("o")).analyze
+ comparePlans(
+ Optimize.execute(oneRowScalarSubquery),
+ oneRowScalarSubquery)
+ }
}
case class SerializableBoxedInt(intVal: Int) {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]