This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch branch-3.2
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.2 by this push:
new bf4edb5 [SPARK-36353][SQL] RemoveNoopOperators should keep output schema
bf4edb5 is described below
commit bf4edb5f5aff5875fe8043c60e7d8a7e07249082
Author: Angerszhuuuu <[email protected]>
AuthorDate: Thu Aug 5 20:43:35 2021 +0800
[SPARK-36353][SQL] RemoveNoopOperators should keep output schema
### What changes were proposed in this pull request?
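RemoveNoopOperators should keep the output schema. When it eliminates a no-op Project, it now restores the Project's output names onto the child Project or Aggregate, and it keeps the Project if the child's output names would still differ.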
### Why are the changes needed?
Expand function
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
Added a unit test in RemoveNoopOperatorsSuite.
Closes #33587 from AngersZhuuuu/SPARK-36355.
Authored-by: Angerszhuuuu <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
(cherry picked from commit 02810eecbfae6cbfcd91c2a8f9a95aee93031451)
Signed-off-by: Wenchen Fan <[email protected]>
---
.../spark/sql/catalyst/optimizer/Optimizer.scala | 26 +++++++++++++++++++++-
.../optimizer/RemoveNoopOperatorsSuite.scala | 12 ++++++++++
2 files changed, 37 insertions(+), 1 deletion(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
index d30481f..369fb51 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
@@ -515,10 +515,34 @@ object RemoveRedundantAliases extends Rule[LogicalPlan] {
* Remove no-op operators from the query plan that do not make any
modifications.
*/
object RemoveNoopOperators extends Rule[LogicalPlan] {
+ def restoreOriginalOutputNames(
+ projectList: Seq[NamedExpression],
+ originalNames: Seq[String]): Seq[NamedExpression] = {
+ projectList.zip(originalNames).map {
+ case (attr: Attribute, name) => attr.withName(name)
+ case (alias: Alias, name) => alias.withName(name)
+ case (other, _) => other
+ }
+ }
+
def apply(plan: LogicalPlan): LogicalPlan = plan.transformUpWithPruning(
_.containsAnyPattern(PROJECT, WINDOW), ruleId) {
// Eliminate no-op Projects
- case p @ Project(_, child) if child.sameOutput(p) => child
+ case p @ Project(projectList, child) if child.sameOutput(p) =>
+ val newChild = child match {
+ case p: Project =>
+ p.copy(projectList = restoreOriginalOutputNames(p.projectList,
projectList.map(_.name)))
+ case agg: Aggregate =>
+ agg.copy(aggregateExpressions =
+ restoreOriginalOutputNames(agg.aggregateExpressions,
projectList.map(_.name)))
+ case _ =>
+ child
+ }
+ if (newChild.output.zip(projectList).forall { case (a1, a2) => a1.name
== a2.name }) {
+ newChild
+ } else {
+ p
+ }
// Eliminate no-op Window
case w: Window if w.windowExpressions.isEmpty => w.child
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RemoveNoopOperatorsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RemoveNoopOperatorsSuite.scala
index cedd21d..943d207 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RemoveNoopOperatorsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RemoveNoopOperatorsSuite.scala
@@ -54,4 +54,16 @@ class RemoveNoopOperatorsSuite extends PlanTest {
comparePlans(optimized, testRelation)
}
+
+ test("SPARK-36353: RemoveNoopOperators should keep output schema") {
+ val query = testRelation
+ .select(('a + 'b).as("c"))
+ .analyze
+ val originalQuery = Project(Seq(query.output.head.withName("C")), query)
+ val optimized = Optimize.execute(originalQuery.analyze)
+ val result = testRelation
+ .select(('a + 'b).as("C"))
+ .analyze
+ comparePlans(optimized, result)
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]