This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch branch-3.5 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.5 by this push: new 6ab870f6a991 [SPARK-43979][SQL][FOLLOWUP] Handle non alias-only project case 6ab870f6a991 is described below commit 6ab870f6a9915da2d9f231586b9b85b8faf94e2e Author: Rui Wang <rui.w...@databricks.com> AuthorDate: Wed Sep 20 17:45:22 2023 +0800 [SPARK-43979][SQL][FOLLOWUP] Handle non alias-only project case ### What changes were proposed in this pull request? `simplifyPlanForCollectedMetrics ` still could need to handle non alias-only project case where the project contains a mixed of aliases and attributes. In such case `simplifyPlanForCollectedMetrics` should also drop the extra project for the plan check when it contains CollectedMetrics. ### Why are the changes needed? Improve `simplifyPlanForCollectedMetrics` so it handles more plan pattern. ### Does this PR introduce _any_ user-facing change? NO ### How was this patch tested? UT ### Was this patch authored or co-authored using generative AI tooling? NO Closes #42971 from amaliujia/improve_simplification. Authored-by: Rui Wang <rui.w...@databricks.com> Signed-off-by: Wenchen Fan <wenc...@databricks.com> (cherry picked from commit d92d6f60342ca4d005cc2c1db94dc3b107f5d89b) Signed-off-by: Wenchen Fan <wenc...@databricks.com> --- .../apache/spark/sql/catalyst/analysis/CheckAnalysis.scala | 4 +++- .../apache/spark/sql/catalyst/analysis/AnalysisSuite.scala | 14 ++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 43546bcaa421..139fa34a1dfc 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -1109,7 +1109,7 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB * remove extra project which only re-assign expr ids from the plan so that we can identify exact * duplicates metric definition. */ - private def simplifyPlanForCollectedMetrics(plan: LogicalPlan): LogicalPlan = { + def simplifyPlanForCollectedMetrics(plan: LogicalPlan): LogicalPlan = { plan.resolveOperators { case p: Project if p.projectList.size == p.child.output.size => val assignExprIdOnly = p.projectList.zipWithIndex.forall { @@ -1118,6 +1118,8 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB // ordinal of this attribute in the child outputs. So an alias-only Project means the // the id of the aliased attribute is the same as its index in the project list. attr.exprId.id == index + case (left: AttributeReference, index) => + left.exprId.id == index case _ => false } if (assignExprIdOnly) { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala index 06c3e3eb0405..57b37e67b32b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala @@ -1667,4 +1667,18 @@ class AnalysisSuite extends AnalysisTest with Matchers { checkAnalysis(ident2.select($"a"), testRelation.select($"a").analyze) } } + + test("simplifyPlanForCollectedMetrics should handle non alias-only project case") { + val inner = Project( + Seq( + Alias(testRelation2.output(0), "a")(), + testRelation2.output(1), + Alias(testRelation2.output(2), "c")(), + testRelation2.output(3), + testRelation2.output(4) + ), + testRelation2) + val actualPlan = getAnalyzer.simplifyPlanForCollectedMetrics(inner.canonicalized) + assert(actualPlan == testRelation2.canonicalized) + } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org