This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch branch-3.3
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.3 by this push:
new c5b874da719 [SPARK-45920][SQL][3.3] group by ordinal should be
idempotent
c5b874da719 is described below
commit c5b874da719183b2acdece3391be8493272f9d58
Author: Wenchen Fan <[email protected]>
AuthorDate: Thu Nov 16 08:20:22 2023 -0800
[SPARK-45920][SQL][3.3] group by ordinal should be idempotent
backport https://github.com/apache/spark/pull/43797
### What changes were proposed in this pull request?
GROUP BY ordinal is not idempotent today. If the ordinal points to another
integer literal and the plan get analyzed again, we will re-do the ordinal
resolution which can lead to wrong result or index out-of-bound error. This PR
fixes it by using a hack: if the ordinal points to another integer literal,
don't replace the ordinal.
### Why are the changes needed?
For advanced users or Spark plugins, they may manipulate the logical plans
directly. We need to make the framework more reliable.
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
new test
### Was this patch authored or co-authored using generative AI tooling?
no
Closes #43839 from cloud-fan/3.3-port.
Authored-by: Wenchen Fan <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
---
.../spark/sql/catalyst/analysis/Analyzer.scala | 14 ++++++++++++-
.../SubstituteUnresolvedOrdinalsSuite.scala | 23 ++++++++++++++++++++--
2 files changed, 34 insertions(+), 3 deletions(-)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index d7bba23cf68..b4e520dd2e6 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -1865,7 +1865,19 @@ class Analyzer(override val catalogManager:
CatalogManager)
throw
QueryCompilationErrors.groupByPositionRefersToAggregateFunctionError(
index, ordinalExpr)
} else {
- ordinalExpr
+ trimAliases(ordinalExpr) match {
+ // HACK ALERT: If the ordinal expression is also an integer
literal, don't use it
+ // but still keep the ordinal literal. The reason
is we may repeatedly
+ // analyze the plan. Using a different integer
literal may lead to
+ // a repeat GROUP BY ordinal resolution which is
wrong. GROUP BY
+ // constant is meaningless so whatever value does
not matter here.
+ // TODO: (SPARK-45932) GROUP BY ordinal should pull out
grouping expressions to
+ // a Project, then the resolved ordinal expression is
always
+ // `AttributeReference`.
+ case Literal(_: Int, IntegerType) =>
+ Literal(index)
+ case _ => ordinalExpr
+ }
}
} else {
throw QueryCompilationErrors.groupByPositionRangeError(index,
aggs.size)
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/SubstituteUnresolvedOrdinalsSuite.scala
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/SubstituteUnresolvedOrdinalsSuite.scala
index c0312282c76..99fa62532f3 100644
---
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/SubstituteUnresolvedOrdinalsSuite.scala
+++
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/SubstituteUnresolvedOrdinalsSuite.scala
@@ -17,10 +17,11 @@
package org.apache.spark.sql.catalyst.analysis
-import org.apache.spark.sql.catalyst.analysis.TestRelations.testRelation2
+import org.apache.spark.sql.catalyst.analysis.TestRelations.{testRelation,
testRelation2}
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
-import org.apache.spark.sql.catalyst.expressions.Literal
+import org.apache.spark.sql.catalyst.expressions.{GenericInternalRow, Literal}
+import org.apache.spark.sql.catalyst.plans.logical.LocalRelation
import org.apache.spark.sql.internal.SQLConf
class SubstituteUnresolvedOrdinalsSuite extends AnalysisTest {
@@ -67,4 +68,22 @@ class SubstituteUnresolvedOrdinalsSuite extends AnalysisTest
{
testRelation2.groupBy(Literal(1), Literal(2))('a, 'b))
}
}
+
+ test("SPARK-45920: group by ordinal repeated analysis") {
+ val plan = testRelation.groupBy(Literal(1))(Literal(100).as("a")).analyze
+ comparePlans(
+ plan,
+ testRelation.groupBy(Literal(1))(Literal(100).as("a"))
+ )
+
+ val testRelationWithData = testRelation.copy(data = Seq(new
GenericInternalRow(Array(1: Any))))
+ // Copy the plan to reset its `analyzed` flag, so that analyzer rules will
re-apply.
+ val copiedPlan = plan.transform {
+ case _: LocalRelation => testRelationWithData
+ }
+ comparePlans(
+ copiedPlan.analyze, // repeated analysis
+ testRelationWithData.groupBy(Literal(1))(Literal(100).as("a"))
+ )
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]