This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new a76e4fa54b38 Revert [SPARK-54758][SQL] Fix generator resolution order
in Project
a76e4fa54b38 is described below
commit a76e4fa54b38ebc4e41cef5195e6e7f5ebf3e646
Author: Mikhail Nikoliukin <[email protected]>
AuthorDate: Tue Dec 23 12:40:25 2025 +0800
Revert [SPARK-54758][SQL] Fix generator resolution order in Project
### What changes were proposed in this pull request?
Reverting my previous PR, because it broke LCA (lateral column alias) resolution.
https://github.com/apache/spark/pull/53527
### Why are the changes needed?
There are 2 problems with LCA:
1. The current implementation breaks a number of queries because it creates a
circular dependency: `Generator` waits for `UnresolvedFunction`,
`UnresolvedFunction` waits for the generator's LCA, and that LCA in turn waits
for `Generator` resolution. I have an idea how to do it properly, but first I
want to revert this.
2. A more fundamental problem is that generators' LCAs can currently be
resolved from right to left, e.g., `SELECT explode(arr) as col,
explode(array(array(0), array(1), array(2))) as arr` works. This is quite
bizarre, but I will need to break this behavior if I want to enforce
left-to-right resolution.
### Does this PR introduce _any_ user-facing change?
'No'
### How was this patch tested?
Add new tests to golden files
### Was this patch authored or co-authored using generative AI tooling?
Generated-by: Cursor 2.2.14
Closes #53562 from mikhailnik-db/revert-generator-order-fix.
Authored-by: Mikhail Nikoliukin <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
---
.../spark/sql/catalyst/analysis/Analyzer.scala | 18 ++----------------
.../generators-resolution-edge-cases.sql.out | 22 ++++++++++++++++++++--
.../inputs/generators-resolution-edge-cases.sql | 8 +++++++-
.../generators-resolution-edge-cases.sql.out | 20 ++++++++++++++++++++
4 files changed, 49 insertions(+), 19 deletions(-)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index 801b494e06de..c334e3d07607 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -3065,13 +3065,6 @@ class Analyzer(
})
}
- private def hasUnresolvedGeneratorOrFunction(exprs: Seq[Expression]):
Boolean = {
- exprs.exists(_.exists {
- case _: UnresolvedFunction | _: UnresolvedGenerator => true
- case _ => false
- })
- }
-
private def trimAlias(expr: NamedExpression): Expression = expr match {
case UnresolvedAlias(child, _) => child
case Alias(child, _) => child
@@ -3163,21 +3156,15 @@ class Analyzer(
p
// The star will be expanded differently if we insert `Generate` under
`Project` too early.
- // We also wait for all functions and generators to be resolved to
ensure left-to-right
- // generator ordering.
- case p @ Project(projectList, child)
- if !projectList.exists(_.exists(_.isInstanceOf[Star])) &&
- !hasUnresolvedGeneratorOrFunction(projectList) =>
- var hasSeenGenerator = false
+ case p @ Project(projectList, child) if
!projectList.exists(_.exists(_.isInstanceOf[Star])) =>
val (resolvedGenerator, newProjectList) = projectList
.map(trimNonTopLevelAliases)
.foldLeft((None: Option[Generate], Nil: Seq[NamedExpression])) {
(res, e) =>
e match {
// If there are more than one generator, we only rewrite the
first one and wait for
// the next analyzer iteration to rewrite the next one.
- case AliasedGenerator(generator, names, outer) if
!hasSeenGenerator &&
+ case AliasedGenerator(generator, names, outer) if res._1.isEmpty
&&
generator.childrenResolved =>
- hasSeenGenerator = true
val g = Generate(
generator,
unrequiredChildIndex = Nil,
@@ -3187,7 +3174,6 @@ class Analyzer(
child)
(Some(g), res._2 ++ g.nullableOutput)
case other =>
- hasSeenGenerator |= hasGenerator(other)
(res._1, res._2 :+ other)
}
}
diff --git
a/sql/core/src/test/resources/sql-tests/analyzer-results/generators-resolution-edge-cases.sql.out
b/sql/core/src/test/resources/sql-tests/analyzer-results/generators-resolution-edge-cases.sql.out
index 3212322d69c3..2cbb1fb5d382 100644
---
a/sql/core/src/test/resources/sql-tests/analyzer-results/generators-resolution-edge-cases.sql.out
+++
b/sql/core/src/test/resources/sql-tests/analyzer-results/generators-resolution-edge-cases.sql.out
@@ -38,8 +38,8 @@ Project [col#x, col#x]
SELECT explode(array(sin(0), 1, 2)), explode(array(10, 20))
-- !query analysis
Project [col#x, col#x]
-+- Generate explode(array(10, 20)), false, [col#x]
- +- Generate explode(array(SIN(cast(0 as double)), cast(1 as double), cast(2
as double))), false, [col#x]
++- Generate explode(array(SIN(cast(0 as double)), cast(1 as double), cast(2 as
double))), false, [col#x]
+ +- Generate explode(array(10, 20)), false, [col#x]
+- OneRowRelation
@@ -460,3 +460,21 @@ Project [col#x, count(1) OVER (ROWS BETWEEN UNBOUNDED
PRECEDING AND UNBOUNDED FO
+- Window [count(1)
windowspecdefinition(specifiedwindowframe(RowFrame, unboundedpreceding$(),
unboundedfollowing$())) AS count(1) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND
UNBOUNDED FOLLOWING)#xL]
+- Aggregate [count(1) AS count(1)#xL]
+- OneRowRelation
+
+
+-- !query
+SELECT explode(array(array(0), array(1), array(2))) as arr, explode(arr) as col
+-- !query analysis
+Project [arr#x, col#x]
++- Generate explode(arr#x), false, [col#x]
+ +- Generate explode(array(array(0), array(1), array(2))), false, [arr#x]
+ +- OneRowRelation
+
+
+-- !query
+SELECT explode(arr) as col, explode(array(array(0), array(1), array(2))) as arr
+-- !query analysis
+Project [col#x, arr#x]
++- Generate explode(arr#x), false, [col#x]
+ +- Generate explode(array(array(0), array(1), array(2))), false, [arr#x]
+ +- OneRowRelation
diff --git
a/sql/core/src/test/resources/sql-tests/inputs/generators-resolution-edge-cases.sql
b/sql/core/src/test/resources/sql-tests/inputs/generators-resolution-edge-cases.sql
index 69538975742d..50ba6480efd9 100644
---
a/sql/core/src/test/resources/sql-tests/inputs/generators-resolution-edge-cases.sql
+++
b/sql/core/src/test/resources/sql-tests/inputs/generators-resolution-edge-cases.sql
@@ -7,7 +7,7 @@ SELECT 1 + explode(array(1, 2, 3));
-- multiple generators should work
SELECT explode(array(0, 1, 2)), explode(array(10, 20));
--- multiple generators are processed in left-to-right order regardless of
internal rule ordering
+-- multiple generators' order is not fixed and depends on rule ordering
SELECT explode(array(sin(0), 1, 2)), explode(array(10, 20));
-- multiple generators in aggregate should fail
@@ -127,3 +127,9 @@ SELECT explode(array(1, 2, 3)) as col, count(*) OVER ();
-- generator with window function and aggregate together resolves in order
Aggregate -> Window -> Generator
SELECT explode(array(1, 2, 3)), count(*) OVER (), count(*);
+
+-- generator LCA left-to-right should work
+SELECT explode(array(array(0), array(1), array(2))) as arr, explode(arr) as
col;
+
+-- generator LCA right-to-left should work
+SELECT explode(arr) as col, explode(array(array(0), array(1), array(2))) as
arr;
diff --git
a/sql/core/src/test/resources/sql-tests/results/generators-resolution-edge-cases.sql.out
b/sql/core/src/test/resources/sql-tests/results/generators-resolution-edge-cases.sql.out
index 995798ca246b..dfe39028ea01 100644
---
a/sql/core/src/test/resources/sql-tests/results/generators-resolution-edge-cases.sql.out
+++
b/sql/core/src/test/resources/sql-tests/results/generators-resolution-edge-cases.sql.out
@@ -460,3 +460,23 @@ struct<col:int,count(1) OVER (ROWS BETWEEN UNBOUNDED
PRECEDING AND UNBOUNDED FOL
1 1 1
2 1 1
3 1 1
+
+
+-- !query
+SELECT explode(array(array(0), array(1), array(2))) as arr, explode(arr) as col
+-- !query schema
+struct<arr:array<int>,col:int>
+-- !query output
+[0] 0
+[1] 1
+[2] 2
+
+
+-- !query
+SELECT explode(arr) as col, explode(array(array(0), array(1), array(2))) as arr
+-- !query schema
+struct<col:int,arr:array<int>>
+-- !query output
+0 [0]
+1 [1]
+2 [2]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]