This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new a76e4fa54b38 Revert [SPARK-54758][SQL] Fix generator resolution order 
in Project
a76e4fa54b38 is described below

commit a76e4fa54b38ebc4e41cef5195e6e7f5ebf3e646
Author: Mikhail Nikoliukin <[email protected]>
AuthorDate: Tue Dec 23 12:40:25 2025 +0800

    Revert [SPARK-54758][SQL] Fix generator resolution order in Project
    
    ### What changes were proposed in this pull request?
    
    Reverting my previous PR because it broke LCA (lateral column alias) resolution: 
https://github.com/apache/spark/pull/53527
    
    ### Why are the changes needed?
    
    There are 2 problems with LCA:
    1. The current implementation breaks a number of queries because it creates a 
circular dependency: `Generator` waits for `UnresolvedFunction`, 
`UnresolvedFunction` waits for the generator's LCA, and that LCA waits for 
`Generator` resolution. I have an idea how to do it properly, but first I want 
to revert this.
    2. A more fundamental problem is that currently generators' LCA could be 
resolved from right to left, e.g., `SELECT explode(arr) as col, 
explode(array(array(0), array(1), array(2))) as arr` works. This is quite 
bizarre, but I will need to break this behavior if I want to enforce 
left-to-right resolution.
    
    ### Does this PR introduce _any_ user-facing change?
    
    'No'
    
    ### How was this patch tested?
    
    Added new test queries to the `generators-resolution-edge-cases` golden files.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    Generated-by: Cursor 2.2.14
    
    Closes #53562 from mikhailnik-db/revert-generator-order-fix.
    
    Authored-by: Mikhail Nikoliukin <[email protected]>
    Signed-off-by: Wenchen Fan <[email protected]>
---
 .../spark/sql/catalyst/analysis/Analyzer.scala     | 18 ++----------------
 .../generators-resolution-edge-cases.sql.out       | 22 ++++++++++++++++++++--
 .../inputs/generators-resolution-edge-cases.sql    |  8 +++++++-
 .../generators-resolution-edge-cases.sql.out       | 20 ++++++++++++++++++++
 4 files changed, 49 insertions(+), 19 deletions(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index 801b494e06de..c334e3d07607 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -3065,13 +3065,6 @@ class Analyzer(
       })
     }
 
-    private def hasUnresolvedGeneratorOrFunction(exprs: Seq[Expression]): 
Boolean = {
-      exprs.exists(_.exists {
-        case _: UnresolvedFunction | _: UnresolvedGenerator => true
-        case _ => false
-      })
-    }
-
     private def trimAlias(expr: NamedExpression): Expression = expr match {
       case UnresolvedAlias(child, _) => child
       case Alias(child, _) => child
@@ -3163,21 +3156,15 @@ class Analyzer(
         p
 
       // The star will be expanded differently if we insert `Generate` under 
`Project` too early.
-      // We also wait for all functions and generators to be resolved to 
ensure left-to-right
-      // generator ordering.
-      case p @ Project(projectList, child)
-          if !projectList.exists(_.exists(_.isInstanceOf[Star])) &&
-             !hasUnresolvedGeneratorOrFunction(projectList) =>
-        var hasSeenGenerator = false
+      case p @ Project(projectList, child) if 
!projectList.exists(_.exists(_.isInstanceOf[Star])) =>
         val (resolvedGenerator, newProjectList) = projectList
           .map(trimNonTopLevelAliases)
           .foldLeft((None: Option[Generate], Nil: Seq[NamedExpression])) { 
(res, e) =>
             e match {
               // If there are more than one generator, we only rewrite the 
first one and wait for
               // the next analyzer iteration to rewrite the next one.
-              case AliasedGenerator(generator, names, outer) if 
!hasSeenGenerator &&
+              case AliasedGenerator(generator, names, outer) if res._1.isEmpty 
&&
                   generator.childrenResolved =>
-                hasSeenGenerator = true
                 val g = Generate(
                   generator,
                   unrequiredChildIndex = Nil,
@@ -3187,7 +3174,6 @@ class Analyzer(
                   child)
                 (Some(g), res._2 ++ g.nullableOutput)
               case other =>
-                hasSeenGenerator |= hasGenerator(other)
                 (res._1, res._2 :+ other)
             }
           }
diff --git 
a/sql/core/src/test/resources/sql-tests/analyzer-results/generators-resolution-edge-cases.sql.out
 
b/sql/core/src/test/resources/sql-tests/analyzer-results/generators-resolution-edge-cases.sql.out
index 3212322d69c3..2cbb1fb5d382 100644
--- 
a/sql/core/src/test/resources/sql-tests/analyzer-results/generators-resolution-edge-cases.sql.out
+++ 
b/sql/core/src/test/resources/sql-tests/analyzer-results/generators-resolution-edge-cases.sql.out
@@ -38,8 +38,8 @@ Project [col#x, col#x]
 SELECT explode(array(sin(0), 1, 2)), explode(array(10, 20))
 -- !query analysis
 Project [col#x, col#x]
-+- Generate explode(array(10, 20)), false, [col#x]
-   +- Generate explode(array(SIN(cast(0 as double)), cast(1 as double), cast(2 
as double))), false, [col#x]
++- Generate explode(array(SIN(cast(0 as double)), cast(1 as double), cast(2 as 
double))), false, [col#x]
+   +- Generate explode(array(10, 20)), false, [col#x]
       +- OneRowRelation
 
 
@@ -460,3 +460,21 @@ Project [col#x, count(1) OVER (ROWS BETWEEN UNBOUNDED 
PRECEDING AND UNBOUNDED FO
          +- Window [count(1) 
windowspecdefinition(specifiedwindowframe(RowFrame, unboundedpreceding$(), 
unboundedfollowing$())) AS count(1) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND 
UNBOUNDED FOLLOWING)#xL]
             +- Aggregate [count(1) AS count(1)#xL]
                +- OneRowRelation
+
+
+-- !query
+SELECT explode(array(array(0), array(1), array(2))) as arr, explode(arr) as col
+-- !query analysis
+Project [arr#x, col#x]
++- Generate explode(arr#x), false, [col#x]
+   +- Generate explode(array(array(0), array(1), array(2))), false, [arr#x]
+      +- OneRowRelation
+
+
+-- !query
+SELECT explode(arr) as col, explode(array(array(0), array(1), array(2))) as arr
+-- !query analysis
+Project [col#x, arr#x]
++- Generate explode(arr#x), false, [col#x]
+   +- Generate explode(array(array(0), array(1), array(2))), false, [arr#x]
+      +- OneRowRelation
diff --git 
a/sql/core/src/test/resources/sql-tests/inputs/generators-resolution-edge-cases.sql
 
b/sql/core/src/test/resources/sql-tests/inputs/generators-resolution-edge-cases.sql
index 69538975742d..50ba6480efd9 100644
--- 
a/sql/core/src/test/resources/sql-tests/inputs/generators-resolution-edge-cases.sql
+++ 
b/sql/core/src/test/resources/sql-tests/inputs/generators-resolution-edge-cases.sql
@@ -7,7 +7,7 @@ SELECT 1 + explode(array(1, 2, 3));
 -- multiple generators should work
 SELECT explode(array(0, 1, 2)), explode(array(10, 20));
 
--- multiple generators are processed in left-to-right order regardless of 
internal rule ordering
+-- multiple generators' order is not fixed and depends on rule ordering
 SELECT explode(array(sin(0), 1, 2)), explode(array(10, 20));
 
 -- multiple generators in aggregate should fail
@@ -127,3 +127,9 @@ SELECT explode(array(1, 2, 3)) as col, count(*) OVER ();
 
 -- generator with window function and aggregate together resolves in order 
Aggregate -> Window -> Generator
 SELECT explode(array(1, 2, 3)), count(*) OVER (), count(*);
+
+-- generator LCA left-to-right should work
+SELECT explode(array(array(0), array(1), array(2))) as arr, explode(arr) as 
col;
+
+-- generator LCA right-to-left should work
+SELECT explode(arr) as col, explode(array(array(0), array(1), array(2))) as 
arr;
diff --git 
a/sql/core/src/test/resources/sql-tests/results/generators-resolution-edge-cases.sql.out
 
b/sql/core/src/test/resources/sql-tests/results/generators-resolution-edge-cases.sql.out
index 995798ca246b..dfe39028ea01 100644
--- 
a/sql/core/src/test/resources/sql-tests/results/generators-resolution-edge-cases.sql.out
+++ 
b/sql/core/src/test/resources/sql-tests/results/generators-resolution-edge-cases.sql.out
@@ -460,3 +460,23 @@ struct<col:int,count(1) OVER (ROWS BETWEEN UNBOUNDED 
PRECEDING AND UNBOUNDED FOL
 1      1       1
 2      1       1
 3      1       1
+
+
+-- !query
+SELECT explode(array(array(0), array(1), array(2))) as arr, explode(arr) as col
+-- !query schema
+struct<arr:array<int>,col:int>
+-- !query output
+[0]    0
+[1]    1
+[2]    2
+
+
+-- !query
+SELECT explode(arr) as col, explode(array(array(0), array(1), array(2))) as arr
+-- !query schema
+struct<col:int,arr:array<int>>
+-- !query output
+0      [0]
+1      [1]
+2      [2]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to