This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new c8cdeeca9236 [SPARK-51751][SQL] Fix multiple rCTEs for one WITH 
statement that reference each other
c8cdeeca9236 is described below

commit c8cdeeca92364986d9a133bd063b88d1cbd49bc8
Author: Pavle Martinovic <34302662+pajar...@users.noreply.github.com>
AuthorDate: Thu Apr 10 10:27:23 2025 +0800

    [SPARK-51751][SQL] Fix multiple rCTEs for one WITH statement that reference 
each other
    
    ### What changes were proposed in this pull request?
    
    Add a check to UnionLoopExec that the UnionLoopRef in its subtree is 
referring to the correct query. This is important since if we call another rCTE 
from it, it will also have a UnionLoopRef which needs to not be replaced with 
the reference.
    
    ### Why are the changes needed?
    
    Multiple rCTEs within one WITH statement ended in an infinite recursion.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No.
    
    ### How was this patch tested?
    
    New golden file test added.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    No.
    
    Closes #50544 from Pajaraja/pavle-martinovic_data/multiplerctesfix.
    
    Lead-authored-by: Pavle Martinovic 
<34302662+pajar...@users.noreply.github.com>
    Co-authored-by: pavle-martinovic_data <pavle.martino...@databricks.com>
    Signed-off-by: Wenchen Fan <wenc...@databricks.com>
---
 .../apache/spark/sql/execution/UnionLoopExec.scala |  2 +-
 .../analyzer-results/cte-recursion.sql.out         | 44 ++++++++++++++++++++++
 .../resources/sql-tests/inputs/cte-recursion.sql   | 15 +++++++-
 .../sql-tests/results/cte-recursion.sql.out        | 20 ++++++++++
 4 files changed, 79 insertions(+), 2 deletions(-)

diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/UnionLoopExec.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/UnionLoopExec.scala
index 561f9cd72efb..85c7a57467b5 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/UnionLoopExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/UnionLoopExec.scala
@@ -188,7 +188,7 @@ case class UnionLoopExec(
         // This way we support only UNION ALL case. Additional case should be 
added for UNION case.
         // One way of supporting UNION case can be seen at SPARK-24497 PR from 
Peter Toth.
         val newRecursion = recursion.transform {
-          case r: UnionLoopRef =>
+          case r: UnionLoopRef if r.loopId == loopId =>
             val logicalPlan = prevDF.logicalPlan
             val optimizedPlan = prevDF.queryExecution.optimizedPlan
             val (stats, constraints) = rewriteStatsAndConstraints(logicalPlan, 
optimizedPlan)
diff --git 
a/sql/core/src/test/resources/sql-tests/analyzer-results/cte-recursion.sql.out 
b/sql/core/src/test/resources/sql-tests/analyzer-results/cte-recursion.sql.out
index 88f3f675c87b..33e486e6dc88 100644
--- 
a/sql/core/src/test/resources/sql-tests/analyzer-results/cte-recursion.sql.out
+++ 
b/sql/core/src/test/resources/sql-tests/analyzer-results/cte-recursion.sql.out
@@ -1256,3 +1256,47 @@ WithCTE
 +- Project [id#x, xid#x]
    +- SubqueryAlias t
       +- CTERelationRef xxxx, true, [id#x, xid#x], false, false
+
+
+-- !query
+WITH RECURSIVE t1(a, b) AS (
+    SELECT 1, 1
+    UNION ALL
+    SELECT a + b, a FROM t1 WHERE a < 20
+),
+t2(n) AS (
+    SELECT 1
+    UNION ALL
+    SELECT n + 1 FROM t2, t1 WHERE n + 1 = a
+)
+SELECT * FROM t2
+-- !query analysis
+WithCTE
+:- CTERelationDef xxxx, false
+:  +- SubqueryAlias t1
+:     +- Project [1#x AS a#x, 1#x AS b#x]
+:        +- UnionLoop xxxx
+:           :- Project [1 AS 1#x, 1 AS 1#x]
+:           :  +- OneRowRelation
+:           +- Project [(a#x + b#x) AS (a + b)#x, a#x]
+:              +- Filter (a#x < 20)
+:                 +- SubqueryAlias t1
+:                    +- Project [1#x AS a#x, 1#x AS b#x]
+:                       +- UnionLoopRef xxxx, [1#x, 1#x], false
+:- CTERelationDef xxxx, false
+:  +- SubqueryAlias t2
+:     +- Project [1#x AS n#x]
+:        +- UnionLoop xxxx
+:           :- Project [1 AS 1#x]
+:           :  +- OneRowRelation
+:           +- Project [(n#x + 1) AS (n + 1)#x]
+:              +- Filter ((n#x + 1) = a#x)
+:                 +- Join Inner
+:                    :- SubqueryAlias t2
+:                    :  +- Project [1#x AS n#x]
+:                    :     +- UnionLoopRef xxxx, [1#x], false
+:                    +- SubqueryAlias t1
+:                       +- CTERelationRef xxxx, true, [a#x, b#x], false, false
++- Project [n#x]
+   +- SubqueryAlias t2
+      +- CTERelationRef xxxx, true, [n#x], false, false
diff --git a/sql/core/src/test/resources/sql-tests/inputs/cte-recursion.sql 
b/sql/core/src/test/resources/sql-tests/inputs/cte-recursion.sql
index 2780462478b0..c45f62196430 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/cte-recursion.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/cte-recursion.sql
@@ -486,4 +486,17 @@ WITH RECURSIVE
         UNION ALL
         SELECT t.id + 1, xid * 10 + x.id FROM t CROSS JOIN x WHERE t.id < 3
     )
-SELECT * FROM t
\ No newline at end of file
+SELECT * FROM t;
+
+-- rCTE referencing other rCTE
+WITH RECURSIVE t1(a, b) AS (
+    SELECT 1, 1
+    UNION ALL
+    SELECT a + b, a FROM t1 WHERE a < 20
+),
+t2(n) AS (
+    SELECT 1
+    UNION ALL
+    SELECT n + 1 FROM t2, t1 WHERE n + 1 = a
+)
+SELECT * FROM t2;
\ No newline at end of file
diff --git 
a/sql/core/src/test/resources/sql-tests/results/cte-recursion.sql.out 
b/sql/core/src/test/resources/sql-tests/results/cte-recursion.sql.out
index 90762f81fd51..cc4e01e11ca6 100644
--- a/sql/core/src/test/resources/sql-tests/results/cte-recursion.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/cte-recursion.sql.out
@@ -1178,3 +1178,23 @@ struct<id:int,xid:int>
 3      212
 3      221
 3      222
+
+
+-- !query
+WITH RECURSIVE t1(a, b) AS (
+    SELECT 1, 1
+    UNION ALL
+    SELECT a + b, a FROM t1 WHERE a < 20
+),
+t2(n) AS (
+    SELECT 1
+    UNION ALL
+    SELECT n + 1 FROM t2, t1 WHERE n + 1 = a
+)
+SELECT * FROM t2
+-- !query schema
+struct<n:int>
+-- !query output
+1
+2
+3


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to