This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new c8cdeeca9236 [SPARK-51751][SQL] Fix multiple rCTEs for one WITH statement that reference each other c8cdeeca9236 is described below commit c8cdeeca92364986d9a133bd063b88d1cbd49bc8 Author: Pavle Martinovic <34302662+pajar...@users.noreply.github.com> AuthorDate: Thu Apr 10 10:27:23 2025 +0800 [SPARK-51751][SQL] Fix multiple rCTEs for one WITH statement that reference each other ### What changes were proposed in this pull request? Add a check to UnionLoopExec that the UnionLoopRef in its subtree is referring to the correct query. This is important since if we call another rCTE from it, it will also have a UnionLoopRef which needs to not be replaced with the reference. ### Why are the changes needed? Multiple rCTEs within one WITH statement ended in an infinite recursion. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? New golden file test added. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #50544 from Pajaraja/pavle-martinovic_data/multiplerctesfix. Lead-authored-by: Pavle Martinovic <34302662+pajar...@users.noreply.github.com> Co-authored-by: pavle-martinovic_data <pavle.martino...@databricks.com> Signed-off-by: Wenchen Fan <wenc...@databricks.com> --- .../apache/spark/sql/execution/UnionLoopExec.scala | 2 +- .../analyzer-results/cte-recursion.sql.out | 44 ++++++++++++++++++++++ .../resources/sql-tests/inputs/cte-recursion.sql | 15 +++++++- .../sql-tests/results/cte-recursion.sql.out | 20 ++++++++++ 4 files changed, 79 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/UnionLoopExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/UnionLoopExec.scala index 561f9cd72efb..85c7a57467b5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/UnionLoopExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/UnionLoopExec.scala @@ -188,7 +188,7 @@ case class UnionLoopExec( // This way we support only UNION ALL case. Additional case should be added for UNION case. // One way of supporting UNION case can be seen at SPARK-24497 PR from Peter Toth. val newRecursion = recursion.transform { - case r: UnionLoopRef => + case r: UnionLoopRef if r.loopId == loopId => val logicalPlan = prevDF.logicalPlan val optimizedPlan = prevDF.queryExecution.optimizedPlan val (stats, constraints) = rewriteStatsAndConstraints(logicalPlan, optimizedPlan) diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/cte-recursion.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/cte-recursion.sql.out index 88f3f675c87b..33e486e6dc88 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/cte-recursion.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/cte-recursion.sql.out @@ -1256,3 +1256,47 @@ WithCTE +- Project [id#x, xid#x] +- SubqueryAlias t +- CTERelationRef xxxx, true, [id#x, xid#x], false, false + + +-- !query +WITH RECURSIVE t1(a, b) AS ( + SELECT 1, 1 + UNION ALL + SELECT a + b, a FROM t1 WHERE a < 20 +), +t2(n) AS ( + SELECT 1 + UNION ALL + SELECT n + 1 FROM t2, t1 WHERE n + 1 = a +) +SELECT * FROM t2 +-- !query analysis +WithCTE +:- CTERelationDef xxxx, false +: +- SubqueryAlias t1 +: +- Project [1#x AS a#x, 1#x AS b#x] +: +- UnionLoop xxxx +: :- Project [1 AS 1#x, 1 AS 1#x] +: : +- OneRowRelation +: +- Project [(a#x + b#x) AS (a + b)#x, a#x] +: +- Filter (a#x < 20) +: +- SubqueryAlias t1 +: +- Project [1#x AS a#x, 1#x AS b#x] +: +- UnionLoopRef xxxx, [1#x, 1#x], false +:- CTERelationDef xxxx, false +: +- SubqueryAlias t2 +: +- Project [1#x AS n#x] +: +- UnionLoop xxxx +: :- Project [1 AS 1#x] +: : +- OneRowRelation +: +- Project [(n#x + 1) AS (n + 1)#x] +: +- Filter ((n#x + 1) = a#x) +: +- Join Inner +: :- SubqueryAlias t2 +: : +- Project [1#x AS n#x] +: : +- UnionLoopRef xxxx, [1#x], false +: +- SubqueryAlias t1 +: +- CTERelationRef xxxx, true, [a#x, b#x], false, false ++- Project [n#x] + +- SubqueryAlias t2 + +- CTERelationRef xxxx, true, [n#x], false, false diff --git a/sql/core/src/test/resources/sql-tests/inputs/cte-recursion.sql b/sql/core/src/test/resources/sql-tests/inputs/cte-recursion.sql index 2780462478b0..c45f62196430 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/cte-recursion.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/cte-recursion.sql @@ -486,4 +486,17 @@ WITH RECURSIVE UNION ALL SELECT t.id + 1, xid * 10 + x.id FROM t CROSS JOIN x WHERE t.id < 3 ) -SELECT * FROM t \ No newline at end of file +SELECT * FROM t; + +-- rCTE referencing other rCTE +WITH RECURSIVE t1(a, b) AS ( + SELECT 1, 1 + UNION ALL + SELECT a + b, a FROM t1 WHERE a < 20 +), +t2(n) AS ( + SELECT 1 + UNION ALL + SELECT n + 1 FROM t2, t1 WHERE n + 1 = a +) +SELECT * FROM t2; \ No newline at end of file diff --git a/sql/core/src/test/resources/sql-tests/results/cte-recursion.sql.out b/sql/core/src/test/resources/sql-tests/results/cte-recursion.sql.out index 90762f81fd51..cc4e01e11ca6 100644 --- a/sql/core/src/test/resources/sql-tests/results/cte-recursion.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/cte-recursion.sql.out @@ -1178,3 +1178,23 @@ struct<id:int,xid:int> 3 212 3 221 3 222 + + +-- !query +WITH RECURSIVE t1(a, b) AS ( + SELECT 1, 1 + UNION ALL + SELECT a + b, a FROM t1 WHERE a < 20 +), +t2(n) AS ( + SELECT 1 + UNION ALL + SELECT n + 1 FROM t2, t1 WHERE n + 1 = a +) +SELECT * FROM t2 +-- !query schema +struct<n:int> +-- !query output +1 +2 +3 --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org