This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new ece4bc0759b8 [SPARK-51752][SQL] Enable rCTE referencing from within a CTE ece4bc0759b8 is described below commit ece4bc0759b8c508141167e3d910553091903843 Author: pavle-martinovic_data <pavle.martino...@databricks.com> AuthorDate: Sun Apr 13 20:53:18 2025 +0800 [SPARK-51752][SQL] Enable rCTE referencing from within a CTE ### What changes were proposed in this pull request? Enabling the possibility of a CTE referencing the recursive CTE it is inside of. This is done by modifying the CTESubstitution file, consisting of two main parts: - If traverseAndSubstituteCTE is called from resolveCTERelations when attempting to resolve a recursive CTE to resolve all the CTEs it references, we remember this ancestor rCTE in case any of the child CTEs want to reference it. If we encounter another rCTE inside of the rCTE (which is only allowed in the anchor), we define it to be the new anchor rCTE. - Even though the first part is enough to resolve these CTEs, a new problem arises when trying to identify whether a CTE is recursive or not, since if CTE0 is recursive and CTE1 is a CTE inside CTE0 that references CTE0, the only way to tell whether CTE0 is recursive is to check inside CTE1. For this reason we do in-place CTESubstitution inside of recursive CTEs - Additionally we modify the rules to identify cases with in-place CTE substitutions in ResolveWithCTE. ### Why are the changes needed? To make queries that self reference work. An example of such a query is: ``` WITH RECURSIVE t1 AS ( SELECT 1 AS n UNION ALL WITH t2 AS (SELECT n + 1 FROM t1 WHERE n < 5) SELECT * FROM t2 ) SELECT * FROM t1; ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing golden file tests for this that didn't work before. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #50546 from Pajaraja/pavle-martinovic_data/ctewithinrctefix. Lead-authored-by: pavle-martinovic_data <pavle.martino...@databricks.com> Co-authored-by: Pavle Martinovic <34302662+pajar...@users.noreply.github.com> Signed-off-by: Wenchen Fan <wenc...@databricks.com> --- .../sql/catalyst/analysis/CTESubstitution.scala | 69 +++-- .../sql/catalyst/analysis/ResolveWithCTE.scala | 105 ++++++- .../analyzer-results/cte-recursion.sql.out | 322 ++++++++++++++++----- .../analyzer-results/postgreSQL/with.sql.out | 39 +-- .../resources/sql-tests/inputs/cte-recursion.sql | 57 +++- .../sql-tests/results/cte-recursion.sql.out | 148 +++++++--- .../sql-tests/results/postgreSQL/with.sql.out | 2 +- 7 files changed, 584 insertions(+), 158 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CTESubstitution.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CTESubstitution.scala index 4bc3300b7574..5bbe85705ac1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CTESubstitution.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CTESubstitution.scala @@ -87,11 +87,11 @@ object CTESubstitution extends Rule[LogicalPlan] { LegacyBehaviorPolicy.withName(conf.getConf(LEGACY_CTE_PRECEDENCE_POLICY)) match { case LegacyBehaviorPolicy.EXCEPTION => assertNoNameConflictsInCTE(plan) - traverseAndSubstituteCTE(plan, forceInline, Seq.empty, cteDefs) + traverseAndSubstituteCTE(plan, forceInline, Seq.empty, cteDefs, None) case LegacyBehaviorPolicy.LEGACY => (legacyTraverseAndSubstituteCTE(plan, cteDefs), None) case LegacyBehaviorPolicy.CORRECTED => - traverseAndSubstituteCTE(plan, forceInline, Seq.empty, cteDefs) + traverseAndSubstituteCTE(plan, forceInline, Seq.empty, cteDefs, None) } if (cteDefs.isEmpty) { substituted @@ -162,7 +162,7 @@ object CTESubstitution extends Rule[LogicalPlan] { messageParameters = Map.empty) } val resolvedCTERelations = resolveCTERelations(relations, isLegacy = true, - forceInline = false, Seq.empty, cteDefs, allowRecursion) + forceInline = false, Seq.empty, cteDefs, None, allowRecursion) substituteCTE(child, alwaysInline = true, resolvedCTERelations, None) } } @@ -202,6 +202,8 @@ object CTESubstitution extends Rule[LogicalPlan] { * @param forceInline always inline the CTE relations if this is true * @param outerCTEDefs already resolved outer CTE definitions with names * @param cteDefs all accumulated CTE definitions + * @param recursiveCTERelationAncestor contains information of whether we are in a recursive CTE, + * as well as what CTE that is. * @return the plan where CTE substitution is applied and optionally the last substituted `With` * where CTE definitions will be gathered to */ @@ -209,7 +211,9 @@ object CTESubstitution extends Rule[LogicalPlan] { plan: LogicalPlan, forceInline: Boolean, outerCTEDefs: Seq[(String, CTERelationDef)], - cteDefs: ArrayBuffer[CTERelationDef]): (LogicalPlan, Option[LogicalPlan]) = { + cteDefs: ArrayBuffer[CTERelationDef], + recursiveCTERelationAncestor: Option[(String, CTERelationDef)] + ): (LogicalPlan, Option[LogicalPlan]) = { var firstSubstituted: Option[LogicalPlan] = None val newPlan = plan.resolveOperatorsDownWithPruning( _.containsAnyPattern(UNRESOLVED_WITH, PLAN_EXPRESSION)) { @@ -220,18 +224,31 @@ object CTESubstitution extends Rule[LogicalPlan] { errorClass = "RECURSIVE_CTE_WHEN_INLINING_IS_FORCED", messageParameters = Map.empty) } - val resolvedCTERelations = + + val tempCteDefs = ArrayBuffer.empty[CTERelationDef] + val resolvedCTERelations = if (recursiveCTERelationAncestor.isDefined) { + resolveCTERelations(relations, isLegacy = false, forceInline = false, outerCTEDefs, + tempCteDefs, recursiveCTERelationAncestor, allowRecursion) ++ outerCTEDefs + } else { resolveCTERelations(relations, isLegacy = false, forceInline, outerCTEDefs, cteDefs, - allowRecursion) ++ outerCTEDefs + recursiveCTERelationAncestor, allowRecursion) ++ outerCTEDefs + } val substituted = substituteCTE( - traverseAndSubstituteCTE(child, forceInline, resolvedCTERelations, cteDefs)._1, + traverseAndSubstituteCTE(child, forceInline, resolvedCTERelations, cteDefs, + recursiveCTERelationAncestor)._1, + // If we are resolving CTEs in a recursive CTE, we need to inline it in case the + // CTE contains the self reference. forceInline, resolvedCTERelations, None) if (firstSubstituted.isEmpty) { firstSubstituted = Some(substituted) } - substituted + if (recursiveCTERelationAncestor.isDefined) { + withCTEDefs(substituted, tempCteDefs.toSeq) + } else { + substituted + } case other => other.transformExpressionsWithPruning(_.containsPattern(PLAN_EXPRESSION)) { @@ -247,6 +264,7 @@ object CTESubstitution extends Rule[LogicalPlan] { forceInline: Boolean, outerCTEDefs: Seq[(String, CTERelationDef)], cteDefs: ArrayBuffer[CTERelationDef], + recursiveCTERelationAncestor: Option[(String, CTERelationDef)], allowRecursion: Boolean): Seq[(String, CTERelationDef)] = { val alwaysInline = isLegacy || forceInline var resolvedCTERelations = if (alwaysInline) { @@ -255,6 +273,21 @@ object CTESubstitution extends Rule[LogicalPlan] { outerCTEDefs } for ((name, relation) <- relations) { + // If recursion is allowed (RECURSIVE keyword specified) + // then it has higher priority than outer or previous relations. + // Therefore, we construct a `CTERelationDef` for the current relation. + // Later if we encounter unresolved relation which we need to find which CTE Def it is + // referencing to, we first check if it is a reference to this one. If yes, then we set the + // reference as being recursive. + val recursiveCTERelation = if (allowRecursion) { + Some(name -> CTERelationDef(relation)) + } else { + // If there is an outer recursive CTE relative to this one, and this one isn't recursive, + // then the self reference with the first-check priority is going to be the CteRelationDef + // of this recursive ancestor. + recursiveCTERelationAncestor + } + val innerCTEResolved = if (isLegacy) { // In legacy mode, outer CTE relations take precedence. Here we don't resolve the inner // `With` nodes, later we will substitute `UnresolvedRelation`s with outer CTE relations. @@ -305,26 +338,20 @@ object CTESubstitution extends Rule[LogicalPlan] { } else { resolvedCTERelations } - traverseAndSubstituteCTE(relation, forceInline, nonConflictingCTERelations, cteDefs)._1 + traverseAndSubstituteCTE(relation, forceInline, nonConflictingCTERelations, + cteDefs, recursiveCTERelation)._1 } - // If recursion is allowed (RECURSIVE keyword specified) - // then it has higher priority than outer or previous relations. - // Therefore, we construct a `CTERelationDef` for the current relation. - // Later if we encounter unresolved relation which we need to find which CTE Def it is - // referencing to, we first check if it is a reference to this one. If yes, then we set the - // reference as being recursive. - val recursiveCTERelation = if (allowRecursion) { - Some(name -> CTERelationDef(relation)) - } else { - None - } // CTE definition can reference a previous one or itself if recursion allowed. val substituted = substituteCTE(innerCTEResolved, alwaysInline, resolvedCTERelations, recursiveCTERelation) - val cteRelation = recursiveCTERelation + val cteRelation = if (allowRecursion) { + recursiveCTERelation .map(_._2.copy(child = substituted)) .getOrElse(CTERelationDef(substituted)) + } else { + CTERelationDef(substituted) + } if (!alwaysInline) { cteDefs += cteRelation } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveWithCTE.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveWithCTE.scala index 5a5acb24c697..6ec465f7ffe7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveWithCTE.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveWithCTE.scala @@ -83,6 +83,26 @@ object ResolveWithCTE extends Rule[LogicalPlan] { cteDef.copy(child = alias.copy(child = loop)) } + // Simple case of duplicating (UNION ALL) clause. + case alias @ SubqueryAlias(_, withCTE @ WithCTE( + Union(Seq(anchor, recursion), false, false), innerCteDefs)) => + if (!anchor.resolved) { + cteDef + } else { + // We need to check whether any of the inner CTEs has a self reference and replace + // it if needed + val newInnerCteDefs = innerCteDefs.map { innerCteDef => + innerCteDef.copy(child = rewriteRecursiveCTERefs( + innerCteDef.child, anchor, cteDef.id, None)) + } + val loop = UnionLoop( + cteDef.id, + anchor, + rewriteRecursiveCTERefs(recursion, anchor, cteDef.id, None)) + cteDef.copy(child = alias.copy(child = withCTE.copy( + plan = loop, cteDefs = newInnerCteDefs))) + } + // The case of CTE name followed by a parenthesized list of column name(s), eg. // WITH RECURSIVE t(n). case alias @ SubqueryAlias(_, @@ -100,7 +120,31 @@ object ResolveWithCTE extends Rule[LogicalPlan] { cteDef.copy(child = alias.copy(child = columnAlias.copy(child = loop))) } - // If the recursion is described with an UNION (deduplicating) clause then the + // The case of CTE name followed by a parenthesized list of column name(s), eg. + // WITH RECURSIVE t(n). + case alias @ SubqueryAlias(_, + columnAlias @ UnresolvedSubqueryColumnAliases( + colNames, + withCTE @ WithCTE(Union(Seq(anchor, recursion), false, false), innerCteDefs) + )) => + if (!anchor.resolved) { + cteDef + } else { + // We need to check whether any of the inner CTEs has a self reference and replace + // it if needed + val newInnerCteDefs = innerCteDefs.map { innerCteDef => + innerCteDef.copy(child = rewriteRecursiveCTERefs( + innerCteDef.child, anchor, cteDef.id, Some(colNames))) + } + val loop = UnionLoop( + cteDef.id, + anchor, + rewriteRecursiveCTERefs(recursion, anchor, cteDef.id, Some(colNames))) + cteDef.copy(child = alias.copy(child = columnAlias.copy( + child = withCTE.copy(plan = loop, cteDefs = newInnerCteDefs)))) + } + + // If the recursion is described with a UNION (deduplicating) clause then the // recursive term should not return those rows that have been calculated previously, // and we exclude those rows from the current iteration result. case alias @ SubqueryAlias(_, @@ -123,6 +167,34 @@ object ResolveWithCTE extends Rule[LogicalPlan] { cteDef.copy(child = alias.copy(child = loop)) } + // UNION case with CTEs inside. + case alias @ SubqueryAlias(_, withCTE @ WithCTE( + Distinct(Union(Seq(anchor, recursion), false, false)), innerCteDefs)) => + cteDef.failAnalysis( + errorClass = "UNION_NOT_SUPPORTED_IN_RECURSIVE_CTE", + messageParameters = Map.empty) + if (!anchor.resolved) { + cteDef + } else { + // We need to check whether any of the inner CTEs has a self reference and replace + // it if needed + val newInnerCteDefs = innerCteDefs.map { innerCteDef => + innerCteDef.copy(child = rewriteRecursiveCTERefs( + innerCteDef.child, anchor, cteDef.id, None)) + } + val loop = UnionLoop( + cteDef.id, + Distinct(anchor), + Except( + rewriteRecursiveCTERefs(recursion, anchor, cteDef.id, None), + UnionLoopRef(cteDef.id, anchor.output, true), + isAll = false + ) + ) + cteDef.copy(child = alias.copy(child = withCTE.copy( + plan = loop, cteDefs = newInnerCteDefs))) + } + // The case of CTE name followed by a parenthesized list of column name(s). case alias @ SubqueryAlias(_, columnAlias@UnresolvedSubqueryColumnAliases( @@ -147,6 +219,37 @@ object ResolveWithCTE extends Rule[LogicalPlan] { cteDef.copy(child = alias.copy(child = columnAlias.copy(child = loop))) } + // The case of CTE name followed by a parenthesized list of column name(s). + case alias @ SubqueryAlias(_, + columnAlias@UnresolvedSubqueryColumnAliases( + colNames, + WithCTE(Distinct(Union(Seq(anchor, recursion), false, false)), innerCteDefs) + )) => + cteDef.failAnalysis( + errorClass = "UNION_NOT_SUPPORTED_IN_RECURSIVE_CTE", + messageParameters = Map.empty) + if (!anchor.resolved) { + cteDef + } else { + // We need to check whether any of the inner CTEs has a self reference and replace + // it if needed + val newInnerCteDefs = innerCteDefs.map { innerCteDef => + innerCteDef.copy(child = rewriteRecursiveCTERefs( + innerCteDef.child, anchor, cteDef.id, Some(colNames))) + } + val loop = UnionLoop( + cteDef.id, + Distinct(anchor), + Except( + rewriteRecursiveCTERefs(recursion, anchor, cteDef.id, Some(colNames)), + UnionLoopRef(cteDef.id, anchor.output, true), + isAll = false + ) + ) + cteDef.copy(child = alias.copy(child = columnAlias.copy( + child = withCTE.copy(plan = loop, cteDefs = newInnerCteDefs)))) + } + case other => // We do not support cases of sole Union (needs a SubqueryAlias above it), nor // Project (as UnresolvedSubqueryColumnAliases have not been substituted with the diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/cte-recursion.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/cte-recursion.sql.out index 33e486e6dc88..f95f7a00bb4c 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/cte-recursion.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/cte-recursion.sql.out @@ -378,28 +378,32 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException WITH RECURSIVE t1 AS ( SELECT 1 AS level - UNION ( + UNION ALL ( WITH t2 AS (SELECT level + 1 FROM t1 WHERE level < 10) SELECT * FROM t2 ) ) SELECT * FROM t1 -- !query analysis -org.apache.spark.sql.catalyst.ExtendedAnalysisException -{ - "errorClass" : "TABLE_OR_VIEW_NOT_FOUND", - "sqlState" : "42P01", - "messageParameters" : { - "relationName" : "`t1`" - }, - "queryContext" : [ { - "objectType" : "", - "objectName" : "", - "startIndex" : 100, - "stopIndex" : 101, - "fragment" : "t1" - } ] -} +WithCTE +:- CTERelationDef xxxx, false +: +- SubqueryAlias t1 +: +- UnionLoop xxxx +: :- Project [1 AS level#x] +: : +- OneRowRelation +: +- WithCTE +: :- CTERelationDef xxxx, false +: : +- SubqueryAlias t2 +: : +- Project [(level#x + 1) AS (level + 1)#x] +: : +- Filter (level#x < 10) +: : +- SubqueryAlias t1 +: : +- UnionLoopRef xxxx, [level#x], false +: +- Project [(level + 1)#x] +: +- SubqueryAlias t2 +: +- CTERelationRef xxxx, true, [(level + 1)#x], false, false ++- Project [level#x] + +- SubqueryAlias t1 + +- CTERelationRef xxxx, true, [level#x], false, false -- !query @@ -424,21 +428,34 @@ WITH ) SELECT * FROM t2 -- !query analysis -org.apache.spark.sql.catalyst.ExtendedAnalysisException -{ - "errorClass" : "TABLE_OR_VIEW_NOT_FOUND", - "sqlState" : "42P01", - "messageParameters" : { - "relationName" : "`t1`" - }, - "queryContext" : [ { - "objectType" : "", - "objectName" : "", - "startIndex" : 159, - "stopIndex" : 160, - "fragment" : "t1" - } ] -} +WithCTE +:- CTERelationDef xxxx, false +: +- SubqueryAlias t1 +: +- Project [1 AS 1#x] +: +- OneRowRelation +:- CTERelationDef xxxx, false +: +- SubqueryAlias t1 +: +- UnionLoop xxxx +: :- Project [1 AS level#x] +: : +- OneRowRelation +: +- WithCTE +: :- CTERelationDef xxxx, false +: : +- SubqueryAlias t3 +: : +- Project [(level#x + 1) AS (level + 1)#x] +: : +- Filter (level#x < 10) +: : +- SubqueryAlias t1 +: : +- UnionLoopRef xxxx, [level#x], false +: +- Project [(level + 1)#x] +: +- SubqueryAlias t3 +: +- CTERelationRef xxxx, true, [(level + 1)#x], false, false +:- CTERelationDef xxxx, false +: +- SubqueryAlias t2 +: +- Project [level#x] +: +- SubqueryAlias t1 +: +- CTERelationRef xxxx, true, [level#x], false, false ++- Project [level#x] + +- SubqueryAlias t2 + +- CTERelationRef xxxx, true, [level#x], false, false -- !query @@ -962,24 +979,25 @@ SELECT * FROM r2 -- !query analysis WithCTE :- CTERelationDef xxxx, false -: +- SubqueryAlias r1 -: +- UnionLoop xxxx -: :- Project [0 AS innerlevel#x] -: : +- OneRowRelation -: +- Project [(innerlevel#x + 1) AS (innerlevel + 1)#x] -: +- Filter (innerlevel#x < 3) -: +- SubqueryAlias r1 -: +- UnionLoopRef xxxx, [innerlevel#x], false -:- CTERelationDef xxxx, false : +- SubqueryAlias r2 -: +- UnionLoop xxxx -: :- Project [0 AS outerlevel#x, innerlevel#x] +: +- WithCTE +: :- CTERelationDef xxxx, false : : +- SubqueryAlias r1 -: : +- CTERelationRef xxxx, true, [innerlevel#x], false, false -: +- Project [(outerlevel#x + 1) AS (outerlevel + 1)#x, innerlevel#x] -: +- Filter (outerlevel#x < 3) -: +- SubqueryAlias r2 -: +- UnionLoopRef xxxx, [outerlevel#x, innerlevel#x], false +: : +- UnionLoop xxxx +: : :- Project [0 AS innerlevel#x] +: : : +- OneRowRelation +: : +- Project [(innerlevel#x + 1) AS (innerlevel + 1)#x] +: : +- Filter (innerlevel#x < 3) +: : +- SubqueryAlias r1 +: : +- UnionLoopRef xxxx, [innerlevel#x], false +: +- UnionLoop xxxx +: :- Project [0 AS outerlevel#x, innerlevel#x] +: : +- SubqueryAlias r1 +: : +- CTERelationRef xxxx, true, [innerlevel#x], false, false +: +- Project [(outerlevel#x + 1) AS (outerlevel + 1)#x, innerlevel#x] +: +- Filter (outerlevel#x < 3) +: +- SubqueryAlias r2 +: +- UnionLoopRef xxxx, [outerlevel#x, innerlevel#x], false +- Project [outerlevel#x, innerlevel#x] +- SubqueryAlias r2 +- CTERelationRef xxxx, true, [outerlevel#x, innerlevel#x], false, false @@ -1001,25 +1019,26 @@ SELECT * FROM r WithCTE :- CTERelationDef xxxx, false : +- SubqueryAlias r -: +- Project [col1#x AS level#x] -: +- UnionLoop xxxx -: :- LocalRelation [col1#x] -: +- Project [(level#x + 1) AS (level + 1)#x] -: +- Filter (level#x < 3) -: +- SubqueryAlias r -: +- Project [col1#x AS level#x] -: +- UnionLoopRef xxxx, [col1#x], false -:- CTERelationDef xxxx, false -: +- SubqueryAlias r : +- Project [level#x AS level#x] -: +- Union false, false -: :- Project [level#x] +: +- WithCTE +: :- CTERelationDef xxxx, false : : +- SubqueryAlias r -: : +- CTERelationRef xxxx, true, [level#x], false, false -: +- Project [(level#x + 1) AS (level + 1)#x] -: +- Filter (level#x < 3) -: +- SubqueryAlias r -: +- CTERelationRef xxxx, true, [level#x], false, false +: : +- Project [col1#x AS level#x] +: : +- UnionLoop xxxx +: : :- LocalRelation [col1#x] +: : +- Project [(level#x + 1) AS (level + 1)#x] +: : +- Filter (level#x < 3) +: : +- SubqueryAlias r +: : +- Project [col1#x AS level#x] +: : +- UnionLoopRef xxxx, [col1#x], false +: +- Union false, false +: :- Project [level#x] +: : +- SubqueryAlias r +: : +- CTERelationRef xxxx, true, [level#x], false, false +: +- Project [(level#x + 1) AS (level + 1)#x] +: +- Filter (level#x < 3) +: +- SubqueryAlias r +: +- CTERelationRef xxxx, true, [level#x], false, false +- Project [level#x] +- SubqueryAlias r +- CTERelationRef xxxx, true, [level#x], false, false @@ -1041,21 +1060,22 @@ SELECT * FROM r WithCTE :- CTERelationDef xxxx, false : +- SubqueryAlias r -: +- Project [col1#x AS level#x] -: +- UnionLoop xxxx -: :- LocalRelation [col1#x] -: +- Project [(level#x + 1) AS (level + 1)#x] -: +- Filter (level#x < 3) -: +- SubqueryAlias r -: +- Project [col1#x AS level#x] -: +- UnionLoopRef xxxx, [col1#x], false -:- CTERelationDef xxxx, false -: +- SubqueryAlias r : +- Project [level#x AS level#x] : +- UnionLoop xxxx -: :- Project [level#x] -: : +- SubqueryAlias r -: : +- CTERelationRef xxxx, true, [level#x], false, false +: :- WithCTE +: : :- CTERelationDef xxxx, false +: : : +- SubqueryAlias r +: : : +- Project [col1#x AS level#x] +: : : +- UnionLoop xxxx +: : : :- LocalRelation [col1#x] +: : : +- Project [(level#x + 1) AS (level + 1)#x] +: : : +- Filter (level#x < 3) +: : : +- SubqueryAlias r +: : : +- Project [col1#x AS level#x] +: : : +- UnionLoopRef xxxx, [col1#x], false +: : +- Project [level#x] +: : +- SubqueryAlias r +: : +- CTERelationRef xxxx, true, [level#x], false, false : +- Project [(level#x + 1) AS (level + 1)#x] : +- Filter (level#x < 3) : +- SubqueryAlias r @@ -1300,3 +1320,151 @@ WithCTE +- Project [n#x] +- SubqueryAlias t2 +- CTERelationRef xxxx, true, [n#x], false, false + + +-- !query +WITH RECURSIVE t1 (n) AS ( + VALUES(1) + UNION ALL + ( + WITH t2(j) AS ( + SELECT n + 1 FROM t1 + ), + t3(k) AS ( + SELECT j FROM t2 + ) + SELECT k FROM t3 WHERE k <= 5 + ) +) +SELECT n FROM t1 +-- !query analysis +WithCTE +:- CTERelationDef xxxx, false +: +- SubqueryAlias t1 +: +- Project [col1#x AS n#x] +: +- UnionLoop xxxx +: :- LocalRelation [col1#x] +: +- WithCTE +: :- CTERelationDef xxxx, false +: : +- SubqueryAlias t2 +: : +- Project [(n + 1)#x AS j#x] +: : +- Project [(n#x + 1) AS (n + 1)#x] +: : +- SubqueryAlias t1 +: : +- Project [col1#x AS n#x] +: : +- UnionLoopRef xxxx, [col1#x], false +: :- CTERelationDef xxxx, false +: : +- SubqueryAlias t3 +: : +- Project [j#x AS k#x] +: : +- Project [j#x] +: : +- SubqueryAlias t2 +: : +- CTERelationRef xxxx, true, [j#x], false, false +: +- Project [k#x] +: +- Filter (k#x <= 5) +: +- SubqueryAlias t3 +: +- CTERelationRef xxxx, true, [k#x], false, false ++- Project [n#x] + +- SubqueryAlias t1 + +- CTERelationRef xxxx, true, [n#x], false, false + + +-- !query +WITH RECURSIVE r2(outerlevel1, innerlevel1) AS ( + WITH RECURSIVE r1 AS ( + SELECT 0 AS innerlevel + UNION ALL + SELECT innerlevel + 1 FROM r1 WHERE innerlevel < 3 + ) + SELECT 0 AS outerlevel, innerlevel FROM r1 + UNION ALL + SELECT outerlevel1 + 1, innerlevel1 FROM r2 WHERE outerlevel1 < 3 +) +SELECT * FROM r2 +-- !query analysis +WithCTE +:- CTERelationDef xxxx, false +: +- SubqueryAlias r2 +: +- Project [outerlevel#x AS outerlevel1#x, innerlevel#x AS innerlevel1#x] +: +- WithCTE +: :- CTERelationDef xxxx, false +: : +- SubqueryAlias r1 +: : +- UnionLoop xxxx +: : :- Project [0 AS innerlevel#x] +: : : +- OneRowRelation +: : +- Project [(innerlevel#x + 1) AS (innerlevel + 1)#x] +: : +- Filter (innerlevel#x < 3) +: : +- SubqueryAlias r1 +: : +- UnionLoopRef xxxx, [innerlevel#x], false +: +- UnionLoop xxxx +: :- Project [0 AS outerlevel#x, innerlevel#x] +: : +- SubqueryAlias r1 +: : +- CTERelationRef xxxx, true, [innerlevel#x], false, false +: +- Project [(outerlevel1#x + 1) AS (outerlevel1 + 1)#x, innerlevel1#x] +: +- Filter (outerlevel1#x < 3) +: +- SubqueryAlias r2 +: +- Project [outerlevel#x AS outerlevel1#x, innerlevel#x AS innerlevel1#x] +: +- UnionLoopRef xxxx, [outerlevel#x, innerlevel#x], false ++- Project [outerlevel1#x, innerlevel1#x] + +- SubqueryAlias r2 + +- CTERelationRef xxxx, true, [outerlevel1#x, innerlevel1#x], false, false + + +-- !query +WITH RECURSIVE t1(n) AS ( + WITH t2(n) AS (SELECT * FROM t1) + SELECT 1 + UNION ALL + SELECT n+1 FROM t2 WHERE n < 5) +SELECT * FROM t1 +-- !query analysis +WithCTE +:- CTERelationDef xxxx, false +: +- SubqueryAlias t1 +: +- Project [1#x AS n#x] +: +- WithCTE +: :- CTERelationDef xxxx, false +: : +- SubqueryAlias t2 +: : +- Project [n#x AS n#x] +: : +- Project [n#x] +: : +- SubqueryAlias t1 +: : +- Project [1#x AS n#x] +: : +- UnionLoopRef xxxx, [1#x], false +: +- UnionLoop xxxx +: :- Project [1 AS 1#x] +: : +- OneRowRelation +: +- Project [(n#x + 1) AS (n + 1)#x] +: +- Filter (n#x < 5) +: +- SubqueryAlias t2 +: +- CTERelationRef xxxx, true, [n#x], false, false ++- Project [n#x] + +- SubqueryAlias t1 + +- CTERelationRef xxxx, true, [n#x], false, false + + +-- !query +WITH RECURSIVE t1 AS ( + WITH t2(n) AS (SELECT * FROM t1) + SELECT 1 AS n + UNION ALL + SELECT n+1 FROM t2 WHERE n < 5) +SELECT * FROM t1 +-- !query analysis +WithCTE +:- CTERelationDef xxxx, false +: +- SubqueryAlias t1 +: +- WithCTE +: :- CTERelationDef xxxx, false +: : +- SubqueryAlias t2 +: : +- Project [n#x AS n#x] +: : +- Project [n#x] +: : +- SubqueryAlias t1 +: : +- UnionLoopRef xxxx, [n#x], false +: +- UnionLoop xxxx +: :- Project [1 AS n#x] +: : +- OneRowRelation +: +- Project [(n#x + 1) AS (n + 1)#x] +: +- Filter (n#x < 5) +: +- SubqueryAlias t2 +: +- CTERelationRef xxxx, true, [n#x], false, false ++- Project [n#x] + +- SubqueryAlias t1 + +- CTERelationRef xxxx, true, [n#x], false, false diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/with.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/with.sql.out index 2fdced93217d..32388ed10e73 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/with.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/with.sql.out @@ -1503,27 +1503,28 @@ SELECT * FROM t -- !query analysis WithCTE :- CTERelationDef xxxx, false -: +- SubqueryAlias s -: +- Project [col1#x AS i#x] -: +- UnionLoop xxxx -: :- LocalRelation [col1#x] -: +- Project [(i#x + 1) AS (i + 1)#x] -: +- Filter (i#x < 10) -: +- SubqueryAlias s -: +- Project [col1#x AS i#x] -: +- UnionLoopRef xxxx, [col1#x], false -:- CTERelationDef xxxx, false : +- SubqueryAlias t : +- Project [i#x AS j#x] -: +- UnionLoop xxxx -: :- Project [i#x] +: +- WithCTE +: :- CTERelationDef xxxx, false : : +- SubqueryAlias s -: : +- CTERelationRef xxxx, true, [i#x], false, false -: +- Project [(j#x + 1) AS (j + 1)#x] -: +- Filter (j#x < 10) -: +- SubqueryAlias t -: +- Project [i#x AS j#x] -: +- UnionLoopRef xxxx, [i#x], false +: : +- Project [col1#x AS i#x] +: : +- UnionLoop xxxx +: : :- LocalRelation [col1#x] +: : +- Project [(i#x + 1) AS (i + 1)#x] +: : +- Filter (i#x < 10) +: : +- SubqueryAlias s +: : +- Project [col1#x AS i#x] +: : +- UnionLoopRef xxxx, [col1#x], false +: +- UnionLoop xxxx +: :- Project [i#x] +: : +- SubqueryAlias s +: : +- CTERelationRef xxxx, true, [i#x], false, false +: +- Project [(j#x + 1) AS (j + 1)#x] +: +- Filter (j#x < 10) +: +- SubqueryAlias t +: +- Project [i#x AS j#x] +: +- UnionLoopRef xxxx, [i#x], false +- Project [j#x] +- SubqueryAlias t +- CTERelationRef xxxx, true, [j#x], false, false @@ -1622,7 +1623,7 @@ SELECT * FROM outermost ORDER BY 1 -- !query analysis org.apache.spark.sql.AnalysisException { - "errorClass" : "UNION_NOT_SUPPORTED_IN_RECURSIVE_CTE", + "errorClass" : "INVALID_RECURSIVE_REFERENCE.NUMBER", "sqlState" : "42836", "queryContext" : [ { "objectType" : "", diff --git a/sql/core/src/test/resources/sql-tests/inputs/cte-recursion.sql b/sql/core/src/test/resources/sql-tests/inputs/cte-recursion.sql index c45f62196430..97638b942464 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/cte-recursion.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/cte-recursion.sql @@ -143,20 +143,18 @@ WITH SELECT * FROM t2; --- recursive reference is not allowed in a nested CTE --- TABLE_OR_VIEW_NOT_FOUND is thrown now, although it some check should be added to exactly inform --- that this is not allowed +-- recursive reference in a nested CTE WITH RECURSIVE t1 AS ( SELECT 1 AS level - UNION ( + UNION ALL ( WITH t2 AS (SELECT level + 1 FROM t1 WHERE level < 10) SELECT * FROM t2 ) ) SELECT * FROM t1; --- recursive reference and conflicting outer CTEs are not allowed in a nested CTE +-- recursive reference and conflicting outer CTEs in a nested CTE SET spark.sql.legacy.ctePrecedencePolicy=CORRECTED; WITH t1 AS (SELECT 1), @@ -499,4 +497,51 @@ t2(n) AS ( UNION ALL SELECT n + 1 FROM t2, t1 WHERE n + 1 = a ) -SELECT * FROM t2; \ No newline at end of file +SELECT * FROM t2; + +-- Multiple CTEs within rCTE +WITH RECURSIVE t1 (n) AS ( + VALUES(1) + UNION ALL + ( + WITH t2(j) AS ( + SELECT n + 1 FROM t1 + ), + t3(k) AS ( + SELECT j FROM t2 + ) + SELECT k FROM t3 WHERE k <= 5 + ) +) +SELECT n FROM t1; + +-- Column aliases with CTEs inside rCTEs +WITH RECURSIVE r2(outerlevel1, innerlevel1) AS ( + WITH RECURSIVE r1 AS ( + SELECT 0 AS innerlevel + UNION ALL + SELECT innerlevel + 1 FROM r1 WHERE innerlevel < 3 + ) + SELECT 0 AS outerlevel, innerlevel FROM r1 + UNION ALL + SELECT outerlevel1 + 1, innerlevel1 FROM r2 WHERE outerlevel1 < 3 +) +SELECT * FROM r2; + +-- An inner cte that is defined for both the anchor and recursion but called only in the recursion +-- with subquery alias +WITH RECURSIVE t1(n) AS ( + WITH t2(n) AS (SELECT * FROM t1) + SELECT 1 + UNION ALL + SELECT n+1 FROM t2 WHERE n < 5) +SELECT * FROM t1; + +-- An inner cte that is defined for both the anchor and recursion but called only in the recursion +-- without query alias +WITH RECURSIVE t1 AS ( + WITH t2(n) AS (SELECT * FROM t1) + SELECT 1 AS n + UNION ALL + SELECT n+1 FROM t2 WHERE n < 5) +SELECT * FROM t1; \ No newline at end of file diff --git a/sql/core/src/test/resources/sql-tests/results/cte-recursion.sql.out b/sql/core/src/test/resources/sql-tests/results/cte-recursion.sql.out index cc4e01e11ca6..6b12d6a159bc 100644 --- a/sql/core/src/test/resources/sql-tests/results/cte-recursion.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/cte-recursion.sql.out @@ -358,30 +358,25 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException WITH RECURSIVE t1 AS ( SELECT 1 AS level - UNION ( + UNION ALL ( WITH t2 AS (SELECT level + 1 FROM t1 WHERE level < 10) SELECT * FROM t2 ) ) SELECT * FROM t1 -- !query schema -struct<> +struct<level:int> -- !query output -org.apache.spark.sql.catalyst.ExtendedAnalysisException -{ - "errorClass" : "TABLE_OR_VIEW_NOT_FOUND", - "sqlState" : "42P01", - "messageParameters" : { - "relationName" : "`t1`" - }, - "queryContext" : [ { - "objectType" : "", - "objectName" : "", - "startIndex" : 100, - "stopIndex" : 101, - "fragment" : "t1" - } ] -} +1 +10 +2 +3 +4 +5 +6 +7 +8 +9 -- !query @@ -408,23 +403,18 @@ WITH ) SELECT * FROM t2 -- !query schema -struct<> +struct<level:int> -- !query output -org.apache.spark.sql.catalyst.ExtendedAnalysisException -{ - "errorClass" : "TABLE_OR_VIEW_NOT_FOUND", - "sqlState" : "42P01", - "messageParameters" : { - "relationName" : "`t1`" - }, - "queryContext" : [ { - "objectType" : "", - "objectName" : "", - "startIndex" : 159, - "stopIndex" : 160, - "fragment" : "t1" - } ] -} +1 +10 +2 +3 +4 +5 +6 +7 +8 +9 -- !query @@ -1198,3 +1188,95 @@ struct<n:int> 1 2 3 + + +-- !query +WITH RECURSIVE t1 (n) AS ( + VALUES(1) + UNION ALL + ( + WITH t2(j) AS ( + SELECT n + 1 FROM t1 + ), + t3(k) AS ( + SELECT j FROM t2 + ) + SELECT k FROM t3 WHERE k <= 5 + ) +) +SELECT n FROM t1 +-- !query schema +struct<n:int> +-- !query output +1 +2 +3 +4 +5 + + +-- !query +WITH RECURSIVE r2(outerlevel1, innerlevel1) AS ( + WITH RECURSIVE r1 AS ( + SELECT 0 AS innerlevel + UNION ALL + SELECT innerlevel + 1 FROM r1 WHERE innerlevel < 3 + ) + SELECT 0 AS outerlevel, innerlevel FROM r1 + UNION ALL + SELECT outerlevel1 + 1, innerlevel1 FROM r2 WHERE outerlevel1 < 3 +) +SELECT * FROM r2 +-- !query schema +struct<outerlevel1:int,innerlevel1:int> +-- !query output +0 0 +0 1 +0 2 +0 3 +1 0 +1 1 +1 2 +1 3 +2 0 +2 1 +2 2 +2 3 +3 0 +3 1 +3 2 +3 3 + + +-- !query +WITH RECURSIVE t1(n) AS ( + WITH t2(n) AS (SELECT * FROM t1) + SELECT 1 + UNION ALL + SELECT n+1 FROM t2 WHERE n < 5) +SELECT * FROM t1 +-- !query schema +struct<n:int> +-- !query output +1 +2 +3 +4 +5 + + +-- !query +WITH RECURSIVE t1 AS ( + WITH t2(n) AS (SELECT * FROM t1) + SELECT 1 AS n + UNION ALL + SELECT n+1 FROM t2 WHERE n < 5) +SELECT * FROM t1 +-- !query schema +struct<n:int> +-- !query output +1 +2 +3 +4 +5 diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/with.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/with.sql.out index 53d1be3b1447..cfb021aeca75 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/with.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/with.sql.out @@ -1381,7 +1381,7 @@ struct<> -- !query output org.apache.spark.sql.AnalysisException { - "errorClass" : "UNION_NOT_SUPPORTED_IN_RECURSIVE_CTE", + "errorClass" : "INVALID_RECURSIVE_REFERENCE.NUMBER", "sqlState" : "42836", "queryContext" : [ { "objectType" : "", --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org