This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
     new b44e243dbd9c [SQL][MINOR] Update comments and remove dead code in UnionLoop
b44e243dbd9c is described below

commit b44e243dbd9c6b2fa546182f93e8bb1a2de2230a
Author: pavle-martinovic_data <pavle.martino...@databricks.com>
AuthorDate: Tue Jun 10 15:03:09 2025 -0700

    [SQL][MINOR] Update comments and remove dead code in UnionLoop

    ### What changes were proposed in this pull request?

    Remove function assertNoRecursiveCTE which is no longer used (when recursive references from subqueries were enabled, this function became obsolete). Also edit comments and error message to reflect current state.

    ### Why are the changes needed?

    Code cleanliness.

    ### Does this PR introduce _any_ user-facing change?

    No.

    ### How was this patch tested?

    Existing golden file tests in cte-recursion and postgreSQL/with.sql.

    ### Was this patch authored or co-authored using generative AI tooling?

    No.

    Closes #51097 from Pajaraja/pavle-martinovic_data/rctecleanup.

    Authored-by: pavle-martinovic_data <pavle.martino...@databricks.com>
    Signed-off-by: Wenchen Fan <wenc...@databricks.com>
---
 .../src/main/resources/error/error-conditions.json  |  7 +------
 .../spark/sql/catalyst/analysis/CheckAnalysis.scala | 20 ++------------------
 .../spark/sql/catalyst/optimizer/Optimizer.scala    |  3 +--
 3 files changed, 4 insertions(+), 26 deletions(-)

diff --git a/common/utils/src/main/resources/error/error-conditions.json b/common/utils/src/main/resources/error/error-conditions.json
index fe14f4e82793..3ba7f71b00d4 100644
--- a/common/utils/src/main/resources/error/error-conditions.json
+++ b/common/utils/src/main/resources/error/error-conditions.json
@@ -3416,14 +3416,9 @@
       "Invalid recursive reference found inside WITH RECURSIVE clause."
     ],
     "subClass" : {
-      "NUMBER" : {
-        "message" : [
-          "Multiple self-references to one recursive CTE are not allowed."
-        ]
-      },
       "PLACE" : {
         "message" : [
-          "Recursive references cannot be used on the right side of left outer/semi/anti joins, on the left side of right outer joins, in full outer joins, in aggregates, and in subquery expressions."
+          "Recursive references cannot be used on the right side of left outer/semi/anti joins, on the left side of right outer joins, in full outer joins and in aggregates"
         ]
       }
     },
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
index f34cf8ce5349..863398de9cc9 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
@@ -947,30 +947,14 @@ trait CheckAnalysis extends LookupCatalog with QueryErrorsBase with PlanToString
         if (expr.plan.isStreaming) {
           plan.failAnalysis("INVALID_SUBQUERY_EXPRESSION.STREAMING_QUERY", Map.empty)
         }
-        assertNoRecursiveCTE(expr.plan)
         checkAnalysis0(expr.plan)
         ValidateSubqueryExpression(plan, expr)
     }
 
-  private def assertNoRecursiveCTE(plan: LogicalPlan): Unit = {
-    plan.foreach {
-      case r: CTERelationRef if r.recursive =>
-        throw new AnalysisException(
-          errorClass = "INVALID_RECURSIVE_REFERENCE.PLACE",
-          messageParameters = Map.empty)
-      case p => p.expressions.filter(_.containsPattern(PLAN_EXPRESSION)).foreach {
-        expr => expr.foreach {
-          case s: SubqueryExpression => assertNoRecursiveCTE(s.plan)
-          case _ =>
-        }
-      }
-    }
-  }
-
   /**
    * Validate that collected metrics names are unique. The same name cannot be used for metrics
-   * with different results. However multiple instances of metrics with with same result and name
-   * are allowed (e.g. self-joins).
+   * with different results. However, multiple instances of metrics with same result and name are
+   * allowed (e.g. self-joins).
    */
   private def checkCollectedMetrics(plan: LogicalPlan): Unit = {
     val metricsMap = mutable.Map.empty[String, CollectMetrics]
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
index 66c3bfb46530..8236fee4bcb9 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
@@ -1041,8 +1041,7 @@ object ColumnPruning extends Rule[LogicalPlan] {
         p
       }
 
-    // TODO: Pruning `UnionLoop`s needs to take into account both the outer `Project` and the inner
-    // `UnionLoopRef` nodes.
+    // Avoid pruning UnionLoop because of its recursive nature.
     case p @ Project(_, _: UnionLoop) => p
 
     // Prune unnecessary window expressions


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org