This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new b44e243dbd9c [SQL][MINOR] Update comments and remove dead code in 
UnionLoop
b44e243dbd9c is described below

commit b44e243dbd9c6b2fa546182f93e8bb1a2de2230a
Author: pavle-martinovic_data <pavle.martino...@databricks.com>
AuthorDate: Tue Jun 10 15:03:09 2025 -0700

    [SQL][MINOR] Update comments and remove dead code in UnionLoop
    
    ### What changes were proposed in this pull request?
    
    Remove the function assertNoRecursiveCTE, which is no longer used (it 
became obsolete when recursive references from subqueries were enabled).
    Also edit comments and the error message to reflect the current state.
    
    ### Why are the changes needed?
    
    Code cleanliness.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No.
    
    ### How was this patch tested?
    
    Existing golden file tests in cte-recursion and postgreSQL/with.sql.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    No.
    
    Closes #51097 from Pajaraja/pavle-martinovic_data/rctecleanup.
    
    Authored-by: pavle-martinovic_data <pavle.martino...@databricks.com>
    Signed-off-by: Wenchen Fan <wenc...@databricks.com>
---
 .../src/main/resources/error/error-conditions.json   |  7 +------
 .../spark/sql/catalyst/analysis/CheckAnalysis.scala  | 20 ++------------------
 .../spark/sql/catalyst/optimizer/Optimizer.scala     |  3 +--
 3 files changed, 4 insertions(+), 26 deletions(-)

diff --git a/common/utils/src/main/resources/error/error-conditions.json 
b/common/utils/src/main/resources/error/error-conditions.json
index fe14f4e82793..3ba7f71b00d4 100644
--- a/common/utils/src/main/resources/error/error-conditions.json
+++ b/common/utils/src/main/resources/error/error-conditions.json
@@ -3416,14 +3416,9 @@
       "Invalid recursive reference found inside WITH RECURSIVE clause."
     ],
     "subClass" : {
-      "NUMBER" : {
-        "message" : [
-          "Multiple self-references to one recursive CTE are not allowed."
-        ]
-      },
       "PLACE" : {
         "message" : [
-          "Recursive references cannot be used on the right side of left 
outer/semi/anti joins, on the left side of right outer joins, in full outer 
joins, in aggregates, and in subquery expressions."
+          "Recursive references cannot be used on the right side of left 
outer/semi/anti joins, on the left side of right outer joins, in full outer 
joins and in aggregates"
         ]
       }
     },
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
index f34cf8ce5349..863398de9cc9 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
@@ -947,30 +947,14 @@ trait CheckAnalysis extends LookupCatalog with 
QueryErrorsBase with PlanToString
     if (expr.plan.isStreaming) {
       plan.failAnalysis("INVALID_SUBQUERY_EXPRESSION.STREAMING_QUERY", 
Map.empty)
     }
-    assertNoRecursiveCTE(expr.plan)
     checkAnalysis0(expr.plan)
     ValidateSubqueryExpression(plan, expr)
   }
 
-  private def assertNoRecursiveCTE(plan: LogicalPlan): Unit = {
-    plan.foreach {
-      case r: CTERelationRef if r.recursive =>
-        throw new AnalysisException(
-          errorClass = "INVALID_RECURSIVE_REFERENCE.PLACE",
-          messageParameters = Map.empty)
-      case p => 
p.expressions.filter(_.containsPattern(PLAN_EXPRESSION)).foreach {
-        expr => expr.foreach {
-          case s: SubqueryExpression => assertNoRecursiveCTE(s.plan)
-          case _ =>
-        }
-      }
-    }
-  }
-
   /**
    * Validate that collected metrics names are unique. The same name cannot be 
used for metrics
-   * with different results. However multiple instances of metrics with with 
same result and name
-   * are allowed (e.g. self-joins).
+   * with different results. However, multiple instances of metrics with same 
result and name are
+   * allowed (e.g. self-joins).
    */
   private def checkCollectedMetrics(plan: LogicalPlan): Unit = {
     val metricsMap = mutable.Map.empty[String, CollectMetrics]
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
index 66c3bfb46530..8236fee4bcb9 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
@@ -1041,8 +1041,7 @@ object ColumnPruning extends Rule[LogicalPlan] {
         p
       }
 
-    // TODO: Pruning `UnionLoop`s needs to take into account both the outer 
`Project` and the inner
-    //  `UnionLoopRef` nodes.
+    // Avoid pruning UnionLoop because of its recursive nature.
     case p @ Project(_, _: UnionLoop) => p
 
     // Prune unnecessary window expressions


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to