peter-toth commented on a change in pull request #33671:
URL: https://github.com/apache/spark/pull/33671#discussion_r684794342
##########
File path:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CTESubstitution.scala
##########
@@ -20,25 +20,60 @@ package org.apache.spark.sql.catalyst.analysis
import scala.collection.mutable
import org.apache.spark.sql.catalyst.expressions.SubqueryExpression
-import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan,
SubqueryAlias, With}
+import org.apache.spark.sql.catalyst.plans.logical.{Command, CTERelationDef,
CTERelationRef, InsertIntoDir, LogicalPlan, ParsedStatement, SubqueryAlias,
UnresolvedWith, WithCTE}
import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.catalyst.trees.TreePattern._
import org.apache.spark.sql.errors.QueryCompilationErrors
import org.apache.spark.sql.internal.SQLConf.{LEGACY_CTE_PRECEDENCE_POLICY,
LegacyBehaviorPolicy}
/**
- * Analyze WITH nodes and substitute child plan with CTE definitions.
+ * Analyze WITH nodes and substitute child plan with CTE references or CTE
definitions depending
+ * on the conditions below:
+ * 1. If in legacy mode, or if the query is a SQL command or DML statement,
replace with CTE
+ * definitions, i.e., inline CTEs.
+ * 2. Otherwise, replace with CTE references `CTERelationRef`s. The decision
to inline or not
+ * inline will be made later by the rule `InlineCTE` after query analysis.
+ *
+ * All the CTE definitions that are not inlined after this substitution will
be grouped together
+ * under one `WithCTE` node for each of the main query and the subqueries. Any
of the main query
+ * or the subqueries that do not contain CTEs or have had all CTEs inlined
will obviously not have
+ * any `WithCTE` nodes. If any though, the `WithCTE` node will be in the same
place as where the
+ * outermost `With` node once was.
+ *
+ * The CTE definitions in a `WithCTE` node are kept in the order of how they
have been resolved.
+ * That means the CTE definitions are guaranteed to be in topological order
base on their
+ * dependency for any valid CTE query (i.e., given CTE definitions A and B
with B referencing A,
+ * A is guaranteed to appear before B). Otherwise, it must be an invalid user
query, and an
+ * analysis exception will be thrown later by relation resolving rules.
*/
object CTESubstitution extends Rule[LogicalPlan] {
def apply(plan: LogicalPlan): LogicalPlan = {
- LegacyBehaviorPolicy.withName(conf.getConf(LEGACY_CTE_PRECEDENCE_POLICY))
match {
- case LegacyBehaviorPolicy.EXCEPTION =>
- assertNoNameConflictsInCTE(plan)
- traverseAndSubstituteCTE(plan)
- case LegacyBehaviorPolicy.LEGACY =>
- legacyTraverseAndSubstituteCTE(plan)
- case LegacyBehaviorPolicy.CORRECTED =>
- traverseAndSubstituteCTE(plan)
+ val isCommand = plan.find {
+ case _: Command | _: ParsedStatement | _: InsertIntoDir => true
+ case _ => false
+ }.isDefined
+ val cteDefs = mutable.ArrayBuffer.empty[CTERelationDef]
+ val (substituted, lastSubstituted) =
+
LegacyBehaviorPolicy.withName(conf.getConf(LEGACY_CTE_PRECEDENCE_POLICY)) match
{
+ case LegacyBehaviorPolicy.EXCEPTION =>
+ assertNoNameConflictsInCTE(plan)
+ traverseAndSubstituteCTE(plan, isCommand, cteDefs)
+ case LegacyBehaviorPolicy.LEGACY =>
+ (legacyTraverseAndSubstituteCTE(plan, cteDefs), None)
+ case LegacyBehaviorPolicy.CORRECTED =>
+ traverseAndSubstituteCTE(plan, isCommand, cteDefs)
+ }
+ if (cteDefs.isEmpty) {
+ substituted
+ } else if (substituted eq lastSubstituted.get) {
+ WithCTE(substituted, cteDefs)
+ } else {
+ var done = false
+ substituted.resolveOperators {
Review comment:
Maybe we could use `substituted.resolveOperatorsWithPruning(_ => !done)
{` to break out?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]