cloud-fan commented on a change in pull request #28371: URL: https://github.com/apache/spark/pull/28371#discussion_r415953305
########## File path: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CTESubstitution.scala ########## @@ -41,34 +43,45 @@ object CTESubstitution extends Rule[LogicalPlan] { } /** - * Check the plan to be traversed has naming conflicts in nested CTE or not, traverse through - * child, innerChildren and subquery expressions for the current plan. + * Spark 3.0 changes the CTE relations resolution, and inner relations take precedence. This is + * correct but we need to warn users about this behavior change under EXCEPTION mode, when we see + * CTE relations with conflicting names. + * + * Note that, before Spark 3.0 the parser didn't support CTE in the FROM clause. For example, + * `WITH ... SELECT * FROM (WITH ... SELECT ...)` was not supported. We should not fail for this + * case, as Spark versions before 3.0 can't run it anyway. The parameter `startOfQuery` is used + * to indicate where we can define CTE relations before Spark 3.0, and we should only check + * name conflicts when `startOfQuery` is true. */ private def assertNoNameConflictsInCTE( plan: LogicalPlan, - outerCTERelationNames: Set[String] = Set.empty, - namesInSubqueries: Set[String] = Set.empty): Unit = { + outerCTERelationNames: Seq[String] = Nil, + startOfQuery: Boolean = true): Unit = { + val resolver = SQLConf.get.resolver plan match { - case w @ With(child, relations) => - val newNames = relations.map { - case (cteName, _) => - if (outerCTERelationNames.contains(cteName)) { - throw new AnalysisException(s"Name $cteName is ambiguous in nested CTE. " + + case With(child, relations) => + val newNames = mutable.ArrayBuffer.empty[String] + newNames ++= outerCTERelationNames + relations.foreach { + case (name, relation) => + if (startOfQuery && outerCTERelationNames.exists(resolver(_, name))) { + throw new AnalysisException(s"Name $name is ambiguous in nested CTE. " + s"Please set ${LEGACY_CTE_PRECEDENCE_POLICY.key} to CORRECTED so that name " + "defined in inner CTE takes precedence. If set it to LEGACY, outer CTE " + "definitions will take precedence. See more details in SPARK-28228.") - } else { - cteName } - }.toSet ++ namesInSubqueries - assertNoNameConflictsInCTE(child, outerCTERelationNames, newNames) - w.innerChildren.foreach(assertNoNameConflictsInCTE(_, newNames, newNames)) + assertNoNameConflictsInCTE(relation, newNames) Review comment: good idea! updated. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org