This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 24edc0ef5bee [SPARK-45752][SQL] Unreferenced CTE should all be checked
by CheckAnalysis0
24edc0ef5bee is described below
commit 24edc0ef5bee578de8eec3b032f993812e4303ea
Author: Rui Wang <[email protected]>
AuthorDate: Thu Nov 9 15:25:52 2023 +0800
[SPARK-45752][SQL] Unreferenced CTE should all be checked by CheckAnalysis0
### What changes were proposed in this pull request?
This PR fixes an issue that if a CTE is referenced by a non-referenced CTE,
then this CTE should also have ref count as 0 and goes through CheckAnalysis0.
This will guarantee analyzer throw proper error message for problematic CTE
which is not referenced.
### Why are the changes needed?
To improve error message for non-referenced CTE case.
### Does this PR introduce _any_ user-facing change?
NO
### How was this patch tested?
UT
### Was this patch authored or co-authored using generative AI tooling?
NO
Closes #43614 from amaliujia/cte_ref.
Lead-authored-by: Rui Wang <[email protected]>
Co-authored-by: Wenchen Fan <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
---
.../sql/catalyst/analysis/CheckAnalysis.scala | 28 ++++++++++++++++++++--
.../org/apache/spark/sql/CTEInlineSuite.scala | 11 +++++++++
2 files changed, 37 insertions(+), 2 deletions(-)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
index cebaee2cdec9..29d60ae0f41e 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
@@ -148,15 +148,39 @@ trait CheckAnalysis extends PredicateHelper with
LookupCatalog with QueryErrorsB
errorClass, missingCol, orderedCandidates, a.origin)
}
+ private def checkUnreferencedCTERelations(
+ cteMap: mutable.Map[Long, (CTERelationDef, Int, mutable.Map[Long, Int])],
+ visited: mutable.Map[Long, Boolean],
+ cteId: Long): Unit = {
+ if (visited(cteId)) {
+ return
+ }
+ val (cteDef, _, refMap) = cteMap(cteId)
+ refMap.foreach { case (id, _) =>
+ checkUnreferencedCTERelations(cteMap, visited, id)
+ }
+ checkAnalysis0(cteDef.child)
+ visited(cteId) = true
+ }
+
def checkAnalysis(plan: LogicalPlan): Unit = {
val inlineCTE = InlineCTE(alwaysInline = true)
val cteMap = mutable.HashMap.empty[Long, (CTERelationDef, Int,
mutable.Map[Long, Int])]
inlineCTE.buildCTEMap(plan, cteMap)
- cteMap.values.foreach { case (relation, refCount, _) =>
+ cteMap.values.foreach { case (relation, _, _) =>
// If a CTE relation is never used, it will disappear after inline. Here
we explicitly check
// analysis for it, to make sure the entire query plan is valid.
try {
- if (refCount == 0) checkAnalysis0(relation.child)
+ // If a CTE relation ref count is 0, the other CTE relations that
reference it
+ // should also be checked by checkAnalysis0. This code will also
guarantee the leaf
+ // relations that do not reference any others are checked first.
+ val visited: mutable.Map[Long, Boolean] =
mutable.Map.empty.withDefaultValue(false)
+ cteMap.foreach { case (cteId, _) =>
+ val (_, refCount, _) = cteMap(cteId)
+ if (refCount == 0) {
+ checkUnreferencedCTERelations(cteMap, visited, cteId)
+ }
+ }
} catch {
case e: AnalysisException =>
throw new ExtendedAnalysisException(e, relation.child)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CTEInlineSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/CTEInlineSuite.scala
index 5f6c44792658..055c04992c00 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/CTEInlineSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/CTEInlineSuite.scala
@@ -678,6 +678,17 @@ abstract class CTEInlineSuiteBase
}.isDefined, "CTE columns should not be pruned.")
}
}
+
+ test("SPARK-45752: Unreferenced CTE should all be checked by
CheckAnalysis0") {
+ val e = intercept[AnalysisException](sql(
+ s"""
+ |with
+ |a as (select * from non_exist),
+ |b as (select * from a)
+ |select 2
+ |""".stripMargin))
+ checkErrorTableNotFound(e, "`non_exist`", ExpectedContext("non_exist", 26,
34))
+ }
}
class CTEInlineSuiteAEOff extends CTEInlineSuiteBase with
DisableAdaptiveExecutionSuite
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]