This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new a45a3a3d60cb [SPARK-45012][SQL] CheckAnalysis should throw inlined
plan in AnalysisException
a45a3a3d60cb is described below
commit a45a3a3d60cb97b107a177ad16bfe36372bc3e9b
Author: Rui Wang <[email protected]>
AuthorDate: Thu Aug 31 10:51:44 2023 +0800
[SPARK-45012][SQL] CheckAnalysis should throw inlined plan in
AnalysisException
### What changes were proposed in this pull request?
CheckAnalysis should throw inlined plan in AnalysisException
### Why are the changes needed?
Before this change, the plan attached to AnalysisException is analyzed plan
but not inlined. However, `CheckAnalysis` inlines and checks the plan, so if an
exception is from `CheckAnalysis`, it should attach the inlined version of plan
which is more useful to debug than the original analyzed plan, especially when
there is CTE in inside the plan.
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
Existing UT
### Was this patch authored or co-authored using generative AI tooling?
No
Closes #42729 from amaliujia/improve_analyzer.
Authored-by: Rui Wang <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
---
.../apache/spark/sql/catalyst/analysis/Analyzer.scala | 9 ++-------
.../spark/sql/catalyst/analysis/CheckAnalysis.scala | 17 +++++++++++++++--
.../analyzer-results/postgreSQL/create_view.sql.out | 2 +-
.../sql-tests/results/postgreSQL/create_view.sql.out | 2 +-
4 files changed, 19 insertions(+), 11 deletions(-)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index cf819c7346c6..9a6d9c8b735b 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -209,13 +209,8 @@ class Analyzer(override val catalogManager:
CatalogManager) extends RuleExecutor
if (plan.analyzed) return plan
AnalysisHelper.markInAnalyzer {
val analyzed = executeAndTrack(plan, tracker)
- try {
- checkAnalysis(analyzed)
- analyzed
- } catch {
- case e: AnalysisException =>
- throw new ExtendedAnalysisException(e, analyzed)
- }
+ checkAnalysis(analyzed)
+ analyzed
}
}
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
index 13999f391d9c..038cd7d944af 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
@@ -20,6 +20,7 @@ import scala.collection.mutable
import org.apache.spark.SparkException
import org.apache.spark.sql.AnalysisException
+import org.apache.spark.sql.catalyst.ExtendedAnalysisException
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.expressions.SubExprUtils._
import
org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression,
Median, PercentileCont, PercentileDisc}
@@ -154,10 +155,22 @@ trait CheckAnalysis extends PredicateHelper with
LookupCatalog with QueryErrorsB
cteMap.values.foreach { case (relation, refCount, _) =>
// If a CTE relation is never used, it will disappear after inline. Here
we explicitly check
// analysis for it, to make sure the entire query plan is valid.
- if (refCount == 0) checkAnalysis0(relation.child)
+ try {
+ if (refCount == 0) checkAnalysis0(relation.child)
+ } catch {
+ case e: AnalysisException =>
+ throw new ExtendedAnalysisException(e, relation.child)
+ }
+
}
// Inline all CTEs in the plan to help check query plan structures in
subqueries.
- checkAnalysis0(inlineCTE(plan))
+ val inlinedPlan = inlineCTE(plan)
+ try {
+ checkAnalysis0(inlinedPlan)
+ } catch {
+ case e: AnalysisException =>
+ throw new ExtendedAnalysisException(e, inlinedPlan)
+ }
plan.setAnalyzed()
}
diff --git
a/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/create_view.sql.out
b/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/create_view.sql.out
index b199cb55f2a4..35c20597e2b6 100644
---
a/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/create_view.sql.out
+++
b/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/create_view.sql.out
@@ -52,7 +52,7 @@
org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException
CREATE VIEW key_dependent_view AS
SELECT * FROM view_base_table GROUP BY key
-- !query analysis
-org.apache.spark.sql.AnalysisException
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
{
"errorClass" : "MISSING_AGGREGATION",
"sqlState" : "42803",
diff --git
a/sql/core/src/test/resources/sql-tests/results/postgreSQL/create_view.sql.out
b/sql/core/src/test/resources/sql-tests/results/postgreSQL/create_view.sql.out
index bcd14c72a831..2a83cc19b6cf 100644
---
a/sql/core/src/test/resources/sql-tests/results/postgreSQL/create_view.sql.out
+++
b/sql/core/src/test/resources/sql-tests/results/postgreSQL/create_view.sql.out
@@ -52,7 +52,7 @@ CREATE VIEW key_dependent_view AS
-- !query schema
struct<>
-- !query output
-org.apache.spark.sql.AnalysisException
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
{
"errorClass" : "MISSING_AGGREGATION",
"sqlState" : "42803",
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]