[spark] branch master updated: [SPARK-45012][SQL] CheckAnalysis should throw inlined plan in AnalysisException

wenchen Wed, 30 Aug 2023 19:52:31 -0700

This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git



The following commit(s) were added to refs/heads/master by this push:
     new a45a3a3d60cb [SPARK-45012][SQL] CheckAnalysis should throw inlined 
plan in AnalysisException
a45a3a3d60cb is described below

commit a45a3a3d60cb97b107a177ad16bfe36372bc3e9b
Author: Rui Wang <[email protected]>
AuthorDate: Thu Aug 31 10:51:44 2023 +0800

    [SPARK-45012][SQL] CheckAnalysis should throw inlined plan in 
AnalysisException
    
    ### What changes were proposed in this pull request?
    
    `CheckAnalysis` should attach the inlined plan to the `AnalysisException` it throws.
    
    ### Why are the changes needed?
    
    Before this change, the plan attached to an AnalysisException was the 
analyzed plan, not the inlined one. However, `CheckAnalysis` inlines the plan 
before checking it, so if an exception comes from `CheckAnalysis`, it should 
attach the inlined version of the plan, which is more useful for debugging than 
the original analyzed plan, especially when there is a CTE inside the plan.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No
    
    ### How was this patch tested?
    
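    Existing unit tests. The `create_view.sql.out` golden files are updated 
because the reported exception class is now `ExtendedAnalysisException`.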
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    No
    
    Closes #42729 from amaliujia/improve_analyzer.
    
    Authored-by: Rui Wang <[email protected]>
    Signed-off-by: Wenchen Fan <[email protected]>
---
 .../apache/spark/sql/catalyst/analysis/Analyzer.scala   |  9 ++-------
 .../spark/sql/catalyst/analysis/CheckAnalysis.scala     | 17 +++++++++++++++--
 .../analyzer-results/postgreSQL/create_view.sql.out     |  2 +-
 .../sql-tests/results/postgreSQL/create_view.sql.out    |  2 +-
 4 files changed, 19 insertions(+), 11 deletions(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index cf819c7346c6..9a6d9c8b735b 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -209,13 +209,8 @@ class Analyzer(override val catalogManager: 
CatalogManager) extends RuleExecutor
     if (plan.analyzed) return plan
     AnalysisHelper.markInAnalyzer {
       val analyzed = executeAndTrack(plan, tracker)
-      try {
-        checkAnalysis(analyzed)
-        analyzed
-      } catch {
-        case e: AnalysisException =>
-          throw new ExtendedAnalysisException(e, analyzed)
-      }
+      checkAnalysis(analyzed)
+      analyzed
     }
   }
 
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
index 13999f391d9c..038cd7d944af 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
@@ -20,6 +20,7 @@ import scala.collection.mutable
 
 import org.apache.spark.SparkException
 import org.apache.spark.sql.AnalysisException
+import org.apache.spark.sql.catalyst.ExtendedAnalysisException
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.expressions.SubExprUtils._
 import 
org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, 
Median, PercentileCont, PercentileDisc}
@@ -154,10 +155,22 @@ trait CheckAnalysis extends PredicateHelper with 
LookupCatalog with QueryErrorsB
     cteMap.values.foreach { case (relation, refCount, _) =>
       // If a CTE relation is never used, it will disappear after inline. Here 
we explicitly check
       // analysis for it, to make sure the entire query plan is valid.
-      if (refCount == 0) checkAnalysis0(relation.child)
+      try {
+        if (refCount == 0) checkAnalysis0(relation.child)
+      } catch {
+        case e: AnalysisException =>
+          throw new ExtendedAnalysisException(e, relation.child)
+      }
+
     }
     // Inline all CTEs in the plan to help check query plan structures in 
subqueries.
-    checkAnalysis0(inlineCTE(plan))
+    val inlinedPlan = inlineCTE(plan)
+    try {
+      checkAnalysis0(inlinedPlan)
+    } catch {
+      case e: AnalysisException =>
+        throw new ExtendedAnalysisException(e, inlinedPlan)
+    }
     plan.setAnalyzed()
   }
 
diff --git 
a/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/create_view.sql.out
 
b/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/create_view.sql.out
index b199cb55f2a4..35c20597e2b6 100644
--- 
a/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/create_view.sql.out
+++ 
b/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/create_view.sql.out
@@ -52,7 +52,7 @@ 
org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException
 CREATE VIEW key_dependent_view AS
    SELECT * FROM view_base_table GROUP BY key
 -- !query analysis
-org.apache.spark.sql.AnalysisException
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
 {
   "errorClass" : "MISSING_AGGREGATION",
   "sqlState" : "42803",
diff --git 
a/sql/core/src/test/resources/sql-tests/results/postgreSQL/create_view.sql.out 
b/sql/core/src/test/resources/sql-tests/results/postgreSQL/create_view.sql.out
index bcd14c72a831..2a83cc19b6cf 100644
--- 
a/sql/core/src/test/resources/sql-tests/results/postgreSQL/create_view.sql.out
+++ 
b/sql/core/src/test/resources/sql-tests/results/postgreSQL/create_view.sql.out
@@ -52,7 +52,7 @@ CREATE VIEW key_dependent_view AS
 -- !query schema
 struct<>
 -- !query output
-org.apache.spark.sql.AnalysisException
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
 {
   "errorClass" : "MISSING_AGGREGATION",
   "sqlState" : "42803",


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

[spark] branch master updated: [SPARK-45012][SQL] CheckAnalysis should throw inlined plan in AnalysisException

Reply via email to