This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 06fd340daef [SPARK-39112][SQL] UnsupportedOperationException if 
spark.sql.ui.explainMode is set to cost
06fd340daef is described below

commit 06fd340daefd67a3e96393539401c9bf4b3cbde9
Author: ulysses-you <ulyssesyo...@gmail.com>
AuthorDate: Wed May 11 14:32:05 2022 +0800

    [SPARK-39112][SQL] UnsupportedOperationException if 
spark.sql.ui.explainMode is set to cost
    
    ### What changes were proposed in this pull request?
    
    Add a new leaf like node `LeafNodeWithoutStats` and apply to the list:
    - ResolvedDBObjectName
    - ResolvedNamespace
    - ResolvedTable
    - ResolvedView
    - ResolvedNonPersistentFunc
    - ResolvedPersistentFunc
    
    ### Why are the changes needed?
    
    We enable v2 command at 3.3.0 branch by default 
`spark.sql.legacy.useV1Command`. However, this is a behavior change between v1 
and v2 command.
    
    - v1 command:
      We resolve logical plan to command at analyzer phase by 
`ResolveSessionCatalog`
    
    - v2 command:
      We resolve logical plan to v2 command at physical phase by 
`DataSourceV2Strategy`
    
    For cost explain mode, we will call `LogicalPlanStats.stats` using the 
optimized plan, so there is a gap between v1 and v2 command.
    Unfortunately, the logical plan of v2 command contains the `LeafNode` which 
does not override the `computeStats`. As a result, there is an error running 
such sql:
    ```sql
    set spark.sql.ui.explainMode=cost;
    show tables;
    ```
    
    ```
    java.lang.UnsupportedOperationException:
            at 
org.apache.spark.sql.catalyst.plans.logical.LeafNode.computeStats(LogicalPlan.scala:171)
            at 
org.apache.spark.sql.catalyst.plans.logical.LeafNode.computeStats$(LogicalPlan.scala:171)
            at 
org.apache.spark.sql.catalyst.analysis.ResolvedNamespace.computeStats(v2ResolutionPlans.scala:155)
            at 
org.apache.spark.sql.catalyst.plans.logical.statsEstimation.SizeInBytesOnlyStatsPlanVisitor$.default(SizeInBytesOnlyStatsPlanVisitor.scala:55)
            at 
org.apache.spark.sql.catalyst.plans.logical.statsEstimation.SizeInBytesOnlyStatsPlanVisitor$.default(SizeInBytesOnlyStatsPlanVisitor.scala:27)
            at 
org.apache.spark.sql.catalyst.plans.logical.LogicalPlanVisitor.visit(LogicalPlanVisitor.scala:49)
            at 
org.apache.spark.sql.catalyst.plans.logical.LogicalPlanVisitor.visit$(LogicalPlanVisitor.scala:25)
            at 
org.apache.spark.sql.catalyst.plans.logical.statsEstimation.SizeInBytesOnlyStatsPlanVisitor$.visit(SizeInBytesOnlyStatsPlanVisitor.scala:27)
            at 
org.apache.spark.sql.catalyst.plans.logical.statsEstimation.LogicalPlanStats.$anonfun$stats$1(LogicalPlanStats.scala:37)
            at scala.Option.getOrElse(Option.scala:189)
            at 
org.apache.spark.sql.catalyst.plans.logical.statsEstimation.LogicalPlanStats.stats(LogicalPlanStats.scala:33)
            at 
org.apache.spark.sql.catalyst.plans.logical.statsEstimation.LogicalPlanStats.stats$(LogicalPlanStats.scala:33)
            at 
org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.stats(LogicalPlan.scala:30)
    ```
    
    ### Does this PR introduce _any_ user-facing change?
    
    yes, bug fix
    
    ### How was this patch tested?
    
    add test
    
    Closes #36488 from ulysses-you/SPARK-39112.
    
    Authored-by: ulysses-you <ulyssesyo...@gmail.com>
    Signed-off-by: Wenchen Fan <wenc...@databricks.com>
---
 .../sql/catalyst/analysis/v2ResolutionPlans.scala  | 28 ++++++++++++++++------
 .../scala/org/apache/spark/sql/ExplainSuite.scala  | 15 ++++++++++++
 2 files changed, 36 insertions(+), 7 deletions(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala
index 4cffead93b2..a87f9e0082d 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala
@@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.analysis
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec
 import org.apache.spark.sql.catalyst.expressions.{Attribute, LeafExpression, 
Unevaluable}
-import org.apache.spark.sql.catalyst.plans.logical.LeafNode
+import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, Statistics}
 import org.apache.spark.sql.catalyst.trees.TreePattern.{TreePattern, 
UNRESOLVED_FUNC}
 import org.apache.spark.sql.catalyst.util.CharVarcharUtils
 import org.apache.spark.sql.connector.catalog.{CatalogPlugin, FunctionCatalog, 
Identifier, Table, TableCatalog}
@@ -140,11 +140,19 @@ case class UnresolvedDBObjectName(nameParts: Seq[String], 
isNamespace: Boolean)
   override def output: Seq[Attribute] = Nil
 }
 
+/**
+ * A resolved leaf node whose statistics has no meaning.
+ */
+trait LeafNodeWithoutStats extends LeafNode {
+  // Here we just return a dummy statistics to avoid compute statsCache
+  override def stats: Statistics = Statistics.DUMMY
+}
+
 /**
  * A plan containing resolved namespace.
  */
 case class ResolvedNamespace(catalog: CatalogPlugin, namespace: Seq[String])
-  extends LeafNode {
+  extends LeafNodeWithoutStats {
   override def output: Seq[Attribute] = Nil
 }
 
@@ -156,7 +164,7 @@ case class ResolvedTable(
     identifier: Identifier,
     table: Table,
     outputAttributes: Seq[Attribute])
-  extends LeafNode {
+  extends LeafNodeWithoutStats {
   override def output: Seq[Attribute] = {
     val qualifier = catalog.name +: identifier.namespace :+ identifier.name
     outputAttributes.map(_.withQualifier(qualifier))
@@ -191,7 +199,7 @@ case class ResolvedFieldPosition(position: ColumnPosition) 
extends FieldPosition
  */
 // TODO: create a generic representation for temp view, v1 view and v2 view, 
after we add view
 //       support to v2 catalog. For now we only need the identifier to 
fallback to v1 command.
-case class ResolvedView(identifier: Identifier, isTemp: Boolean) extends 
LeafNode {
+case class ResolvedView(identifier: Identifier, isTemp: Boolean) extends 
LeafNodeWithoutStats {
   override def output: Seq[Attribute] = Nil
 }
 
@@ -202,20 +210,26 @@ case class ResolvedPersistentFunc(
     catalog: FunctionCatalog,
     identifier: Identifier,
     func: UnboundFunction)
-  extends LeafNode {
+  extends LeafNodeWithoutStats {
   override def output: Seq[Attribute] = Nil
 }
 
 /**
  * A plan containing resolved non-persistent (temp or built-in) function.
  */
-case class ResolvedNonPersistentFunc(name: String, func: UnboundFunction) 
extends LeafNode {
+case class ResolvedNonPersistentFunc(
+    name: String,
+    func: UnboundFunction)
+  extends LeafNodeWithoutStats {
   override def output: Seq[Attribute] = Nil
 }
 
 /**
  * A plan containing resolved database object name with catalog determined.
  */
-case class ResolvedDBObjectName(catalog: CatalogPlugin, nameParts: 
Seq[String]) extends LeafNode {
+case class ResolvedDBObjectName(
+    catalog: CatalogPlugin,
+    nameParts: Seq[String])
+  extends LeafNodeWithoutStats {
   override def output: Seq[Attribute] = Nil
 }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala
index b122d2a7980..a9624cb1e6a 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala
@@ -528,6 +528,21 @@ class ExplainSuite extends ExplainSuiteHelper with 
DisableAdaptiveExecutionSuite
         "== Analyzed Logical Plan ==\nCreateViewCommand")
     }
   }
+
+  test("SPARK-39112: UnsupportedOperationException if explain cost command 
using v2 command") {
+    withTempDir { dir =>
+      sql("EXPLAIN COST CREATE DATABASE tmp")
+      sql("EXPLAIN COST DESC DATABASE tmp")
+      sql(s"EXPLAIN COST ALTER DATABASE tmp SET LOCATION 
'${dir.toURI.toString}'")
+      sql("EXPLAIN COST USE tmp")
+      sql("EXPLAIN COST CREATE TABLE t(c1 int) USING PARQUET")
+      sql("EXPLAIN COST SHOW TABLES")
+      sql("EXPLAIN COST SHOW CREATE TABLE t")
+      sql("EXPLAIN COST SHOW TBLPROPERTIES t")
+      sql("EXPLAIN COST DROP TABLE t")
+      sql("EXPLAIN COST DROP DATABASE tmp")
+    }
+  }
 }
 
 class ExplainSuiteAE extends ExplainSuiteHelper with 
EnableAdaptiveExecutionSuite {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to