This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.3 by this push: new 7d57577037f [SPARK-39112][SQL] UnsupportedOperationException if spark.sql.ui.explainMode is set to cost 7d57577037f is described below commit 7d57577037fe082b6b1ded093943669dd1f8dd05 Author: ulysses-you <ulyssesyo...@gmail.com> AuthorDate: Wed May 11 14:32:05 2022 +0800 [SPARK-39112][SQL] UnsupportedOperationException if spark.sql.ui.explainMode is set to cost ### What changes were proposed in this pull request? Add a new leaf-like node `LeafNodeWithoutStats` and apply it to the following list: - ResolvedDBObjectName - ResolvedNamespace - ResolvedTable - ResolvedView - ResolvedNonPersistentFunc - ResolvedPersistentFunc ### Why are the changes needed? We enable v2 commands by default in the 3.3.0 branch via `spark.sql.legacy.useV1Command`. However, there is a behavior change between v1 and v2 commands. - v1 command: We resolve the logical plan to a command at the analyzer phase by `ResolveSessionCatalog` - v2 command: We resolve the logical plan to a v2 command at the physical phase by `DataSourceV2Strategy` For cost explain mode, we will call `LogicalPlanStats.stats` using the optimized plan, so there is a gap between v1 and v2 commands. Unfortunately, the logical plan of a v2 command contains a `LeafNode` which does not override `computeStats`. 
As a result, there is an error when running such SQL: ```sql set spark.sql.ui.explainMode=cost; show tables; ``` ``` java.lang.UnsupportedOperationException: at org.apache.spark.sql.catalyst.plans.logical.LeafNode.computeStats(LogicalPlan.scala:171) at org.apache.spark.sql.catalyst.plans.logical.LeafNode.computeStats$(LogicalPlan.scala:171) at org.apache.spark.sql.catalyst.analysis.ResolvedNamespace.computeStats(v2ResolutionPlans.scala:155) at org.apache.spark.sql.catalyst.plans.logical.statsEstimation.SizeInBytesOnlyStatsPlanVisitor$.default(SizeInBytesOnlyStatsPlanVisitor.scala:55) at org.apache.spark.sql.catalyst.plans.logical.statsEstimation.SizeInBytesOnlyStatsPlanVisitor$.default(SizeInBytesOnlyStatsPlanVisitor.scala:27) at org.apache.spark.sql.catalyst.plans.logical.LogicalPlanVisitor.visit(LogicalPlanVisitor.scala:49) at org.apache.spark.sql.catalyst.plans.logical.LogicalPlanVisitor.visit$(LogicalPlanVisitor.scala:25) at org.apache.spark.sql.catalyst.plans.logical.statsEstimation.SizeInBytesOnlyStatsPlanVisitor$.visit(SizeInBytesOnlyStatsPlanVisitor.scala:27) at org.apache.spark.sql.catalyst.plans.logical.statsEstimation.LogicalPlanStats.$anonfun$stats$1(LogicalPlanStats.scala:37) at scala.Option.getOrElse(Option.scala:189) at org.apache.spark.sql.catalyst.plans.logical.statsEstimation.LogicalPlanStats.stats(LogicalPlanStats.scala:33) at org.apache.spark.sql.catalyst.plans.logical.statsEstimation.LogicalPlanStats.stats$(LogicalPlanStats.scala:33) at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.stats(LogicalPlan.scala:30) ``` ### Does this PR introduce _any_ user-facing change? Yes, bug fix. ### How was this patch tested? Added a test. Closes #36488 from ulysses-you/SPARK-39112. 
Authored-by: ulysses-you <ulyssesyo...@gmail.com> Signed-off-by: Wenchen Fan <wenc...@databricks.com> (cherry picked from commit 06fd340daefd67a3e96393539401c9bf4b3cbde9) Signed-off-by: Wenchen Fan <wenc...@databricks.com> --- .../sql/catalyst/analysis/v2ResolutionPlans.scala | 28 ++++++++++++++++------ .../scala/org/apache/spark/sql/ExplainSuite.scala | 15 ++++++++++++ 2 files changed, 36 insertions(+), 7 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala index 4cffead93b2..a87f9e0082d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.analysis import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.catalyst.expressions.{Attribute, LeafExpression, Unevaluable} -import org.apache.spark.sql.catalyst.plans.logical.LeafNode +import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, Statistics} import org.apache.spark.sql.catalyst.trees.TreePattern.{TreePattern, UNRESOLVED_FUNC} import org.apache.spark.sql.catalyst.util.CharVarcharUtils import org.apache.spark.sql.connector.catalog.{CatalogPlugin, FunctionCatalog, Identifier, Table, TableCatalog} @@ -140,11 +140,19 @@ case class UnresolvedDBObjectName(nameParts: Seq[String], isNamespace: Boolean) override def output: Seq[Attribute] = Nil } +/** + * A resolved leaf node whose statistics has no meaning. + */ +trait LeafNodeWithoutStats extends LeafNode { + // Here we just return a dummy statistics to avoid compute statsCache + override def stats: Statistics = Statistics.DUMMY +} + /** * A plan containing resolved namespace. 
*/ case class ResolvedNamespace(catalog: CatalogPlugin, namespace: Seq[String]) - extends LeafNode { + extends LeafNodeWithoutStats { override def output: Seq[Attribute] = Nil } @@ -156,7 +164,7 @@ case class ResolvedTable( identifier: Identifier, table: Table, outputAttributes: Seq[Attribute]) - extends LeafNode { + extends LeafNodeWithoutStats { override def output: Seq[Attribute] = { val qualifier = catalog.name +: identifier.namespace :+ identifier.name outputAttributes.map(_.withQualifier(qualifier)) @@ -191,7 +199,7 @@ case class ResolvedFieldPosition(position: ColumnPosition) extends FieldPosition */ // TODO: create a generic representation for temp view, v1 view and v2 view, after we add view // support to v2 catalog. For now we only need the identifier to fallback to v1 command. -case class ResolvedView(identifier: Identifier, isTemp: Boolean) extends LeafNode { +case class ResolvedView(identifier: Identifier, isTemp: Boolean) extends LeafNodeWithoutStats { override def output: Seq[Attribute] = Nil } @@ -202,20 +210,26 @@ case class ResolvedPersistentFunc( catalog: FunctionCatalog, identifier: Identifier, func: UnboundFunction) - extends LeafNode { + extends LeafNodeWithoutStats { override def output: Seq[Attribute] = Nil } /** * A plan containing resolved non-persistent (temp or built-in) function. */ -case class ResolvedNonPersistentFunc(name: String, func: UnboundFunction) extends LeafNode { +case class ResolvedNonPersistentFunc( + name: String, + func: UnboundFunction) + extends LeafNodeWithoutStats { override def output: Seq[Attribute] = Nil } /** * A plan containing resolved database object name with catalog determined. 
*/ -case class ResolvedDBObjectName(catalog: CatalogPlugin, nameParts: Seq[String]) extends LeafNode { +case class ResolvedDBObjectName( + catalog: CatalogPlugin, + nameParts: Seq[String]) + extends LeafNodeWithoutStats { override def output: Seq[Attribute] = Nil } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala index 073b67e0472..d6372834466 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala @@ -528,6 +528,21 @@ class ExplainSuite extends ExplainSuiteHelper with DisableAdaptiveExecutionSuite "== Analyzed Logical Plan ==\nCreateViewCommand") } } + + test("SPARK-39112: UnsupportedOperationException if explain cost command using v2 command") { + withTempDir { dir => + sql("EXPLAIN COST CREATE DATABASE tmp") + sql("EXPLAIN COST DESC DATABASE tmp") + sql(s"EXPLAIN COST ALTER DATABASE tmp SET LOCATION '${dir.toURI.toString}'") + sql("EXPLAIN COST USE tmp") + sql("EXPLAIN COST CREATE TABLE t(c1 int) USING PARQUET") + sql("EXPLAIN COST SHOW TABLES") + sql("EXPLAIN COST SHOW CREATE TABLE t") + sql("EXPLAIN COST SHOW TBLPROPERTIES t") + sql("EXPLAIN COST DROP TABLE t") + sql("EXPLAIN COST DROP DATABASE tmp") + } + } } class ExplainSuiteAE extends ExplainSuiteHelper with EnableAdaptiveExecutionSuite { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org