This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new af8228ce9ae [SPARK-46535][SQL] Fix NPE when describe extended a column without col stats af8228ce9ae is described below commit af8228ce9aee99eae9d08dbdefaaad32cf5438eb Author: zouxxyy <zouxinyu....@alibaba-inc.com> AuthorDate: Thu Dec 28 19:57:01 2023 +0300 [SPARK-46535][SQL] Fix NPE when describe extended a column without col stats ### What changes were proposed in this pull request? ### Why are the changes needed? Currently executing DESCRIBE TABLE EXTENDED on a column without col stats with v2 table will throw a null pointer exception. ```text Cannot invoke "org.apache.spark.sql.connector.read.colstats.ColumnStatistics.min()" because the return value of "scala.Option.get()" is null java.lang.NullPointerException: Cannot invoke "org.apache.spark.sql.connector.read.colstats.ColumnStatistics.min()" because the return value of "scala.Option.get()" is null at org.apache.spark.sql.execution.datasources.v2.DescribeColumnExec.run(DescribeColumnExec.scala:63) at org.apache.spark.sql.execution.datasources.v2.V2CommandExec.result$lzycompute(V2CommandExec.scala:43) at org.apache.spark.sql.execution.datasources.v2.V2CommandExec.result(V2CommandExec.scala:43) at org.apache.spark.sql.execution.datasources.v2.V2CommandExec.executeCollect(V2CommandExec.scala:49) at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:118) at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId0$6(SQLExecution.scala:150) at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:241) at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId0$1(SQLExecution.scala:116) at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:918) ``` This PR will fix it ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? 
Add a new test `describe extended (formatted) a column without col stats` ### Was this patch authored or co-authored using generative AI tooling? Closes #44524 from Zouxxyy/dev/fix-stats. Lead-authored-by: zouxxyy <zouxinyu....@alibaba-inc.com> Co-authored-by: Kent Yao <y...@apache.org> Signed-off-by: Max Gekk <max.g...@gmail.com> --- .../datasources/v2/DescribeColumnExec.scala | 2 +- .../execution/command/v2/DescribeTableSuite.scala | 21 +++++++++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeColumnExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeColumnExec.scala index 61ccda3fc95..2683d8d547f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeColumnExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeColumnExec.scala @@ -53,7 +53,7 @@ case class DescribeColumnExec( read.newScanBuilder(CaseInsensitiveStringMap.empty()).build() match { case s: SupportsReportStatistics => val stats = s.estimateStatistics() - Some(stats.columnStats().get(FieldReference.column(column.name))) + Option(stats.columnStats().get(FieldReference.column(column.name))) case _ => None } case _ => None diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DescribeTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DescribeTableSuite.scala index e2f2aee5611..a21baebe24d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DescribeTableSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DescribeTableSuite.scala @@ -175,4 +175,25 @@ class DescribeTableSuite extends command.DescribeTableSuiteBase Row("max_col_len", "NULL"))) } } + + test("SPARK-46535: describe extended (formatted) a column without col stats") { + withNamespaceAndTable("ns", "tbl") { tbl => + 
sql( + s""" + |CREATE TABLE $tbl + |(key INT COMMENT 'column_comment', col STRING) + |$defaultUsing""".stripMargin) + + val descriptionDf = sql(s"DESCRIBE TABLE EXTENDED $tbl key") + assert(descriptionDf.schema.map(field => (field.name, field.dataType)) === Seq( + ("info_name", StringType), + ("info_value", StringType))) + QueryTest.checkAnswer( + descriptionDf, + Seq( + Row("col_name", "key"), + Row("data_type", "int"), + Row("comment", "column_comment"))) + } + } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org