This is an automated email from the ASF dual-hosted git repository.
maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 37f2966b6423 [SPARK-49866][SQL] Improve the error message for describe
table with partition columns
37f2966b6423 is described below
commit 37f2966b6423b3c2cb98db3fede67adb23293dee
Author: Mihailo Aleksic <[email protected]>
AuthorDate: Sat Oct 5 11:23:22 2024 +0200
[SPARK-49866][SQL] Improve the error message for describe table with
partition columns
### What changes were proposed in this pull request?
Provide a more user-facing error when the partition column name can't be found
in the table schema.
### Why are the changes needed?
There's an issue where a partition column sometimes doesn't match any column in
the table schema. When that happens we throw an assertion error, which is not
user friendly. Because of that, we introduced a new `QueryExecutionError` in
order to make the message more user-facing.
### Does this PR introduce _any_ user-facing change?
Yes, users will get a more user-friendly error message.
### Was this patch authored or co-authored using generative AI tooling?
No
Closes #48338 from
mihailoale-db/mihailoale-db/fixdescribepartitioningmessage.
Authored-by: Mihailo Aleksic <[email protected]>
Signed-off-by: Max Gekk <[email protected]>
---
common/utils/src/main/resources/error/error-conditions.json | 6 ++++++
.../org/apache/spark/sql/errors/QueryExecutionErrors.scala | 12 ++++++++++++
.../sql/execution/datasources/v2/DescribeTableExec.scala | 10 +++++++---
3 files changed, 25 insertions(+), 3 deletions(-)
diff --git a/common/utils/src/main/resources/error/error-conditions.json
b/common/utils/src/main/resources/error/error-conditions.json
index eca3587c7407..f4cb34495612 100644
--- a/common/utils/src/main/resources/error/error-conditions.json
+++ b/common/utils/src/main/resources/error/error-conditions.json
@@ -3802,6 +3802,12 @@
],
"sqlState" : "428FT"
},
+ "PARTITION_COLUMN_NOT_FOUND_IN_SCHEMA" : {
+ "message" : [
+ "Partition column <column> not found in schema <schema>. Please provide
the existing column for partitioning."
+ ],
+ "sqlState" : "42000"
+ },
"PATH_ALREADY_EXISTS" : {
"message" : [
"Path <outputPath> already exists. Set mode as \"overwrite\" to
overwrite the existing path."
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
index 5e3aa3ef5f6b..bc6c7681ea1a 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
@@ -2856,4 +2856,16 @@ private[sql] object QueryExecutionErrors extends
QueryErrorsBase with ExecutionE
)
)
}
+
+ def partitionColumnNotFoundInTheTableSchemaError(
+ column: Seq[String],
+ schema: StructType): SparkRuntimeException = {
+ new SparkRuntimeException(
+ errorClass = "PARTITION_COLUMN_NOT_FOUND_IN_SCHEMA",
+ messageParameters = Map(
+ "column" -> toSQLId(column),
+ "schema" -> toSQLType(schema)
+ )
+ )
+ }
}
diff --git
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala
index 7f7f280d8cdc..7cfd601ef774 100644
---
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala
+++
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala
@@ -27,6 +27,7 @@ import org.apache.spark.sql.catalyst.util.{quoteIfNeeded,
ResolveDefaultColumns}
import org.apache.spark.sql.connector.catalog.{CatalogV2Util,
SupportsMetadataColumns, SupportsRead, Table, TableCatalog}
import org.apache.spark.sql.connector.expressions.{ClusterByTransform,
IdentityTransform}
import org.apache.spark.sql.connector.read.SupportsReportStatistics
+import org.apache.spark.sql.errors.QueryExecutionErrors
import org.apache.spark.sql.util.CaseInsensitiveStringMap
import org.apache.spark.util.ArrayImplicits._
@@ -156,9 +157,12 @@ case class DescribeTableExec(
.map(_.asInstanceOf[IdentityTransform].ref.fieldNames())
.map { fieldNames =>
val nestedField =
table.schema.findNestedField(fieldNames.toImmutableArraySeq)
- assert(nestedField.isDefined,
- s"Not found the partition column
${fieldNames.map(quoteIfNeeded).mkString(".")} " +
- s"in the table schema ${table.schema().catalogString}.")
+ if (nestedField.isEmpty) {
+ throw
QueryExecutionErrors.partitionColumnNotFoundInTheTableSchemaError(
+ fieldNames.toSeq,
+ table.schema()
+ )
+ }
nestedField.get
}.map { case (path, field) =>
toCatalystRow(
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]