This is an automated email from the ASF dual-hosted git repository.
maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 37f2966b6423 [SPARK-49866][SQL] Improve the error message for describe
table with partition columns
37f2966b6423 is described below
commit 37f2966b6423b3c2cb98db3fede67adb23293dee
Author: Mihailo Aleksic <[email protected]>
AuthorDate: Sat Oct 5 11:23:22 2024 +0200
[SPARK-49866][SQL] Improve the error message for describe table with
partition columns
### What changes were proposed in this pull request?
Provide a more user-facing error when the partition column name can't be found
in the table schema.
### Why are the changes needed?
There's an issue where a partition column sometimes doesn't match any column in
the table schema. When that happens we throw an assertion error, which is not
user friendly. Because of that, we introduced a new `QueryExecutionError` in
order to make the message more user-facing.
### Does this PR introduce _any_ user-facing change?
Yes, users will get a more user-friendly error message.
### Was this patch authored or co-authored using generative AI tooling?
No
Closes #48338 from
mihailoale-db/mihailoale-db/fixdescribepartitioningmessage.
Authored-by: Mihailo Aleksic <[email protected]>
Signed-off-by: Max Gekk <[email protected]>
---
common/utils/src/main/resources/error/error-conditions.json | 6 ++++++
.../org/apache/spark/sql/errors/QueryExecutionErrors.scala | 12 ++++++++++++
.../sql/execution/datasources/v2/DescribeTableExec.scala | 10 +++++++---
3 files changed, 25 insertions(+), 3 deletions(-)
diff --git a/common/utils/src/main/resources/error/error-conditions.json
b/common/utils/src/main/resources/error/error-conditions.json
index eca3587c7407..f4cb34495612 100644
--- a/common/utils/src/main/resources/error/error-conditions.json
+++ b/common/utils/src/main/resources/error/error-conditions.json
@@ -3802,6 +3802,12 @@
],
"sqlState" : "428FT"
},
+ "PARTITION_COLUMN_NOT_FOUND_IN_SCHEMA" : {
+ "message" : [
+ "Partition column <column> not found in schema <schema>. Please provide
the existing column for partitioning."
+ ],
+ "sqlState" : "42000"
+ },
"PATH_ALREADY_EXISTS" : {
"message" : [
"Path <outputPath> already exists. Set mode as \"overwrite\" to
overwrite the existing path."
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
index 5e3aa3ef5f6b..bc6c7681ea1a 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
@@ -2856,4 +2856,16 @@ private[sql] object QueryExecutionErrors extends
QueryErrorsBase with ExecutionE
)
)
}
+
+ def partitionColumnNotFoundInTheTableSchemaError(
+ column: Seq[String],
+ schema: StructType): SparkRuntimeException = {
+ new SparkRuntimeException(
+ errorClass = "PARTITION_COLUMN_NOT_FOUND_IN_SCHEMA",
+ messageParameters = Map(
+ "column" -> toSQLId(column),
+ "schema" -> toSQLType(schema)
+ )
+ )
+ }
}
diff --git
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala
index 7f7f280d8cdc..7cfd601ef774 100644
---
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala
+++
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala
@@ -27,6 +27,7 @@ import org.apache.spark.sql.catalyst.util.{quoteIfNeeded,
ResolveDefaultColumns}
import org.apache.spark.sql.connector.catalog.{CatalogV2Util,
SupportsMetadataColumns, SupportsRead, Table, TableCatalog}
import org.apache.spark.sql.connector.expressions.{ClusterByTransform,
IdentityTransform}
import org.apache.spark.sql.connector.read.SupportsReportStatistics
+import org.apache.spark.sql.errors.QueryExecutionErrors
import org.apache.spark.sql.util.CaseInsensitiveStringMap
import org.apache.spark.util.ArrayImplicits._
@@ -156,9 +157,12 @@ case class DescribeTableExec(
.map(_.asInstanceOf[IdentityTransform].ref.fieldNames())
.map { fieldNames =>
val nestedField =
table.schema.findNestedField(fieldNames.toImmutableArraySeq)
- assert(nestedField.isDefined,
- s"Not found the partition column
${fieldNames.map(quoteIfNeeded).mkString(".")} " +
- s"in the table schema ${table.schema().catalogString}.")
+ if (nestedField.isEmpty) {
+ throw
QueryExecutionErrors.partitionColumnNotFoundInTheTableSchemaError(
+ fieldNames.toSeq,
+ table.schema()
+ )
+ }
nestedField.get
}.map { case (path, field) =>
toCatalystRow(
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]