This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 92655db9fc6 [SPARK-41443][SQL] Assign a name to the error class _LEGACY_ERROR_TEMP_1061 92655db9fc6 is described below commit 92655db9fc69410022052b6e662488285a322490 Author: panbingkun <pbk1...@gmail.com> AuthorDate: Sat Dec 10 19:27:26 2022 +0300 [SPARK-41443][SQL] Assign a name to the error class _LEGACY_ERROR_TEMP_1061 ### What changes were proposed in this pull request? In the PR, I propose to assign the name COLUMN_NOT_FOUND to the error class _LEGACY_ERROR_TEMP_1061. ### Why are the changes needed? Proper names of error classes should improve user experience with Spark SQL. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Add new UT. Pass GA. Closes #38972 from panbingkun/LEGACY_ERROR_TEMP_1061. Authored-by: panbingkun <pbk1...@gmail.com> Signed-off-by: Max Gekk <max.g...@gmail.com> --- core/src/main/resources/error/error-classes.json | 10 ++++---- .../spark/sql/errors/QueryCompilationErrors.scala | 14 ++++++----- .../catalyst/analysis/ResolveSessionCatalog.scala | 2 +- .../execution/command/AnalyzeColumnCommand.scala | 2 +- .../spark/sql/execution/command/tables.scala | 2 +- .../spark/sql/StatisticsCollectionSuite.scala | 29 ++++++++++++++++------ .../execution/command/v1/DescribeTableSuite.scala | 28 +++++++++++++++------ .../apache/spark/sql/hive/StatisticsSuite.scala | 21 ++++++++++++++-- 8 files changed, 76 insertions(+), 32 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index a8738994e17..3f091f090fc 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -109,6 +109,11 @@ "The column <columnName> already exists. Consider to choose another name or rename the existing column." ] }, + "COLUMN_NOT_FOUND" : { + "message" : [ + "The column <colName> cannot be found. 
Verify the spelling and correctness of the column name according to the SQL config <caseSensitiveConfig>." + ] + }, "CONCURRENT_QUERY" : { "message" : [ "Another instance of this query was just started by a concurrent session." @@ -2092,11 +2097,6 @@ "<command> does not support nested column: <column>." ] }, - "_LEGACY_ERROR_TEMP_1061" : { - "message" : [ - "Column <colName> does not exist." - ] - }, "_LEGACY_ERROR_TEMP_1065" : { "message" : [ "`<name>` is not a valid name for tables/databases. Valid names only contain alphabet characters, numbers and _." diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index ed08e33829e..b507045f8c6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -795,12 +795,6 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase { "column" -> quoted)) } - def columnDoesNotExistError(colName: String): Throwable = { - new AnalysisException( - errorClass = "_LEGACY_ERROR_TEMP_1061", - messageParameters = Map("colName" -> colName)) - } - def renameTempViewToExistingViewError(newName: String): Throwable = { new TableAlreadyExistsException(newName) } @@ -2281,6 +2275,14 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase { messageParameters = Map("columnName" -> toSQLId(columnName))) } + def columnNotFoundError(colName: String): Throwable = { + new AnalysisException( + errorClass = "COLUMN_NOT_FOUND", + messageParameters = Map( + "colName" -> toSQLId(colName), + "caseSensitiveConfig" -> toSQLConf(SQLConf.CASE_SENSITIVE.key))) + } + def noSuchTableError(db: String, table: String): Throwable = { new NoSuchTableException(db = db, table = table) } diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index 4afcf5b7514..7b2d5015840 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -155,7 +155,7 @@ class ResolveSessionCatalog(val catalogManager: CatalogManager) case DescribeColumn(ResolvedV1TableIdentifier(ident), column, isExtended, output) => column match { case u: UnresolvedAttribute => - throw QueryCompilationErrors.columnDoesNotExistError(u.name) + throw QueryCompilationErrors.columnNotFoundError(u.name) case a: Attribute => DescribeColumnCommand(ident, a.qualifier :+ a.name, isExtended, output) case Alias(child, _) => diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeColumnCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeColumnCommand.scala index 88bba7f5ec9..d821b127e06 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeColumnCommand.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeColumnCommand.scala @@ -86,7 +86,7 @@ case class AnalyzeColumnCommand( } else { columnNames.get.map { col => val exprOption = relation.output.find(attr => conf.resolver(attr.name, col)) - exprOption.getOrElse(throw QueryCompilationErrors.columnDoesNotExistError(col)) + exprOption.getOrElse(throw QueryCompilationErrors.columnNotFoundError(col)) } } // Make sure the column types are supported for stats gathering. 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index 6b089a13d40..5e733ad9e5b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -764,7 +764,7 @@ case class DescribeColumnCommand( val colName = UnresolvedAttribute(colNameParts).name val field = { relation.resolve(colNameParts, resolver).getOrElse { - throw QueryCompilationErrors.columnDoesNotExistError(colName) + throw QueryCompilationErrors.columnNotFoundError(colName) } } if (!field.isInstanceOf[Attribute]) { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala index d4077274d5c..1d28c14f118 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala @@ -124,16 +124,29 @@ class StatisticsCollectionSuite extends StatisticsCollectionTestBase with Shared Seq(ArrayData(Seq(1, 2, 3), Seq(Seq(1, 2, 3)))).toDF().write.saveAsTable(tableName) // Test unsupported data types - val err1 = intercept[AnalysisException] { - sql(s"ANALYZE TABLE $tableName COMPUTE STATISTICS FOR COLUMNS data") - } - assert(err1.message.contains("does not support statistics collection")) + checkError( + exception = intercept[AnalysisException] { + sql(s"ANALYZE TABLE $tableName COMPUTE STATISTICS FOR COLUMNS data") + }, + errorClass = "_LEGACY_ERROR_TEMP_1235", + parameters = Map( + "name" -> "data", + "tableIdent" -> "`spark_catalog`.`default`.`column_stats_test1`", + "dataType" -> "ArrayType(IntegerType,true)" + ) + ) // Test invalid columns - val err2 = intercept[AnalysisException] { - sql(s"ANALYZE TABLE $tableName COMPUTE STATISTICS FOR COLUMNS some_random_column") - } - 
assert(err2.message.contains("does not exist")) + checkError( + exception = intercept[AnalysisException] { + sql(s"ANALYZE TABLE $tableName COMPUTE STATISTICS FOR COLUMNS some_random_column") + }, + errorClass = "COLUMN_NOT_FOUND", + parameters = Map( + "colName" -> "`some_random_column`", + "caseSensitiveConfig" -> "\"spark.sql.caseSensitive\"" + ) + ) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/DescribeTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/DescribeTableSuite.scala index 9bee8d38c0b..84da38f5097 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/DescribeTableSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/DescribeTableSuite.scala @@ -59,10 +59,16 @@ trait DescribeTableSuiteBase extends command.DescribeTableSuiteBase |CREATE TABLE $tbl |(key int COMMENT 'column_comment', col struct<x:int, y:string>) |$defaultUsing""".stripMargin) - val errMsg = intercept[AnalysisException] { - sql(s"DESC $tbl key1").collect() - }.getMessage - assert(errMsg === "Column key1 does not exist.") + checkError( + exception = intercept[AnalysisException] { + sql(s"DESC $tbl key1").collect() + }, + errorClass = "COLUMN_NOT_FOUND", + parameters = Map( + "colName" -> "`key1`", + "caseSensitiveConfig" -> "\"spark.sql.caseSensitive\"" + ) + ) } } @@ -79,10 +85,16 @@ trait DescribeTableSuiteBase extends command.DescribeTableSuiteBase withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { withNamespaceAndTable("ns", "tbl") { tbl => sql(s"CREATE TABLE $tbl (key int COMMENT 'comment1') $defaultUsing") - val errMsg = intercept[AnalysisException] { - sql(s"DESC $tbl KEY").collect() - }.getMessage - assert(errMsg === "Column KEY does not exist.") + checkError( + exception = intercept[AnalysisException] { + sql(s"DESC $tbl KEY").collect() + }, + errorClass = "COLUMN_NOT_FOUND", + parameters = Map( + "colName" -> "`KEY`", + "caseSensitiveConfig" -> 
"\"spark.sql.caseSensitive\"" + ) + ) } } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala index 4b69a01834d..a03120ca442 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala @@ -27,7 +27,7 @@ import scala.util.matching.Regex import org.apache.hadoop.hive.common.StatsSetupConst import org.apache.spark.metrics.source.HiveCatalogMetrics -import org.apache.spark.sql._ +import org.apache.spark.sql.{AnalysisException, _} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.NoSuchPartitionException import org.apache.spark.sql.catalyst.catalog.{CatalogColumnStat, CatalogStatistics, HiveTableRelation} @@ -43,7 +43,6 @@ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.util.Utils - class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleton { test("size estimation for relations is based on row size * number of rows") { @@ -582,6 +581,24 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto } } + test("analyze not found column") { + val tableName = "analyzeTable" + withTable(tableName) { + sql(s"CREATE TABLE $tableName (key STRING, value STRING) PARTITIONED BY (ds STRING)") + + checkError( + exception = intercept[AnalysisException] { + sql(s"ANALYZE TABLE $tableName COMPUTE STATISTICS FOR COLUMNS fakeColumn") + }, + errorClass = "COLUMN_NOT_FOUND", + parameters = Map( + "colName" -> "`fakeColumn`", + "caseSensitiveConfig" -> "\"spark.sql.caseSensitive\"" + ) + ) + } + } + test("analyze non-existent partition") { def assertAnalysisException(analyzeCommand: String, errorMessage: String): Unit = { --------------------------------------------------------------------- To unsubscribe, e-mail: 
commits-unsubscribe@spark.apache.org For additional commands, e-mail: commits-help@spark.apache.org