This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 3e8191f7267 [SPARK-41009][SQL] Rename the error class `_LEGACY_ERROR_TEMP_1070` to `LOCATION_ALREADY_EXISTS` 3e8191f7267 is described below commit 3e8191f726721bf74c8dbcb3ea73a216f6bf0517 Author: Max Gekk <max.g...@gmail.com> AuthorDate: Wed Nov 9 12:33:13 2022 +0300 [SPARK-41009][SQL] Rename the error class `_LEGACY_ERROR_TEMP_1070` to `LOCATION_ALREADY_EXISTS` ### What changes were proposed in this pull request? In the PR, I propose to assign the proper name `LOCATION_ALREADY_EXISTS` to the legacy error class `_LEGACY_ERROR_TEMP_1070`, and modify the test suites to use `checkError()`, which checks the error class name, context, etc. ### Why are the changes needed? Proper name improves user experience w/ Spark SQL. ### Does this PR introduce _any_ user-facing change? Yes, the PR changes a user-facing error message. ### How was this patch tested? By running the modified test suites: ``` $ build/sbt "core/testOnly *SparkThrowableSuite" $ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *AlterTableRenameSuite" $ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *HiveCatalogedDDLSuite" ``` Closes #38490 from MaxGekk/location-already-exists. 
Authored-by: Max Gekk <max.g...@gmail.com> Signed-off-by: Max Gekk <max.g...@gmail.com> --- core/src/main/resources/error/error-classes.json | 10 +++--- .../sql/catalyst/catalog/SessionCatalog.scala | 8 ++--- .../spark/sql/errors/QueryCompilationErrors.scala | 10 ------ .../spark/sql/errors/QueryExecutionErrors.scala | 8 +++++ .../spark/sql/execution/command/DDLSuite.scala | 42 ++++++++++++---------- .../command/v1/AlterTableRenameSuite.scala | 17 +++++---- 6 files changed, 51 insertions(+), 44 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 71703e7efd9..9c914b86bb1 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -669,6 +669,11 @@ } } }, + "LOCATION_ALREADY_EXISTS" : { + "message" : [ + "Cannot name the managed table as <identifier>, as its associated location <location> already exists. Please pick a different table name, or remove the existing location first." + ] + }, "MALFORMED_PROTOBUF_MESSAGE" : { "message" : [ "Malformed Protobuf messages are detected in message deserialization. Parse Mode: <failFastMode>. To process malformed protobuf message as null result, try setting the option 'mode' as 'PERMISSIVE'." @@ -1949,11 +1954,6 @@ "CREATE EXTERNAL TABLE must be accompanied by LOCATION." ] }, - "_LEGACY_ERROR_TEMP_1070" : { - "message" : [ - "Can not <methodName> the managed table('<tableIdentifier>'). The associated location('<tableLocation>') already exists." - ] - }, "_LEGACY_ERROR_TEMP_1071" : { "message" : [ "Some existing schema fields (<nonExistentColumnNames>) are not present in the new schema. We don't support dropping columns yet." 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index bf712f9681e..06214613299 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -40,7 +40,7 @@ import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project, Subque import org.apache.spark.sql.catalyst.trees.{CurrentOrigin, Origin} import org.apache.spark.sql.catalyst.util.{CharVarcharUtils, StringUtils} import org.apache.spark.sql.connector.catalog.CatalogManager -import org.apache.spark.sql.errors.QueryCompilationErrors +import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.StaticSQLConf.GLOBAL_TEMP_DATABASE import org.apache.spark.sql.types.StructType @@ -411,8 +411,7 @@ class SessionCatalog( val fs = tableLocation.getFileSystem(hadoopConf) if (fs.exists(tableLocation) && fs.listStatus(tableLocation).nonEmpty) { - throw QueryCompilationErrors.cannotOperateManagedTableWithExistingLocationError( - "create", table.identifier, tableLocation) + throw QueryExecutionErrors.locationAlreadyExists(table.identifier, tableLocation) } } } @@ -1912,8 +1911,7 @@ class SessionCatalog( val newTableLocation = new Path(new Path(databaseLocation), format(newName.table)) val fs = newTableLocation.getFileSystem(hadoopConf) if (fs.exists(newTableLocation)) { - throw QueryCompilationErrors.cannotOperateManagedTableWithExistingLocationError( - "rename", oldName, newTableLocation) + throw QueryExecutionErrors.locationAlreadyExists(newName, newTableLocation) } } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index 67ceafbf03d..76fe951c0ec 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -851,16 +851,6 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase { messageParameters = Map.empty) } - def cannotOperateManagedTableWithExistingLocationError( - methodName: String, tableIdentifier: TableIdentifier, tableLocation: Path): Throwable = { - new AnalysisException( - errorClass = "_LEGACY_ERROR_TEMP_1070", - messageParameters = Map( - "methodName" -> methodName, - "tableIdentifier" -> tableIdentifier.toString, - "tableLocation" -> tableLocation.toString)) - } - def dropNonExistentColumnsNotSupportedError( nonExistentColumnNames: Seq[String]): Throwable = { new AnalysisException( diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index aad9a9ba51e..73664e64c22 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -2812,4 +2812,12 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase { "failFastMode" -> FailFastMode.name), cause = e) } + + def locationAlreadyExists(tableId: TableIdentifier, location: Path): Throwable = { + new SparkRuntimeException( + errorClass = "LOCATION_ALREADY_EXISTS", + messageParameters = Map( + "location" -> toSQLValue(location.toString, StringType), + "identifier" -> toSQLId(tableId.nameParts))) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala index 883672cc112..5aa36471770 100644 --- 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala @@ -24,7 +24,7 @@ import java.util.Locale import org.apache.hadoop.fs.{Path, RawLocalFileSystem} import org.apache.hadoop.fs.permission.{AclEntry, AclStatus} -import org.apache.spark.{SparkException, SparkFiles} +import org.apache.spark.{SparkException, SparkFiles, SparkRuntimeException} import org.apache.spark.internal.config import org.apache.spark.sql.{AnalysisException, QueryTest, Row, SaveMode} import org.apache.spark.sql.catalyst.{FunctionIdentifier, QualifiedTableName, TableIdentifier} @@ -366,26 +366,32 @@ abstract class DDLSuite extends QueryTest with DDLSuiteBase { withEmptyDirInTablePath("tab1") { tableLoc => val hiddenGarbageFile = new File(tableLoc.getCanonicalPath, ".garbage") hiddenGarbageFile.createNewFile() - val exMsgWithDefaultDB = - s"Can not create the managed table('`$SESSION_CATALOG_NAME`.`default`.`tab1`'). 
" + - "The associated location" - var ex = intercept[AnalysisException] { - sql(s"CREATE TABLE tab1 USING ${dataSource} AS SELECT 1, 'a'") - }.getMessage - assert(ex.contains(exMsgWithDefaultDB)) - - ex = intercept[AnalysisException] { - sql(s"CREATE TABLE tab1 (col1 int, col2 string) USING ${dataSource}") - }.getMessage - assert(ex.contains(exMsgWithDefaultDB)) + val expectedLoc = s"'${hiddenGarbageFile.getParentFile.toURI.toString.stripSuffix("/")}'" + Seq( + s"CREATE TABLE tab1 USING $dataSource AS SELECT 1, 'a'", + s"CREATE TABLE tab1 (col1 int, col2 string) USING $dataSource" + ).foreach { createStmt => + checkError( + exception = intercept[SparkRuntimeException] { + sql(createStmt) + }, + errorClass = "LOCATION_ALREADY_EXISTS", + parameters = Map( + "location" -> expectedLoc, + "identifier" -> s"`$SESSION_CATALOG_NAME`.`default`.`tab1`")) + } // Always check location of managed table, with or without (IF NOT EXISTS) withTable("tab2") { - sql(s"CREATE TABLE tab2 (col1 int, col2 string) USING ${dataSource}") - ex = intercept[AnalysisException] { - sql(s"CREATE TABLE IF NOT EXISTS tab1 LIKE tab2") - }.getMessage - assert(ex.contains(exMsgWithDefaultDB)) + sql(s"CREATE TABLE tab2 (col1 int, col2 string) USING $dataSource") + checkError( + exception = intercept[SparkRuntimeException] { + sql(s"CREATE TABLE IF NOT EXISTS tab1 LIKE tab2") + }, + errorClass = "LOCATION_ALREADY_EXISTS", + parameters = Map( + "location" -> expectedLoc, + "identifier" -> s"`$SESSION_CATALOG_NAME`.`default`.`tab1`")) } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableRenameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableRenameSuite.scala index abc99db441d..3efd6d8a957 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableRenameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableRenameSuite.scala @@ -17,7 +17,9 @@ package 
org.apache.spark.sql.execution.command.v1 +import org.apache.spark.SparkRuntimeException import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.errors.QueryErrorsBase import org.apache.spark.sql.execution.command /** @@ -28,7 +30,7 @@ import org.apache.spark.sql.execution.command * - V1 In-Memory catalog: `org.apache.spark.sql.execution.command.v1.AlterTableRenameSuite` * - V1 Hive External catalog: `org.apache.spark.sql.hive.execution.command.AlterTableRenameSuite` */ -trait AlterTableRenameSuiteBase extends command.AlterTableRenameSuiteBase { +trait AlterTableRenameSuiteBase extends command.AlterTableRenameSuiteBase with QueryErrorsBase { test("destination database is different") { withNamespaceAndTable("dst_ns", "dst_tbl") { dst => withNamespace("src_ns") { @@ -66,11 +68,14 @@ trait AlterTableRenameSuiteBase extends command.AlterTableRenameSuiteBase { withTableDir(dst) { (fs, dst_dir) => sql(s"DROP TABLE $dst") fs.mkdirs(dst_dir) - val errMsg = intercept[AnalysisException] { - sql(s"ALTER TABLE $src RENAME TO ns.dst_tbl") - }.getMessage - assert(errMsg.matches("Can not rename the managed table(.+). " + - "The associated location(.+) already exists.")) + checkError( + exception = intercept[SparkRuntimeException] { + sql(s"ALTER TABLE $src RENAME TO ns.dst_tbl") + }, + errorClass = "LOCATION_ALREADY_EXISTS", + parameters = Map( + "location" -> s"'$dst_dir'", + "identifier" -> toSQLId(dst))) } } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org