This is an automated email from the ASF dual-hosted git repository.

maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
     new a9129defc0e  [SPARK-44208][CORE][SQL] Assign clear error class names for some logic that directly uses exceptions
a9129defc0e is described below

commit a9129defc0ebbe68f20ec888352c30a90925d7ea
Author: panbingkun <pbk1...@gmail.com>
AuthorDate: Thu Jun 29 17:31:03 2023 +0300

    [SPARK-44208][CORE][SQL] Assign clear error class names for some logic that directly uses exceptions

    ### What changes were proposed in this pull request?
    This PR assigns clear error class names to some logic that directly uses exceptions, including:
    - ALL_PARTITION_COLUMNS_NOT_ALLOWED
    - INVALID_HIVE_COLUMN_NAME
    - SPECIFY_BUCKETING_IS_NOT_ALLOWED
    - SPECIFY_PARTITION_IS_NOT_ALLOWED
    - UNSUPPORTED_ADD_FILE.DIRECTORY
    - UNSUPPORTED_ADD_FILE.LOCAL_DIRECTORY

    ### Why are the changes needed?
    The changes improve the error framework.

    ### Does this PR introduce _any_ user-facing change?
    No.

    ### How was this patch tested?
    - Updated UTs.
    - Passed GA.

    Closes #41740 from panbingkun/assign_new_name.

    Lead-authored-by: panbingkun <pbk1...@gmail.com>
    Co-authored-by: panbingkun <84731...@qq.com>
    Signed-off-by: Max Gekk <max.g...@gmail.com>
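In user-facing terms, each name above replaces a bare exception message with a stable error class that callers and tests can match on. As a rough sketch of the difference (assuming a local SparkSession; the table and column names are illustrative, not taken from this patch):

    // Partitioning on every column now surfaces a named error class
    // instead of the former _LEGACY_ERROR_TEMP_1154 placeholder.
    import org.apache.spark.sql.{AnalysisException, SparkSession}

    val spark = SparkSession.builder().master("local[1]").appName("demo").getOrCreate()
    try {
      spark.sql("CREATE TABLE tab (c1 INT, c2 INT) USING parquet PARTITIONED BY (c1, c2)")
    } catch {
      case e: AnalysisException =>
        println(e.getErrorClass) // expected: ALL_PARTITION_COLUMNS_NOT_ALLOWED
    }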
---
 .../src/main/resources/error/error-classes.json    | 42 +++++++++++++++++++---
 .../main/scala/org/apache/spark/SparkContext.scala |  7 ++--
 .../org/apache/spark/errors/SparkCoreErrors.scala  | 14 ++++++++
 .../spark/sql/errors/QueryCompilationErrors.scala  |  2 +-
 .../spark/sql/execution/datasources/rules.scala    | 16 +++++----
 .../spark/sql/execution/command/DDLSuite.scala     | 34 +++++++++---------
 .../spark/sql/hive/HiveExternalCatalog.scala       | 12 ++++---
 .../spark/sql/hive/execution/HiveDDLSuite.scala    | 12 +++----
 8 files changed, 97 insertions(+), 42 deletions(-)

diff --git a/common/utils/src/main/resources/error/error-classes.json b/common/utils/src/main/resources/error/error-classes.json
index 192a0747dfd..6db8c5e3bf1 100644
--- a/common/utils/src/main/resources/error/error-classes.json
+++ b/common/utils/src/main/resources/error/error-classes.json
@@ -4,6 +4,11 @@
       "Non-deterministic expression <sqlExpr> should not appear in the arguments of an aggregate function."
     ]
   },
+  "ALL_PARTITION_COLUMNS_NOT_ALLOWED" : {
+    "message" : [
+      "Cannot use all columns for partition columns."
+    ]
+  },
   "ALTER_TABLE_COLUMN_DESCRIPTOR_DUPLICATE" : {
     "message" : [
       "ALTER TABLE <type> column <columnName> specifies descriptor \"<optionName>\" more than once, which is invalid."
@@ -1180,6 +1185,11 @@
     ],
     "sqlState" : "22023"
   },
+  "INVALID_HIVE_COLUMN_NAME" : {
+    "message" : [
+      "Cannot create the table <tableName> having the nested column <columnName> whose name contains invalid characters <invalidChars> in Hive metastore."
+    ]
+  },
   "INVALID_IDENTIFIER" : {
     "message" : [
       "The identifier <ident> is invalid. Please, consider quoting it with back-quotes as `<ident>`."
@@ -2081,6 +2091,16 @@
       "sortBy must be used together with bucketBy."
     ]
   },
+  "SPECIFY_BUCKETING_IS_NOT_ALLOWED" : {
+    "message" : [
+      "Cannot specify bucketing information if the table schema is not specified when creating and will be inferred at runtime."
+    ]
+  },
+  "SPECIFY_PARTITION_IS_NOT_ALLOWED" : {
+    "message" : [
+      "It is not allowed to specify partition columns when the table schema is not defined. When the table schema is not provided, schema and partition columns will be inferred."
+    ]
+  },
   "SQL_CONF_NOT_FOUND" : {
     "message" : [
       "The SQL config <sqlConf> cannot be found. Please verify that the config exists."
@@ -2303,6 +2323,23 @@
       "Attempted to unset non-existent properties [<properties>] in table <table>."
     ]
   },
+  "UNSUPPORTED_ADD_FILE" : {
+    "message" : [
+      "Don't support add file."
+    ],
+    "subClass" : {
+      "DIRECTORY" : {
+        "message" : [
+          "The file <path> is a directory, consider to set \"spark.sql.legacy.addSingleFileInAddFile\" to \"false\"."
+        ]
+      },
+      "LOCAL_DIRECTORY" : {
+        "message" : [
+          "The local directory <path> is not supported in a non-local master mode."
+        ]
+      }
+    }
+  },
   "UNSUPPORTED_ARROWTYPE" : {
     "message" : [
       "Unsupported arrow type <typeName>."
@@ -3588,11 +3625,6 @@
       "Cannot use <field> for partition column."
     ]
   },
-  "_LEGACY_ERROR_TEMP_1154" : {
-    "message" : [
-      "Cannot use all columns for partition columns."
-    ]
-  },
   "_LEGACY_ERROR_TEMP_1155" : {
     "message" : [
       "Partition column `<col>` not found in schema <schemaCatalog>."
diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala
index 24d788ff5bc..78c7ecb2782 100644
--- a/core/src/main/scala/org/apache/spark/SparkContext.scala
+++ b/core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -44,6 +44,7 @@ import org.apache.logging.log4j.Level
 import org.apache.spark.annotation.{DeveloperApi, Experimental}
 import org.apache.spark.broadcast.Broadcast
 import org.apache.spark.deploy.{LocalSparkCluster, SparkHadoopUtil}
+import org.apache.spark.errors.SparkCoreErrors
 import org.apache.spark.executor.{Executor, ExecutorMetrics, ExecutorMetricsSource}
 import org.apache.spark.input.{FixedLengthBinaryInputFormat, PortableDataStream, StreamInputFormat, WholeTextFileInputFormat}
 import org.apache.spark.internal.Logging
@@ -1737,12 +1738,10 @@ class SparkContext(config: SparkConf) extends Logging {
         val fs = hadoopPath.getFileSystem(hadoopConfiguration)
         val isDir = fs.getFileStatus(hadoopPath).isDirectory
         if (!isLocal && scheme == "file" && isDir) {
-          throw new SparkException(s"addFile does not support local directories when not running " +
-            "local mode.")
+          throw SparkCoreErrors.addLocalDirectoryError(hadoopPath)
         }
         if (!recursive && isDir) {
-          throw new SparkException(s"Added file $hadoopPath is a directory and recursive is not " +
-            "turned on.")
+          throw SparkCoreErrors.addDirectoryError(hadoopPath)
         }
       } else {
         // SPARK-17650: Make sure this is a valid URL before adding it to the list of dependencies
diff --git a/core/src/main/scala/org/apache/spark/errors/SparkCoreErrors.scala b/core/src/main/scala/org/apache/spark/errors/SparkCoreErrors.scala
index f8e7f2db259..109086c4db6 100644
--- a/core/src/main/scala/org/apache/spark/errors/SparkCoreErrors.scala
+++ b/core/src/main/scala/org/apache/spark/errors/SparkCoreErrors.scala
@@ -467,6 +467,20 @@ private[spark] object SparkCoreErrors {
       "receivedBytes" -> receivedBytes.toString).asJava)
   }
 
+  def addLocalDirectoryError(path: Path): Throwable = {
+    new SparkException(
+      errorClass = "UNSUPPORTED_ADD_FILE.LOCAL_DIRECTORY",
+      messageParameters = Map("path" -> path.toString),
+      cause = null)
+  }
+
+  def addDirectoryError(path: Path): Throwable = {
+    new SparkException(
+      errorClass = "UNSUPPORTED_ADD_FILE.DIRECTORY",
+      messageParameters = Map("path" -> path.toString),
+      cause = null)
+  }
+
   private def quoteByDefault(elem: String): String = {
     "\"" + elem + "\""
   }
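The two helpers added to SparkCoreErrors above are reached from SparkContext.addFile. A minimal way to observe the DIRECTORY variant (a sketch assuming a local master; the temp directory stands in for any directory path):

    import java.nio.file.Files
    import org.apache.spark.{SparkConf, SparkContext, SparkException}

    val sc = new SparkContext(new SparkConf().setMaster("local[1]").setAppName("addfile-demo"))
    val dir = Files.createTempDirectory("addfile").toFile
    try {
      sc.addFile(dir.getAbsolutePath) // the one-argument overload implies recursive = false
    } catch {
      case e: SparkException =>
        println(e.getErrorClass) // expected: UNSUPPORTED_ADD_FILE.DIRECTORY
    } finally {
      sc.stop()
    }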
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
index 17319eb46ad..e13d454e119 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
@@ -1600,7 +1600,7 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase {
 
   def cannotUseAllColumnsForPartitionColumnsError(): Throwable = {
     new AnalysisException(
-      errorClass = "_LEGACY_ERROR_TEMP_1154",
+      errorClass = "ALL_PARTITION_COLUMNS_NOT_ALLOWED",
       messageParameters = Map.empty)
   }
 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala
index 10197709438..af98bb13c73 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala
@@ -99,13 +99,15 @@ case class PreprocessTableCreation(catalog: SessionCatalog) extends Rule[Logical
     // we fail the query if the partitioning information is specified.
     case c @ CreateTableV1(tableDesc, _, None) if tableDesc.schema.isEmpty =>
       if (tableDesc.bucketSpec.isDefined) {
-        failAnalysis("Cannot specify bucketing information if the table schema is not specified " +
-          "when creating and will be inferred at runtime")
+        throw new AnalysisException(
+          errorClass = "SPECIFY_BUCKETING_IS_NOT_ALLOWED",
+          messageParameters = Map.empty
+        )
       }
       if (tableDesc.partitionColumnNames.nonEmpty) {
-        failAnalysis("It is not allowed to specify partition columns when the table schema is " +
-          "not defined. When the table schema is not provided, schema and partition columns " +
-          "will be inferred.")
+        throw new AnalysisException(
+          errorClass = "SPECIFY_PARTITION_IS_NOT_ALLOWED",
+          messageParameters = Map.empty)
       }
       c
@@ -316,7 +318,9 @@ case class PreprocessTableCreation(catalog: SessionCatalog) extends Rule[Logical
     SchemaUtils.checkColumnNameDuplication(normalizedPartitionCols, conf.resolver)
 
     if (schema.nonEmpty && normalizedPartitionCols.length == schema.length) {
-      failAnalysis("Cannot use all columns for partition columns")
+      throw new AnalysisException(
+        errorClass = "ALL_PARTITION_COLUMNS_NOT_ALLOWED",
+        messageParameters = Map.empty)
     }
 
     schema.filter(f => normalizedPartitionCols.contains(f.name)).map(_.dataType).foreach {
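The test changes below move from asserting on rendered message strings to checkError, which matches the error class and message parameters instead. Schematically (condensed from the suite updates that follow; the table name and path are illustrative):

    test("partition columns require a user-specified schema") {
      checkError(
        exception = intercept[AnalysisException] {
          sql("CREATE TABLE t USING org.apache.spark.sql.json " +
            "OPTIONS (path '/tmp/data') PARTITIONED BY (a)")
        },
        errorClass = "SPECIFY_PARTITION_IS_NOT_ALLOWED",
        parameters = Map.empty
      )
    }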
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
index dd126027b36..4e498e6a754 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
@@ -429,7 +429,7 @@ abstract class DDLSuite extends QueryTest with DDLSuiteBase {
     if (userSpecifiedSchema.isEmpty && userSpecifiedPartitionCols.nonEmpty) {
       checkError(
         exception = intercept[AnalysisException](sql(sqlCreateTable)),
-        errorClass = null,
+        errorClass = "SPECIFY_PARTITION_IS_NOT_ALLOWED",
         parameters = Map.empty
       )
     } else {
@@ -1316,19 +1316,21 @@ abstract class DDLSuite extends QueryTest with DDLSuiteBase {
       withTable("jsonTable") {
         (("a", "b") :: Nil).toDF().write.json(tempDir.getCanonicalPath)
 
-        val e = intercept[AnalysisException] {
-          sql(
-            s"""
-               |CREATE TABLE jsonTable
-               |USING org.apache.spark.sql.json
-               |OPTIONS (
-               |  path '${tempDir.getCanonicalPath}'
-               |)
-               |CLUSTERED BY (nonexistentColumnA) SORTED BY (nonexistentColumnB) INTO 2 BUCKETS
-             """.stripMargin)
-        }
-        assert(e.message == "Cannot specify bucketing information if the table schema is not " +
-          "specified when creating and will be inferred at runtime")
+        checkError(
+          exception = intercept[AnalysisException] {
+            sql(
+              s"""
+                 |CREATE TABLE jsonTable
+                 |USING org.apache.spark.sql.json
+                 |OPTIONS (
+                 |  path '${tempDir.getCanonicalPath}'
+                 |)
+                 |CLUSTERED BY (nonexistentColumnA) SORTED BY (nonexistentColumnB) INTO 2 BUCKETS
+               """.stripMargin)
+          },
+          errorClass = "SPECIFY_BUCKETING_IS_NOT_ALLOWED",
+          parameters = Map.empty
+        )
       }
     }
   }
@@ -2247,8 +2249,8 @@ abstract class DDLSuite extends QueryTest with DDLSuiteBase {
       exception = intercept[SparkException] {
         sql(s"ADD FILE $testDir")
       },
-      errorClass = null,
-      parameters = Map.empty
+      errorClass = "UNSUPPORTED_ADD_FILE.DIRECTORY",
+      parameters = Map("path" -> s"file:${testDir.getCanonicalPath}/")
     )
   }
 }
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
index 99728e1705b..b1eadea42e0 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
@@ -42,6 +42,7 @@ import org.apache.spark.sql.catalyst.catalog.ExternalCatalogUtils._
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.types.DataTypeUtils
 import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, CharVarcharUtils}
+import org.apache.spark.sql.catalyst.util.TypeUtils.toSQLId
 import org.apache.spark.sql.execution.command.DDLUtils
 import org.apache.spark.sql.execution.datasources.{PartitioningUtils, SourceOptions}
 import org.apache.spark.sql.hive.client.HiveClient
@@ -156,10 +157,13 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
         case st: StructType => verifyNestedColumnNames(st)
         case _ if invalidChars.exists(f.name.contains) =>
           val invalidCharsString = invalidChars.map(c => s"'$c'").mkString(", ")
-          val errMsg = "Cannot create a table having a nested column whose name contains " +
-            s"invalid characters ($invalidCharsString) in Hive metastore. Table: $tableName; " +
-            s"Column: ${f.name}"
-          throw new AnalysisException(errMsg)
+          throw new AnalysisException(
+            errorClass = "INVALID_HIVE_COLUMN_NAME",
+            messageParameters = Map(
+              "invalidChars" -> invalidCharsString,
+              "tableName" -> toSQLId(tableName.nameParts),
+              "columnName" -> toSQLId(f.name)
+            ))
         case _ =>
       }
     }
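The toSQLId helper imported above supplies the backtick quoting visible in the new message parameters; roughly (return values inferred from the test expectations below):

    import org.apache.spark.sql.catalyst.util.TypeUtils.toSQLId

    toSQLId(Seq("spark_catalog", "default", "t")) // "`spark_catalog`.`default`.`t`"
    toSQLId("nested,column")                      // "`nested,column`"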
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
index f310f7ddfdf..8e5f0b8b507 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
@@ -2875,9 +2875,6 @@ class HiveDDLSuite
   }
 
   test("SPARK-24681 checks if nested column names do not include ',', ':', and ';'") {
-    val expectedMsg = "Cannot create a table having a nested column whose name contains invalid " +
-      "characters (',', ':', ';') in Hive metastore."
-
     Seq("nested,column", "nested:column", "nested;column").foreach { nestedColumnName =>
       withTable("t") {
         checkError(
@@ -2888,8 +2885,11 @@
               .format("hive")
               .saveAsTable("t")
           },
-          errorClass = null,
-          parameters = Map.empty
+          errorClass = "INVALID_HIVE_COLUMN_NAME",
+          parameters = Map(
+            "invalidChars" -> "',', ':', ';'",
+            "tableName" -> "`spark_catalog`.`default`.`t`",
+            "columnName" -> s"`$nestedColumnName`")
         )
       }
     }
@@ -3362,7 +3362,7 @@
       exception = intercept[AnalysisException] {
         sql("CREATE TABLE tab (c1 int) PARTITIONED BY (c1) STORED AS PARQUET")
       },
-      errorClass = null,
+      errorClass = "ALL_PARTITION_COLUMNS_NOT_ALLOWED",
       parameters = Map.empty
     )
   }

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org