This is an automated email from the ASF dual-hosted git repository.

maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
     new a9129defc0e  [SPARK-44208][CORE][SQL] Assign clear error class names for some logic that directly uses exceptions
a9129defc0e is described below

commit a9129defc0ebbe68f20ec888352c30a90925d7ea
Author: panbingkun <pbk1...@gmail.com>
AuthorDate: Thu Jun 29 17:31:03 2023 +0300

    [SPARK-44208][CORE][SQL] Assign clear error class names for some logic that directly uses exceptions

    ### What changes were proposed in this pull request?
    This PR assigns clear error class names to some logic that directly uses exceptions, including:
    - ALL_PARTITION_COLUMNS_NOT_ALLOWED
    - INVALID_HIVE_COLUMN_NAME
    - SPECIFY_BUCKETING_IS_NOT_ALLOWED
    - SPECIFY_PARTITION_IS_NOT_ALLOWED
    - UNSUPPORTED_ADD_FILE.DIRECTORY
    - UNSUPPORTED_ADD_FILE.LOCAL_DIRECTORY

    ### Why are the changes needed?
    The changes improve the error framework.

    ### Does this PR introduce _any_ user-facing change?
    No.

    ### How was this patch tested?
    - Updated UTs.
    - Passed GA.

    Closes #41740 from panbingkun/assign_new_name.

    Lead-authored-by: panbingkun <pbk1...@gmail.com>
    Co-authored-by: panbingkun <84731...@qq.com>
    Signed-off-by: Max Gekk <max.g...@gmail.com>
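In user-facing terms, each name above replaces a bare exception message with a stable error class that callers and tests can match on. As a rough sketch of the difference (assuming a local SparkSession; the table and column names are illustrative, not taken from this patch):

    // Partitioning on every column now surfaces a named error class
    // instead of the former _LEGACY_ERROR_TEMP_1154 placeholder.
    import org.apache.spark.sql.{AnalysisException, SparkSession}

    val spark = SparkSession.builder().master("local[1]").appName("demo").getOrCreate()
    try {
      spark.sql("CREATE TABLE tab (c1 INT, c2 INT) USING parquet PARTITIONED BY (c1, c2)")
    } catch {
      case e: AnalysisException =>
        println(e.getErrorClass) // expected: ALL_PARTITION_COLUMNS_NOT_ALLOWED
    }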
---
 .../src/main/resources/error/error-classes.json    | 42 +++++++++++++++++++---
 .../main/scala/org/apache/spark/SparkContext.scala |  7 ++--
 .../org/apache/spark/errors/SparkCoreErrors.scala  | 14 ++++++++
 .../spark/sql/errors/QueryCompilationErrors.scala  |  2 +-
 .../spark/sql/execution/datasources/rules.scala    | 16 +++++----
 .../spark/sql/execution/command/DDLSuite.scala     | 34 +++++++++---------
 .../spark/sql/hive/HiveExternalCatalog.scala       | 12 ++++---
 .../spark/sql/hive/execution/HiveDDLSuite.scala    | 12 +++----
 8 files changed, 97 insertions(+), 42 deletions(-)

diff --git a/common/utils/src/main/resources/error/error-classes.json b/common/utils/src/main/resources/error/error-classes.json
index 192a0747dfd..6db8c5e3bf1 100644
--- a/common/utils/src/main/resources/error/error-classes.json
+++ b/common/utils/src/main/resources/error/error-classes.json
@@ -4,6 +4,11 @@
       "Non-deterministic expression <sqlExpr> should not appear in the arguments of an aggregate function."
     ]
   },
+  "ALL_PARTITION_COLUMNS_NOT_ALLOWED" : {
+    "message" : [
+      "Cannot use all columns for partition columns."
+    ]
+  },
   "ALTER_TABLE_COLUMN_DESCRIPTOR_DUPLICATE" : {
     "message" : [
       "ALTER TABLE <type> column <columnName> specifies descriptor \"<optionName>\" more than once, which is invalid."
@@ -1180,6 +1185,11 @@
     ],
     "sqlState" : "22023"
   },
+  "INVALID_HIVE_COLUMN_NAME" : {
+    "message" : [
+      "Cannot create the table <tableName> having the nested column <columnName> whose name contains invalid characters <invalidChars> in Hive metastore."
+    ]
+  },
   "INVALID_IDENTIFIER" : {
     "message" : [
       "The identifier <ident> is invalid. Please, consider quoting it with back-quotes as `<ident>`."
@@ -2081,6 +2091,16 @@
       "sortBy must be used together with bucketBy."
     ]
   },
+  "SPECIFY_BUCKETING_IS_NOT_ALLOWED" : {
+    "message" : [
+      "Cannot specify bucketing information if the table schema is not specified when creating and will be inferred at runtime."
+    ]
+  },
+  "SPECIFY_PARTITION_IS_NOT_ALLOWED" : {
+    "message" : [
+      "It is not allowed to specify partition columns when the table schema is not defined. When the table schema is not provided, schema and partition columns will be inferred."
+    ]
+  },
   "SQL_CONF_NOT_FOUND" : {
     "message" : [
       "The SQL config <sqlConf> cannot be found. Please verify that the config exists."
@@ -2303,6 +2323,23 @@
       "Attempted to unset non-existent properties [<properties>] in table <table>."
     ]
   },
+  "UNSUPPORTED_ADD_FILE" : {
+    "message" : [
+      "Don't support add file."
+    ],
+    "subClass" : {
+      "DIRECTORY" : {
+        "message" : [
+          "The file <path> is a directory, consider to set \"spark.sql.legacy.addSingleFileInAddFile\" to \"false\"."
+        ]
+      },
+      "LOCAL_DIRECTORY" : {
+        "message" : [
+          "The local directory <path> is not supported in a non-local master mode."
+        ]
+      }
+    }
+  },
   "UNSUPPORTED_ARROWTYPE" : {
     "message" : [
       "Unsupported arrow type <typeName>."
@@ -3588,11 +3625,6 @@
       "Cannot use <field> for partition column."
     ]
   },
-  "_LEGACY_ERROR_TEMP_1154" : {
-    "message" : [
-      "Cannot use all columns for partition columns."
-    ]
-  },
   "_LEGACY_ERROR_TEMP_1155" : {
     "message" : [
       "Partition column `<col>` not found in schema <schemaCatalog>."
diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala
index 24d788ff5bc..78c7ecb2782 100644
--- a/core/src/main/scala/org/apache/spark/SparkContext.scala
+++ b/core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -44,6 +44,7 @@ import org.apache.logging.log4j.Level
 import org.apache.spark.annotation.{DeveloperApi, Experimental}
 import org.apache.spark.broadcast.Broadcast
 import org.apache.spark.deploy.{LocalSparkCluster, SparkHadoopUtil}
+import org.apache.spark.errors.SparkCoreErrors
 import org.apache.spark.executor.{Executor, ExecutorMetrics, ExecutorMetricsSource}
 import org.apache.spark.input.{FixedLengthBinaryInputFormat, PortableDataStream, StreamInputFormat, WholeTextFileInputFormat}
 import org.apache.spark.internal.Logging
@@ -1737,12 +1738,10 @@ class SparkContext(config: SparkConf) extends Logging {
         val fs = hadoopPath.getFileSystem(hadoopConfiguration)
         val isDir = fs.getFileStatus(hadoopPath).isDirectory
         if (!isLocal && scheme == "file" && isDir) {
-          throw new SparkException(s"addFile does not support local directories when not running " +
-            "local mode.")
+          throw SparkCoreErrors.addLocalDirectoryError(hadoopPath)
         }
         if (!recursive && isDir) {
-          throw new SparkException(s"Added file $hadoopPath is a directory and recursive is not " +
-            "turned on.")
+          throw SparkCoreErrors.addDirectoryError(hadoopPath)
         }
       } else {
         // SPARK-17650: Make sure this is a valid URL before adding it to the list of dependencies
diff --git a/core/src/main/scala/org/apache/spark/errors/SparkCoreErrors.scala b/core/src/main/scala/org/apache/spark/errors/SparkCoreErrors.scala
index f8e7f2db259..109086c4db6 100644
--- a/core/src/main/scala/org/apache/spark/errors/SparkCoreErrors.scala
+++ b/core/src/main/scala/org/apache/spark/errors/SparkCoreErrors.scala
@@ -467,6 +467,20 @@ private[spark] object SparkCoreErrors {
       "receivedBytes" -> receivedBytes.toString).asJava)
   }
 
+  def addLocalDirectoryError(path: Path): Throwable = {
+    new SparkException(
+      errorClass = "UNSUPPORTED_ADD_FILE.LOCAL_DIRECTORY",
+      messageParameters = Map("path" -> path.toString),
+      cause = null)
+  }
+
+  def addDirectoryError(path: Path): Throwable = {
+    new SparkException(
+      errorClass = "UNSUPPORTED_ADD_FILE.DIRECTORY",
+      messageParameters = Map("path" -> path.toString),
+      cause = null)
+  }
+
   private def quoteByDefault(elem: String): String = {
     "\"" + elem + "\""
   }
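The two helpers added to SparkCoreErrors above are reached from SparkContext.addFile. A minimal way to observe the DIRECTORY variant (a sketch assuming a local master; the temp directory stands in for any directory path):

    import java.nio.file.Files
    import org.apache.spark.{SparkConf, SparkContext, SparkException}

    val sc = new SparkContext(new SparkConf().setMaster("local[1]").setAppName("addfile-demo"))
    val dir = Files.createTempDirectory("addfile").toFile
    try {
      sc.addFile(dir.getAbsolutePath) // the one-argument overload implies recursive = false
    } catch {
      case e: SparkException =>
        println(e.getErrorClass) // expected: UNSUPPORTED_ADD_FILE.DIRECTORY
    } finally {
      sc.stop()
    }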
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
index 17319eb46ad..e13d454e119 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
@@ -1600,7 +1600,7 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase {
 
   def cannotUseAllColumnsForPartitionColumnsError(): Throwable = {
     new AnalysisException(
-      errorClass = "_LEGACY_ERROR_TEMP_1154",
+      errorClass = "ALL_PARTITION_COLUMNS_NOT_ALLOWED",
       messageParameters = Map.empty)
   }
 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala
index 10197709438..af98bb13c73 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala
@@ -99,13 +99,15 @@ case class PreprocessTableCreation(catalog: SessionCatalog) extends Rule[Logical
     // we fail the query if the partitioning information is specified.
     case c @ CreateTableV1(tableDesc, _, None) if tableDesc.schema.isEmpty =>
       if (tableDesc.bucketSpec.isDefined) {
-        failAnalysis("Cannot specify bucketing information if the table schema is not specified " +
-          "when creating and will be inferred at runtime")
+        throw new AnalysisException(
+          errorClass = "SPECIFY_BUCKETING_IS_NOT_ALLOWED",
+          messageParameters = Map.empty
+        )
       }
       if (tableDesc.partitionColumnNames.nonEmpty) {
-        failAnalysis("It is not allowed to specify partition columns when the table schema is " +
-          "not defined. When the table schema is not provided, schema and partition columns " +
-          "will be inferred.")
+        throw new AnalysisException(
+          errorClass = "SPECIFY_PARTITION_IS_NOT_ALLOWED",
+          messageParameters = Map.empty)
       }
       c
@@ -316,7 +318,9 @@ case class PreprocessTableCreation(catalog: SessionCatalog) extends Rule[Logical
     SchemaUtils.checkColumnNameDuplication(normalizedPartitionCols, conf.resolver)
 
     if (schema.nonEmpty && normalizedPartitionCols.length == schema.length) {
-      failAnalysis("Cannot use all columns for partition columns")
+      throw new AnalysisException(
+        errorClass = "ALL_PARTITION_COLUMNS_NOT_ALLOWED",
+        messageParameters = Map.empty)
     }
 
     schema.filter(f => normalizedPartitionCols.contains(f.name)).map(_.dataType).foreach {
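The test changes below move from asserting on rendered message strings to checkError, which matches the error class and message parameters instead. Schematically (condensed from the suite updates that follow; the table name and path are illustrative):

    test("partition columns require a user-specified schema") {
      checkError(
        exception = intercept[AnalysisException] {
          sql("CREATE TABLE t USING org.apache.spark.sql.json " +
            "OPTIONS (path '/tmp/data') PARTITIONED BY (a)")
        },
        errorClass = "SPECIFY_PARTITION_IS_NOT_ALLOWED",
        parameters = Map.empty
      )
    }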
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
index dd126027b36..4e498e6a754 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
@@ -429,7 +429,7 @@ abstract class DDLSuite extends QueryTest with DDLSuiteBase {
     if (userSpecifiedSchema.isEmpty && userSpecifiedPartitionCols.nonEmpty) {
       checkError(
         exception = intercept[AnalysisException](sql(sqlCreateTable)),
-        errorClass = null,
+        errorClass = "SPECIFY_PARTITION_IS_NOT_ALLOWED",
         parameters = Map.empty
       )
     } else {
@@ -1316,19 +1316,21 @@ abstract class DDLSuite extends QueryTest with DDLSuiteBase {
       withTable("jsonTable") {
         (("a", "b") :: Nil).toDF().write.json(tempDir.getCanonicalPath)
 
-        val e = intercept[AnalysisException] {
-          sql(
-            s"""
-               |CREATE TABLE jsonTable
-               |USING org.apache.spark.sql.json
-               |OPTIONS (
-               |  path '${tempDir.getCanonicalPath}'
-               |)
-               |CLUSTERED BY (nonexistentColumnA) SORTED BY (nonexistentColumnB) INTO 2 BUCKETS
-             """.stripMargin)
-        }
-        assert(e.message == "Cannot specify bucketing information if the table schema is not " +
-          "specified when creating and will be inferred at runtime")
+        checkError(
+          exception = intercept[AnalysisException] {
+            sql(
+              s"""
+                 |CREATE TABLE jsonTable
+                 |USING org.apache.spark.sql.json
+                 |OPTIONS (
+                 |  path '${tempDir.getCanonicalPath}'
+                 |)
+                 |CLUSTERED BY (nonexistentColumnA) SORTED BY (nonexistentColumnB) INTO 2 BUCKETS
+               """.stripMargin)
+          },
+          errorClass = "SPECIFY_BUCKETING_IS_NOT_ALLOWED",
+          parameters = Map.empty
+        )
       }
     }
   }
@@ -2247,8 +2249,8 @@ abstract class DDLSuite extends QueryTest with DDLSuiteBase {
       exception = intercept[SparkException] {
         sql(s"ADD FILE $testDir")
       },
-      errorClass = null,
-      parameters = Map.empty
+      errorClass = "UNSUPPORTED_ADD_FILE.DIRECTORY",
+      parameters = Map("path" -> s"file:${testDir.getCanonicalPath}/")
     )
   }
 }
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
index 99728e1705b..b1eadea42e0 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
@@ -42,6 +42,7 @@ import org.apache.spark.sql.catalyst.catalog.ExternalCatalogUtils._
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.types.DataTypeUtils
 import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, CharVarcharUtils}
+import org.apache.spark.sql.catalyst.util.TypeUtils.toSQLId
 import org.apache.spark.sql.execution.command.DDLUtils
 import org.apache.spark.sql.execution.datasources.{PartitioningUtils, SourceOptions}
 import org.apache.spark.sql.hive.client.HiveClient
@@ -156,10 +157,13 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
         case st: StructType => verifyNestedColumnNames(st)
         case _ if invalidChars.exists(f.name.contains) =>
           val invalidCharsString = invalidChars.map(c => s"'$c'").mkString(", ")
-          val errMsg = "Cannot create a table having a nested column whose name contains " +
-            s"invalid characters ($invalidCharsString) in Hive metastore. Table: $tableName; " +
-            s"Column: ${f.name}"
-          throw new AnalysisException(errMsg)
+          throw new AnalysisException(
+            errorClass = "INVALID_HIVE_COLUMN_NAME",
+            messageParameters = Map(
+              "invalidChars" -> invalidCharsString,
+              "tableName" -> toSQLId(tableName.nameParts),
+              "columnName" -> toSQLId(f.name)
+            ))
         case _ =>
       }
     }
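The toSQLId helper imported above supplies the backtick quoting visible in the new message parameters; roughly (return values inferred from the test expectations below):

    import org.apache.spark.sql.catalyst.util.TypeUtils.toSQLId

    toSQLId(Seq("spark_catalog", "default", "t")) // "`spark_catalog`.`default`.`t`"
    toSQLId("nested,column")                      // "`nested,column`"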
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
index f310f7ddfdf..8e5f0b8b507 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
@@ -2875,9 +2875,6 @@ class HiveDDLSuite
   }
 
   test("SPARK-24681 checks if nested column names do not include ',', ':', and ';'") {
-    val expectedMsg = "Cannot create a table having a nested column whose name contains invalid " +
-      "characters (',', ':', ';') in Hive metastore."
-
     Seq("nested,column", "nested:column", "nested;column").foreach { nestedColumnName =>
       withTable("t") {
         checkError(
@@ -2888,8 +2885,11 @@
               .format("hive")
               .saveAsTable("t")
           },
-          errorClass = null,
-          parameters = Map.empty
+          errorClass = "INVALID_HIVE_COLUMN_NAME",
+          parameters = Map(
+            "invalidChars" -> "',', ':', ';'",
+            "tableName" -> "`spark_catalog`.`default`.`t`",
+            "columnName" -> s"`$nestedColumnName`")
         )
       }
     }
@@ -3362,7 +3362,7 @@
       exception = intercept[AnalysisException] {
         sql("CREATE TABLE tab (c1 int) PARTITIONED BY (c1) STORED AS PARQUET")
       },
-      errorClass = null,
+      errorClass = "ALL_PARTITION_COLUMNS_NOT_ALLOWED",
       parameters = Map.empty
     )
   }

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org