karenfeng commented on a change in pull request #33805:
URL: https://github.com/apache/spark/pull/33805#discussion_r703900552



##########
File path: core/src/main/resources/error/error-classes.json
##########
@@ -1,16 +1,50 @@
 {
+  "ALTER_TABLE_WITH_DROP_PARTITION_AND_PURGE_UNSUPPORTED_ERROR" : {

Review comment:
       Given that this is an error class, you can remove the `_ERROR` suffix to reduce redundancy and clutter.
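       For example, this entry could become:
   ```
   "ALTER_TABLE_WITH_DROP_PARTITION_AND_PURGE_UNSUPPORTED" : {
     "message" : [ "ALTER TABLE ... DROP PARTITION ... PURGE" ],
     "sqlState" : "42000"
   }
   ```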

##########
File path: core/src/main/resources/error/error-classes.json
##########
@@ -1,16 +1,50 @@
 {
+  "ALTER_TABLE_WITH_DROP_PARTITION_AND_PURGE_UNSUPPORTED_ERROR" : {
+    "message" : [ "ALTER TABLE ... DROP PARTITION ... PURGE" ],
+    "sqlState" : "42000"
+  },
   "AMBIGUOUS_FIELD_NAME" : {
     "message" : [ "Field name %s is ambiguous and has %s matching fields in 
the struct." ],
     "sqlState" : "42000"
   },
+  "CANNOT_FETCH_TABLES_OF_DATABASE_ERROR" : {
+    "message" : [ "Unable to fetch tables of db %s" ]
+  },
+  "CANNOT_RECOGNIZE_HIVE_TYPE_ERROR" : {
+    "message" : [ "Cannot recognize hive type string: %s, column: %s" ],
+    "sqlState" : "0D000"

Review comment:
       I'm not sure about 0D000; would 42000 be a better fit?
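   If 42000 is the better fit, the entry would read:
   ```
   "CANNOT_RECOGNIZE_HIVE_TYPE_ERROR" : {
     "message" : [ "Cannot recognize hive type string: %s, column: %s" ],
     "sqlState" : "42000"
   }
   ```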

##########
File path: core/src/main/resources/error/error-classes.json
##########
@@ -1,16 +1,50 @@
 {
+  "ALTER_TABLE_WITH_DROP_PARTITION_AND_PURGE_UNSUPPORTED_ERROR" : {
+    "message" : [ "ALTER TABLE ... DROP PARTITION ... PURGE" ],
+    "sqlState" : "42000"
+  },
   "AMBIGUOUS_FIELD_NAME" : {
     "message" : [ "Field name %s is ambiguous and has %s matching fields in 
the struct." ],
     "sqlState" : "42000"
   },
+  "CANNOT_FETCH_TABLES_OF_DATABASE_ERROR" : {
+    "message" : [ "Unable to fetch tables of db %s" ]
+  },
+  "CANNOT_RECOGNIZE_HIVE_TYPE_ERROR" : {
+    "message" : [ "Cannot recognize hive type string: %s, column: %s" ],
+    "sqlState" : "0D000"
+  },
+  "CANNOT_SET_TIMEOUT_DURATION_ERROR" : {
+    "message" : [ "Cannot set timeout duration without enabling processing 
time timeout in [map|flatMap]GroupsWithState" ],
+    "sqlState" : "42000"
+  },
+  "CONVERT_HIVE_TABLE_TO_CATALOG_TABLE_ERROR" : {
+    "message" : [ "%s, db: %s, table: %s" ],
+    "sqlState" : "42000"
+  },
   "DIVIDE_BY_ZERO" : {
     "message" : [ "divide by zero" ],
     "sqlState" : "22012"
   },
+  "DROP_TABLE_WITH_PURGE_UNSUPPORTED_ERROR" : {
+    "message" : [ "DROP TABLE ... PURGE" ],
+    "sqlState" : "42000"
+  },
   "DUPLICATE_KEY" : {
     "message" : [ "Found duplicate keys '%s'" ],
     "sqlState" : "23000"
   },
+  "FAILED_RENAME_TEMP_FILE_ERROR" : {
+    "message" : [ "Failed to rename temp file %s to %s as rename returned 
false" ],
+    "sqlState" : "42000"
+  },
+  "GET_PARTITION_METADATA_BY_FILTER_ERROR" : {
+    "message" : [ "Caught Hive MetaException attempting to get partition 
metadata by filter\nfrom Hive. You can set the Spark configuration setting\n%s 
to true to work around\nthis problem, however this will result in degraded 
performance. Please\nreport a bug: https://issues.apache.org/jira/browse/SPARK"; 
]

Review comment:
       I believe this error message is meant to be a single string without any newlines. Given how complex this is, however, we can turn this into an array of strings which will be joined with newlines for the user. In this case, it would look like:
   ```
   "message" : [
     "Caught Hive MetaException attempting to get partition metadata by filter from Hive.",
     "You can set the Spark configuration setting %s to true to work around this problem, however this will result in degraded performance.",
     "Please report a bug: https://issues.apache.org/jira/browse/SPARK" ]
   ```

##########
File path: core/src/main/resources/error/error-classes.json
##########
@@ -52,17 +101,44 @@
     "message" : [ "PARTITION clause cannot contain a non-partition column 
name: %s" ],
     "sqlState" : "42000"
   },
+  "PARTITION_COLUMN_NOT_FOUND_IN_SCHEMA_ERROR" : {
+    "message" : [ "Partition column %s not found in schema %s" ]
+  },
   "PIVOT_VALUE_DATA_TYPE_MISMATCH" : {
     "message" : [ "Invalid pivot value '%s': value data type %s does not match 
pivot column data type %s" ],
     "sqlState" : "42000"
   },
+  "RENAME_AS_EXISTS_PATH_ERROR" : {
+    "message" : [ "Failed to rename as %s already exists" ],
+    "sqlState" : "42000"
+  },
+  "RENAME_PATH_AS_EXISTS_PATH_ERROR" : {
+    "message" : [ "Failed to rename %s to %s as destination already exists" ],
+    "sqlState" : "42000"
+  },
+  "RENAME_SRC_PATH_NOT_FOUND_ERROR" : {
+    "message" : [ "Failed to rename as %s was not found" ],
+    "sqlState" : "42000"
+  },
   "SECOND_FUNCTION_ARGUMENT_NOT_INTEGER" : {
     "message" : [ "The second argument of '%s' function needs to be an 
integer." ],
     "sqlState" : "22023"
   },
+  "SERDE_INTERFACE_NOT_FOUND_ERROR" : {
+    "message" : [ "The SerDe interface removed since Hive 2.3(HIVE-15167). 
Please migrate your custom SerDes to Hive 2.3. See HIVE-15167 for more 
details." ],
+    "sqlState" : "42000"

Review comment:
       This may be a better fit as 0A000 (feature not supported).
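   That would look like:
   ```
   "SERDE_INTERFACE_NOT_FOUND_ERROR" : {
     "message" : [ "The SerDe interface removed since Hive 2.3(HIVE-15167). Please migrate your custom SerDes to Hive 2.3. See HIVE-15167 for more details." ],
     "sqlState" : "0A000"
   }
   ```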

##########
File path: sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
##########
@@ -1283,126 +1283,145 @@ object QueryExecutionErrors {
   }
 
   def serDeInterfaceNotFoundError(e: NoClassDefFoundError): Throwable = {
-    new ClassNotFoundException("The SerDe interface removed since Hive 
2.3(HIVE-15167)." +
-      " Please migrate your custom SerDes to Hive 2.3. See HIVE-15167 for more 
details.", e)
+    new SparkClassNotFoundException(
+      errorClass = "SERDE_INTERFACE_NOT_FOUND_ERROR",
+      messageParameters = Array.empty, e)
   }
 
   def convertHiveTableToCatalogTableError(
       e: SparkException, dbName: String, tableName: String): Throwable = {
-    new SparkException(s"${e.getMessage}, db: $dbName, table: $tableName", e)
+    new SparkException(
+      errorClass = "CONVERT_HIVE_TABLE_TO_CATALOG_TABLE_ERROR",
+      messageParameters = Array(e.toString, dbName, tableName ), e)
   }
 
   def cannotRecognizeHiveTypeError(
       e: ParseException, fieldType: String, fieldName: String): Throwable = {
     new SparkException(
-      s"Cannot recognize hive type string: $fieldType, column: $fieldName", e)
+      errorClass = "CANNOT_RECOGNIZE_HIVE_TYPE_ERROR",
+      messageParameters = Array(fieldType, fieldName), e)
   }
 
   def getTablesByTypeUnsupportedByHiveVersionError(): Throwable = {
-    new UnsupportedOperationException("Hive 2.2 and lower versions don't 
support " +
-      "getTablesByType. Please use Hive 2.3 or higher version.")
+    new SparkUnsupportedOperationException(
+      errorClass = "GET_TABLES_BY_TYPE_UNSUPPORTED_BY_HIVE_VERSION_ERROR",
+      messageParameters = Array.empty
+    )
   }
 
   def dropTableWithPurgeUnsupportedError(): Throwable = {
-    new UnsupportedOperationException("DROP TABLE ... PURGE")
+    new SparkUnsupportedOperationException(
+      errorClass = "DROP_TABLE_WITH_PURGE_UNSUPPORTED_ERROR",
+      messageParameters = Array.empty
+    )
   }
 
   def alterTableWithDropPartitionAndPurgeUnsupportedError(): Throwable = {
-    new UnsupportedOperationException("ALTER TABLE ... DROP PARTITION ... 
PURGE")
+    new SparkUnsupportedOperationException(
+      errorClass = "ALTER_TABLE_WITH_DROP_PARTITION_AND_PURGE_UNSUPPORTED_ERROR",
+      messageParameters = Array.empty
+    )
   }
 
   def invalidPartitionFilterError(): Throwable = {
-    new UnsupportedOperationException(
-      """Partition filter cannot have both `"` and `'` characters""")
+    new SparkUnsupportedOperationException(
+      errorClass = "INVALID_PARTITION_FILTER_ERROR",
+      messageParameters = Array.empty
+    )
   }
 
   def getPartitionMetadataByFilterError(e: InvocationTargetException): Throwable = {
-    new RuntimeException(
-      s"""
-         |Caught Hive MetaException attempting to get partition metadata by filter
-         |from Hive. You can set the Spark configuration setting
-         |${SQLConf.HIVE_METASTORE_PARTITION_PRUNING_FALLBACK_ON_EXCEPTION} to true to work around
-         |this problem, however this will result in degraded performance. Please
-         |report a bug: https://issues.apache.org/jira/browse/SPARK
-       """.stripMargin.replaceAll("\n", " "), e)
+    new SparkRuntimeException(
+      errorClass = "GET_PARTITION_METADATA_BY_FILTER_ERROR",
+      messageParameters = Array(
+        SQLConf.HIVE_METASTORE_PARTITION_PRUNING_FALLBACK_ON_EXCEPTION.toString),
+      e)
   }
 
   def unsupportedHiveMetastoreVersionError(version: String, key: String): Throwable = {
-    new UnsupportedOperationException(s"Unsupported Hive Metastore version ($version). " +
-      s"Please set $key with a valid version.")
+    new SparkUnsupportedOperationException(
+      errorClass = "UNSUPPORTED_HIVE_METASTORE_VERSION_ERROR",
+      messageParameters = Array(version, key)
+    )
   }
 
   def loadHiveClientCausesNoClassDefFoundError(
       cnf: NoClassDefFoundError,
       execJars: Seq[URL],
       key: String,
       e: InvocationTargetException): Throwable = {
-    new ClassNotFoundException(
-      s"""
-         |$cnf when creating Hive client using classpath: ${execJars.mkString(", ")}\n
-         |Please make sure that jars for your version of hive and hadoop are included in the
-         |paths passed to $key.
-       """.stripMargin.replaceAll("\n", " "), e)
+    new SparkClassNotFoundException(
+      errorClass = "LOAD_HIVE_CLIENT_CAUSES_NO_CLASS_DEF_FOUND_ERROR",
+      messageParameters = Array(cnf.toString, execJars.mkString(", "), key ), e)
   }
 
   def cannotFetchTablesOfDatabaseError(dbName: String, e: Exception): Throwable = {
-    new SparkException(s"Unable to fetch tables of db $dbName", e)
+    new SparkException(
+      errorClass = "CANNOT_FETCH_TABLES_OF_DATABASE_ERROR",
+      messageParameters = Array(dbName), e)
   }
 
   def illegalLocationClauseForViewPartitionError(): Throwable = {
-    new SparkException("LOCATION clause illegal for view partition")
+    new SparkException(
+      errorClass = "ILLEGAL_LOCATION_CLAUSE_FOR_VIEW_PARTITION_ERROR",
+      messageParameters = Array.empty, null
+    )
   }
 
   def renamePathAsExistsPathError(srcPath: Path, dstPath: Path): Throwable = {
-    new FileAlreadyExistsException(
-      s"Failed to rename $srcPath to $dstPath as destination already exists")
+    new SparkFileAlreadyExistsException(
+      errorClass = "RENAME_PATH_AS_EXISTS_PATH_ERROR",
+      messageParameters = Array(srcPath.toString, dstPath.toString))
   }
 
   def renameAsExistsPathError(dstPath: Path): Throwable = {
-    new FileAlreadyExistsException(s"Failed to rename as $dstPath already 
exists")
+    new SparkFileAlreadyExistsException(
+      errorClass = "RENAME_PATH_AS_EXISTS_PATH_ERROR",

Review comment:
       Did you mean to use the `RENAME_PATH_AS_EXISTS_PATH_ERROR` error class here?

##########
File path: core/src/main/resources/error/error-classes.json
##########
@@ -1,16 +1,50 @@
 {
+  "ALTER_TABLE_WITH_DROP_PARTITION_AND_PURGE_UNSUPPORTED_ERROR" : {
+    "message" : [ "ALTER TABLE ... DROP PARTITION ... PURGE" ],
+    "sqlState" : "42000"
+  },
   "AMBIGUOUS_FIELD_NAME" : {
     "message" : [ "Field name %s is ambiguous and has %s matching fields in 
the struct." ],
     "sqlState" : "42000"
   },
+  "CANNOT_FETCH_TABLES_OF_DATABASE_ERROR" : {
+    "message" : [ "Unable to fetch tables of db %s" ]
+  },
+  "CANNOT_RECOGNIZE_HIVE_TYPE_ERROR" : {
+    "message" : [ "Cannot recognize hive type string: %s, column: %s" ],
+    "sqlState" : "0D000"
+  },
+  "CANNOT_SET_TIMEOUT_DURATION_ERROR" : {
+    "message" : [ "Cannot set timeout duration without enabling processing 
time timeout in [map|flatMap]GroupsWithState" ],
+    "sqlState" : "42000"
+  },
+  "CONVERT_HIVE_TABLE_TO_CATALOG_TABLE_ERROR" : {
+    "message" : [ "%s, db: %s, table: %s" ],
+    "sqlState" : "42000"
+  },
   "DIVIDE_BY_ZERO" : {
     "message" : [ "divide by zero" ],
     "sqlState" : "22012"
   },
+  "DROP_TABLE_WITH_PURGE_UNSUPPORTED_ERROR" : {
+    "message" : [ "DROP TABLE ... PURGE" ],
+    "sqlState" : "42000"

Review comment:
       0A000 may fit better here

##########
File path: core/src/main/resources/error/error-classes.json
##########
@@ -1,16 +1,50 @@
 {
+  "ALTER_TABLE_WITH_DROP_PARTITION_AND_PURGE_UNSUPPORTED_ERROR" : {
+    "message" : [ "ALTER TABLE ... DROP PARTITION ... PURGE" ],
+    "sqlState" : "42000"
+  },
   "AMBIGUOUS_FIELD_NAME" : {
     "message" : [ "Field name %s is ambiguous and has %s matching fields in 
the struct." ],
     "sqlState" : "42000"
   },
+  "CANNOT_FETCH_TABLES_OF_DATABASE_ERROR" : {
+    "message" : [ "Unable to fetch tables of db %s" ]
+  },
+  "CANNOT_RECOGNIZE_HIVE_TYPE_ERROR" : {
+    "message" : [ "Cannot recognize hive type string: %s, column: %s" ],
+    "sqlState" : "0D000"
+  },
+  "CANNOT_SET_TIMEOUT_DURATION_ERROR" : {
+    "message" : [ "Cannot set timeout duration without enabling processing 
time timeout in [map|flatMap]GroupsWithState" ],
+    "sqlState" : "42000"
+  },
+  "CONVERT_HIVE_TABLE_TO_CATALOG_TABLE_ERROR" : {
+    "message" : [ "%s, db: %s, table: %s" ],
+    "sqlState" : "42000"
+  },
   "DIVIDE_BY_ZERO" : {
     "message" : [ "divide by zero" ],
     "sqlState" : "22012"
   },
+  "DROP_TABLE_WITH_PURGE_UNSUPPORTED_ERROR" : {
+    "message" : [ "DROP TABLE ... PURGE" ],
+    "sqlState" : "42000"
+  },
   "DUPLICATE_KEY" : {
     "message" : [ "Found duplicate keys '%s'" ],
     "sqlState" : "23000"
   },
+  "FAILED_RENAME_TEMP_FILE_ERROR" : {
+    "message" : [ "Failed to rename temp file %s to %s as rename returned 
false" ],
+    "sqlState" : "42000"
+  },
+  "GET_PARTITION_METADATA_BY_FILTER_ERROR" : {
+    "message" : [ "Caught Hive MetaException attempting to get partition 
metadata by filter\nfrom Hive. You can set the Spark configuration setting\n%s 
to true to work around\nthis problem, however this will result in degraded 
performance. Please\nreport a bug: https://issues.apache.org/jira/browse/SPARK"; 
]
+  },
+  "GET_TABLES_BY_TYPE_UNSUPPORTED_BY_HIVE_VERSION_ERROR" : {
+    "message" : [ "Hive 2.2 and lower versions don't support getTablesByType. 
Please use Hive 2.3 or higher version." ],
+    "sqlState" : "42000"

Review comment:
       0A000 may be a better fit

##########
File path: core/src/main/resources/error/error-classes.json
##########
@@ -36,6 +73,18 @@
   "INVALID_JSON_SCHEMA_MAPTYPE" : {
     "message" : [ "Input schema %s can only contain StringType as a key type 
for a MapType." ]
   },
+  "INVALID_PARTITION_FILTER_ERROR" : {
+    "message" : [ "Partition filter cannot have both `\"` and `\\'` 
characters" ],
+    "sqlState" : "42000"
+  },
+  "LEGACY_METADATA_PATH_EXISTS_ERROR" : {
+    "message" : [ "Error: we detected a possible problem with the location of 
your \"_spark_metadata\"\ndirectory and you likely need to move it before 
restarting this query.\n\nEarlier version of Spark incorrectly escaped paths 
when writing out the\n\"_spark_metadata\" directory for structured streaming. 
While this was corrected in\nSpark 3.0, it appears that your query was started 
using an earlier version that\nincorrectly handled the \"_spark_metadata\" 
path.\n\nCorrect \"_spark_metadata\" Directory: %s\nIncorrect 
\"_spark_metadata\" Directory: %s\n\nPlease move the data from the incorrect 
directory to the correct one, delete the\nincorrect directory, and then restart 
this query. If you believe you are receiving\nthis message in error, you can 
disable it with the SQL conf\n%s." ],
+    "sqlState" : "42000"
+  },
+  "LOAD_HIVE_CLIENT_CAUSES_NO_CLASS_DEF_FOUND_ERROR" : {
+    "message" : [ "%s when creating Hive client using classpath: %s\nPlease 
make sure that jars for your version of hive and hadoop are included in 
the\npaths passed to %s." ],

Review comment:
       You can also split this one.
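   Following the same pattern as above, something like:
   ```
   "message" : [
     "%s when creating Hive client using classpath: %s",
     "Please make sure that jars for your version of hive and hadoop are included in the paths passed to %s." ]
   ```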

##########
File path: core/src/main/resources/error/error-classes.json
##########
@@ -36,6 +73,18 @@
   "INVALID_JSON_SCHEMA_MAPTYPE" : {
     "message" : [ "Input schema %s can only contain StringType as a key type 
for a MapType." ]
   },
+  "INVALID_PARTITION_FILTER_ERROR" : {
+    "message" : [ "Partition filter cannot have both `\"` and `\\'` 
characters" ],
+    "sqlState" : "42000"
+  },
+  "LEGACY_METADATA_PATH_EXISTS_ERROR" : {
+    "message" : [ "Error: we detected a possible problem with the location of 
your \"_spark_metadata\"\ndirectory and you likely need to move it before 
restarting this query.\n\nEarlier version of Spark incorrectly escaped paths 
when writing out the\n\"_spark_metadata\" directory for structured streaming. 
While this was corrected in\nSpark 3.0, it appears that your query was started 
using an earlier version that\nincorrectly handled the \"_spark_metadata\" 
path.\n\nCorrect \"_spark_metadata\" Directory: %s\nIncorrect 
\"_spark_metadata\" Directory: %s\n\nPlease move the data from the incorrect 
directory to the correct one, delete the\nincorrect directory, and then restart 
this query. If you believe you are receiving\nthis message in error, you can 
disable it with the SQL conf\n%s." ],

Review comment:
       You can split the message here into array elements, which will be re-joined with newlines later.
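   For example (a blank line becomes an empty array element):
   ```
   "message" : [
     "Error: we detected a possible problem with the location of your \"_spark_metadata\" directory and you likely need to move it before restarting this query.",
     "",
     "Earlier version of Spark incorrectly escaped paths when writing out the \"_spark_metadata\" directory for structured streaming. While this was corrected in Spark 3.0, it appears that your query was started using an earlier version that incorrectly handled the \"_spark_metadata\" path.",
     "",
     "Correct \"_spark_metadata\" Directory: %s",
     "Incorrect \"_spark_metadata\" Directory: %s",
     "",
     "Please move the data from the incorrect directory to the correct one, delete the incorrect directory, and then restart this query. If you believe you are receiving this message in error, you can disable it with the SQL conf %s." ]
   ```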

##########
File path: core/src/main/resources/error/error-classes.json
##########
@@ -52,17 +101,44 @@
     "message" : [ "PARTITION clause cannot contain a non-partition column 
name: %s" ],
     "sqlState" : "42000"
   },
+  "PARTITION_COLUMN_NOT_FOUND_IN_SCHEMA_ERROR" : {
+    "message" : [ "Partition column %s not found in schema %s" ]
+  },
   "PIVOT_VALUE_DATA_TYPE_MISMATCH" : {
     "message" : [ "Invalid pivot value '%s': value data type %s does not match 
pivot column data type %s" ],
     "sqlState" : "42000"
   },
+  "RENAME_AS_EXISTS_PATH_ERROR" : {

Review comment:
       This doesn't make much sense grammatically; maybe `RENAME_OVERWRITES_EXISTING_PATH`? We could also consolidate it with `RENAME_PATH_AS_EXISTS_PATH_ERROR` and have all callers throw the single error class.
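   A sketch of the consolidated entry (using the destination-only message, since not every call site has a source path):
   ```
   "RENAME_OVERWRITES_EXISTING_PATH" : {
     "message" : [ "Failed to rename as %s already exists" ],
     "sqlState" : "42000"
   }
   ```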



