karenfeng commented on a change in pull request #34093:
URL: https://github.com/apache/spark/pull/34093#discussion_r729268536
##########
File path: core/src/main/resources/error/error-classes.json
##########
@@ -39,9 +57,31 @@
"message" : [ "Found duplicate keys '%s'" ],
"sqlState" : "23000"
},
+ "END_OF_STREAM" : {
+ "message" : [ "End of stream" ]
+ },
+ "FAILED_CAST_VALUE_TO_DATATYPE_FOR_PARTITION_COLUMN" : {
+ "message" : [ "Failed to cast value `%s` to `%s` for partition column
`%s`" ],
+ "sqlState" : "22023"
Review comment:
Would 22005 (error in assignment) be a better fit?
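For illustration, the entry would then read as follows (a sketch only; the class name and message text are unchanged from the hunk above):

```json
"FAILED_CAST_VALUE_TO_DATATYPE_FOR_PARTITION_COLUMN" : {
  "message" : [ "Failed to cast value `%s` to `%s` for partition column `%s`" ],
  "sqlState" : "22005"
}
```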
##########
File path: core/src/main/resources/error/error-classes.json
##########
@@ -39,9 +53,32 @@
"message" : [ "Found duplicate keys '%s'" ],
"sqlState" : "23000"
},
+ "END_OF_STREAM" : {
+ "message" : [ "End of stream" ]
+ },
+ "FAILED_CAST_VALUE_TO_DATATYPE_FOR_PARTITION_COLUMN" : {
+ "message" : [ "Failed to cast value `%s` to `%s` for partition column
`%s`" ],
+ "sqlState" : "22023"
+ },
"FAILED_EXECUTE_UDF" : {
"message" : [ "Failed to execute user defined function (%s: (%s) => %s)" ]
},
+ "FAILED_FALLBACK_V1_BECAUSE_OF_INCONSISTENT_SCHEMA" : {
+ "message" : [ "The fallback v1 relation reports inconsistent schema:",
"Schema of v2 scan: %s", "Schema of v1 relation: %s" ],
+ "sqlState" : "22023"
+ },
+ "FAILED_FIND_DATA_SOURCE" : {
+ "message" : [ "Failed to find data source: %s. Please find packages at
http://spark.apache.org/third-party-projects.html" ]
+ },
+ "FAILED_FORMAT_DATETIME_IN_NEW_FORMATTER" : {
+ "message" : [ "Fail to format it to '%s' in the new formatter. You can set
%s to LEGACY to restore the behavior before Spark 3.0, or set to CORRECTED and
treat it as an invalid datetime string." ]
+ },
+ "FAILED_PARSE_DATETIME_IN_NEW_PARSER" : {
+ "message" : [ "Fail to parse '%s' in the new parser. You can set %s to
LEGACY to restore the behavior before Spark 3.0, or set to CORRECTED and treat
it as an invalid datetime string." ]
+ },
+ "FAILED_RECOGNIZE_PATTERN_AFTER_UPGRADE" : {
Review comment:
E.g., `FAILED_RECOGNIZE_DATE_TIME_PATTERN_AFTER_UPGRADE`
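If renamed, the entry would become something like the following (a sketch; the message body is elided because it does not appear in this hunk):

```json
"FAILED_RECOGNIZE_DATE_TIME_PATTERN_AFTER_UPGRADE" : {
  "message" : [ "..." ]
}
```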
##########
File path: core/src/main/resources/error/error-classes.json
##########
@@ -30,16 +44,43 @@
"message" : [ "Found duplicate keys '%s'" ],
"sqlState" : "23000"
},
+ "END_OF_STREAM" : {
+ "message" : [ "End of stream" ]
+ },
+ "FAILED_CAST_VALUE_TO_DATATYPE_FOR_PARTITION_COLUMN" : {
+ "message" : [ "Failed to cast value `%s` to `%s` for partition column
`%s`" ],
+ "sqlState" : "22023"
+ },
"FAILED_EXECUTE_UDF" : {
"message" : [ "Failed to execute user defined function (%s: (%s) => %s)" ]
},
+ "FAILED_FALLBACK_V1_BECAUSE_OF_INCONSISTENT_SCHEMA" : {
+ "message" : [ "The fallback v1 relation reports inconsistent schema:",
"Schema of v2 scan: %s", "Schema of v1 relation: %s" ],
+ "sqlState" : "22023"
Review comment:
When you change this to an internal error, please remove this `sqlState`.
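For illustration, the entry without the state would read (a sketch; message text as in the hunk above):

```json
"FAILED_FALLBACK_V1_BECAUSE_OF_INCONSISTENT_SCHEMA" : {
  "message" : [ "The fallback v1 relation reports inconsistent schema:", "Schema of v2 scan: %s", "Schema of v1 relation: %s" ]
}
```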
##########
File path: sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
##########
@@ -435,131 +435,129 @@ object QueryExecutionErrors {
}
def createStreamingSourceNotSpecifySchemaError(): Throwable = {
- new IllegalArgumentException(
- s"""
- |Schema must be specified when creating a streaming source DataFrame. If some
- |files already exist in the directory, then depending on the file format you
- |may be able to create a static DataFrame on that directory with
- |'spark.read.load(directory)' and infer schema from it.
- """.stripMargin)
+ new SparkIllegalArgumentException(
+ errorClass = "MISSING_STREAMING_SOURCE_SCHEMA",
+ messageParameters = Array.empty)
}
def streamedOperatorUnsupportedByDataSourceError(
className: String, operator: String): Throwable = {
- new UnsupportedOperationException(
- s"Data source $className does not support streamed $operator")
+ new SparkUnsupportedOperationException(
+ errorClass = "UNSUPPORTED_STREAMED_OPERATOR_BY_DATA_SOURCE",
+ messageParameters = Array(className, operator))
}
def multiplePathsSpecifiedError(allPaths: Seq[String]): Throwable = {
- new IllegalArgumentException("Expected exactly one path to be specified,
but " +
- s"got: ${allPaths.mkString(", ")}")
+ new SparkIllegalArgumentException(
+ errorClass = "SPECIFIED_MULTIPLE_PATHS",
+ messageParameters = Array(allPaths.mkString(", ")))
}
def failedToFindDataSourceError(provider: String, error: Throwable): Throwable = {
- new ClassNotFoundException(
- s"""
- |Failed to find data source: $provider. Please find packages at
- |http://spark.apache.org/third-party-projects.html
- """.stripMargin, error)
+ new SparkClassNotFoundException(
+ errorClass = "FAILED_FIND_DATA_SOURCE",
+ messageParameters = Array(provider), error)
}
def removedClassInSpark2Error(className: String, e: Throwable): Throwable = {
- new ClassNotFoundException(s"$className was removed in Spark 2.0. " +
- "Please check if your library is compatible with Spark 2.0", e)
+ new SparkClassNotFoundException(
+ errorClass = "CANNOT_FIND_CLASS_IN_SPARK2",
+ messageParameters = Array(className), e)
}
def incompatibleDataSourceRegisterError(e: Throwable): Throwable = {
- new SparkClassNotFoundException("INCOMPATIBLE_DATASOURCE_REGISTER", Array(e.getMessage), e)
+ new SparkClassNotFoundException(
+ errorClass = "INCOMPATIBLE_DATASOURCE_REGISTER",
+ messageParameters = Array(e.getMessage), e)
}
def unrecognizedFileFormatError(format: String): Throwable = {
- new IllegalStateException(s"unrecognized format $format")
+ new SparkIllegalStateException(
+ errorClass = "UNRECOGNIZED_FORMAT",
+ messageParameters = Array(format))
}
def sparkUpgradeInReadingDatesError(
format: String, config: String, option: String): SparkUpgradeException = {
- new SparkUpgradeException("3.0",
- s"""
- |reading dates before 1582-10-15 or timestamps before 1900-01-01T00:00:00Z from $format
- |files can be ambiguous, as the files may be written by Spark 2.x or legacy versions of
- |Hive, which uses a legacy hybrid calendar that is different from Spark 3.0+'s Proleptic
- |Gregorian calendar. See more details in SPARK-31404. You can set the SQL config
- |'$config' or the datasource option '$option' to 'LEGACY' to rebase the datetime values
- |w.r.t. the calendar difference during reading. To read the datetime values as it is,
- |set the SQL config '$config' or the datasource option '$option' to 'CORRECTED'.
- """.stripMargin, null)
+ new SparkUpgradeException(
+ version = "3.0",
+ errorClass = "READING_AMBIGUOUS_DATES_AFTER_UPGRADE",
+ messageParameters = Array(format, config, option, config, option), null)
}
def sparkUpgradeInWritingDatesError(format: String, config: String): SparkUpgradeException = {
- new SparkUpgradeException("3.0",
- s"""
- |writing dates before 1582-10-15 or timestamps before 1900-01-01T00:00:00Z into $format
- |files can be dangerous, as the files may be read by Spark 2.x or legacy versions of Hive
- |later, which uses a legacy hybrid calendar that is different from Spark 3.0+'s Proleptic
- |Gregorian calendar. See more details in SPARK-31404. You can set $config to 'LEGACY' to
- |rebase the datetime values w.r.t. the calendar difference during writing, to get maximum
- |interoperability. Or set $config to 'CORRECTED' to write the datetime values as it is,
- |if you are 100% sure that the written files will only be read by Spark 3.0+ or other
- |systems that use Proleptic Gregorian calendar.
- """.stripMargin, null)
+ new SparkUpgradeException(
+ version = "3.0",
+ errorClass = "WRITING_AMBIGUOUS_DATES_AFTER_UPGRADE",
+ messageParameters = Array(format, config, config), null)
}
def buildReaderUnsupportedForFileFormatError(format: String): Throwable = {
- new UnsupportedOperationException(s"buildReader is not supported for
$format")
+ new SparkUnsupportedOperationException(
+ errorClass = "UNSUPPORTED_BUILD_READER_FOR_FILE_FORMAT",
+ messageParameters = Array(format))
}
def jobAbortedError(cause: Throwable): Throwable = {
- new SparkException("Job aborted.", cause)
+ new SparkException(
+ errorClass = "JOB_ABORTED",
+ messageParameters = Array.empty, cause)
}
def taskFailedWhileWritingRowsError(cause: Throwable): Throwable = {
- new SparkException("Task failed while writing rows.", cause)
+ new SparkException(
+ errorClass = "FAILED_TASK_WHILE_WRITING_ROWS",
+ messageParameters = Array.empty, cause)
}
def readCurrentFileNotFoundError(e: FileNotFoundException): Throwable = {
- new FileNotFoundException(
- s"""
- |${e.getMessage}\n
- |It is possible the underlying files have been updated. You can explicitly invalidate
- |the cache in Spark by running 'REFRESH TABLE tableName' command in SQL or by
- |recreating the Dataset/DataFrame involved.
- """.stripMargin)
+ new SparkFileNotFoundException(
+ errorClass = "CANNOT_READ_CURRENT_FILE",
+ messageParameters = Array(e.getMessage))
}
def unsupportedSaveModeError(saveMode: String, pathExists: Boolean): Throwable = {
- new IllegalStateException(s"unsupported save mode $saveMode ($pathExists)")
+ new SparkIllegalStateException(
+ errorClass = "UNSUPPORTED_SAVE_MODE",
+ messageParameters = Array(saveMode + " (" + pathExists.toString + ")"))
Review comment:
Maybe we can make this into 2 different parameters? Then it would look
like `%s (%s)`.
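Something like this (a sketch; it assumes the JSON template is updated in tandem to `"unsupported save mode %s (%s)"`):

```scala
// Pass the save mode and the path-exists flag as separate parameters,
// so the "(%s)" formatting lives in the message template, not at the call site.
new SparkIllegalStateException(
  errorClass = "UNSUPPORTED_SAVE_MODE",
  messageParameters = Array(saveMode, pathExists.toString))
```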
##########
File path: core/src/main/resources/error/error-classes.json
##########
@@ -134,13 +181,28 @@
"message" : [ "The second argument of '%s' function needs to be an
integer." ],
"sqlState" : "22023"
},
+ "SPECIFIED_MULTIPLE_PATHS" : {
+ "message" : [ "Expected exactly one path to be specified, but got: %s" ],
+ "sqlState" : "22023"
Review comment:
I think that 42000 may actually be a better fit than 22023; 22023 is a
data exception. This is more of a syntax error. Can you apply this across your
PR as well?
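For illustration, the entry with the suggested state (a sketch; 42000 is the standard SQLSTATE class for "syntax error or access rule violation"):

```json
"SPECIFIED_MULTIPLE_PATHS" : {
  "message" : [ "Expected exactly one path to be specified, but got: %s" ],
  "sqlState" : "42000"
}
```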