karenfeng commented on a change in pull request #34093:
URL: https://github.com/apache/spark/pull/34093#discussion_r726629302



##########
File path: sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
##########
@@ -435,131 +435,129 @@ object QueryExecutionErrors {
   }
 
   def createStreamingSourceNotSpecifySchemaError(): Throwable = {
-    new IllegalArgumentException(
-      s"""
-         |Schema must be specified when creating a streaming source DataFrame. If some
-         |files already exist in the directory, then depending on the file format you
-         |may be able to create a static DataFrame on that directory with
-         |'spark.read.load(directory)' and infer schema from it.
-       """.stripMargin)
+    new SparkIllegalArgumentException(
+      errorClass = "MISSING_STREAMING_SOURCE_SCHEMA",
+      messageParameters = Array.empty)
   }
 
   def streamedOperatorUnsupportedByDataSourceError(
       className: String, operator: String): Throwable = {
-    new UnsupportedOperationException(
-      s"Data source $className does not support streamed $operator")
+    new SparkUnsupportedOperationException(
+      errorClass = "UNSUPPORTED_STREAMED_OPERATOR_BY_DATA_SOURCE",
+      messageParameters = Array(className, operator))
   }
 
   def multiplePathsSpecifiedError(allPaths: Seq[String]): Throwable = {
-    new IllegalArgumentException("Expected exactly one path to be specified, 
but " +
-      s"got: ${allPaths.mkString(", ")}")
+    new SparkIllegalArgumentException(
+      errorClass = "SPECIFIED_MULTIPLE_PATHS",
+      messageParameters = Array(allPaths.mkString(", ")))
   }
 
   def failedToFindDataSourceError(provider: String, error: Throwable): Throwable = {
-    new ClassNotFoundException(
-      s"""
-         |Failed to find data source: $provider. Please find packages at
-         |http://spark.apache.org/third-party-projects.html
-       """.stripMargin, error)
+    new SparkClassNotFoundException(
+      errorClass = "FAILED_FIND_DATA_SOURCE",
+      messageParameters = Array(provider), error)
   }
 
   def removedClassInSpark2Error(className: String, e: Throwable): Throwable = {
-    new ClassNotFoundException(s"$className was removed in Spark 2.0. " +
-      "Please check if your library is compatible with Spark 2.0", e)
+    new SparkClassNotFoundException(
+      errorClass = "CANNOT_FIND_CLASS_IN_SPARK2",
+      messageParameters = Array(className), e)
   }
 
   def incompatibleDataSourceRegisterError(e: Throwable): Throwable = {
-    new SparkClassNotFoundException("INCOMPATIBLE_DATASOURCE_REGISTER", 
Array(e.getMessage), e)
+    new SparkClassNotFoundException(
+      errorClass = "INCOMPATIBLE_DATASOURCE_REGISTER",
+      messageParameters = Array(e.getMessage), e)
   }
 
   def unrecognizedFileFormatError(format: String): Throwable = {
-    new IllegalStateException(s"unrecognized format $format")
+    new SparkIllegalStateException(
+      errorClass = "UNRECOGNIZED_FORMAT",
+      messageParameters = Array(format))
   }
 
   def sparkUpgradeInReadingDatesError(
       format: String, config: String, option: String): SparkUpgradeException = {
-    new SparkUpgradeException("3.0",
-      s"""
-         |reading dates before 1582-10-15 or timestamps before 1900-01-01T00:00:00Z from $format
-         |files can be ambiguous, as the files may be written by Spark 2.x or legacy versions of
-         |Hive, which uses a legacy hybrid calendar that is different from Spark 3.0+'s Proleptic
-         |Gregorian calendar. See more details in SPARK-31404. You can set the SQL config
-         |'$config' or the datasource option '$option' to 'LEGACY' to rebase the datetime values
-         |w.r.t. the calendar difference during reading. To read the datetime values as it is,
-         |set the SQL config '$config' or the datasource option '$option' to 'CORRECTED'.
-       """.stripMargin, null)
+    new SparkUpgradeException(
+      version = "3.0",
+      errorClass = "READING_AMBIGUOUS_DATES_AFTER_UPGRADE",
+      messageParameters = Array(format, config, option, config, option), null)
   }
 
   def sparkUpgradeInWritingDatesError(format: String, config: String): SparkUpgradeException = {
-    new SparkUpgradeException("3.0",
-      s"""
-         |writing dates before 1582-10-15 or timestamps before 1900-01-01T00:00:00Z into $format
-         |files can be dangerous, as the files may be read by Spark 2.x or legacy versions of Hive
-         |later, which uses a legacy hybrid calendar that is different from Spark 3.0+'s Proleptic
-         |Gregorian calendar. See more details in SPARK-31404. You can set $config to 'LEGACY' to
-         |rebase the datetime values w.r.t. the calendar difference during writing, to get maximum
-         |interoperability. Or set $config to 'CORRECTED' to write the datetime values as it is,
-         |if you are 100% sure that the written files will only be read by Spark 3.0+ or other
-         |systems that use Proleptic Gregorian calendar.
-       """.stripMargin, null)
+    new SparkUpgradeException(
+      version = "3.0",
+      errorClass = "WRITING_AMBIGUOUS_DATES_AFTER_UPGRADE",
+      messageParameters = Array(format, config, config), null)
   }
 
   def buildReaderUnsupportedForFileFormatError(format: String): Throwable = {
-    new UnsupportedOperationException(s"buildReader is not supported for 
$format")
+    new SparkUnsupportedOperationException(
+      errorClass = "UNSUPPORTED_BUILD_READER_FOR_FILE_FORMAT",
+      messageParameters = Array(format))
   }
 
   def jobAbortedError(cause: Throwable): Throwable = {
-    new SparkException("Job aborted.", cause)
+    new SparkException(
+      errorClass = "JOB_ABORTED",
+      messageParameters = Array.empty, cause)
   }
 
   def taskFailedWhileWritingRowsError(cause: Throwable): Throwable = {
-    new SparkException("Task failed while writing rows.", cause)
+    new SparkException(
+      errorClass = "TASK_FAILED_WRITING_ROWS",
+      messageParameters = Array.empty, cause)
   }
 
   def readCurrentFileNotFoundError(e: FileNotFoundException): Throwable = {
-    new FileNotFoundException(
-      s"""
-         |${e.getMessage}\n
-         |It is possible the underlying files have been updated. You can explicitly invalidate
-         |the cache in Spark by running 'REFRESH TABLE tableName' command in SQL or by
-         |recreating the Dataset/DataFrame involved.
-       """.stripMargin)
+    new SparkFileNotFoundException(
+      errorClass = "CANNOT_READ_CURRENT_FILE",
+      messageParameters = Array(e.getMessage))
   }
 
   def unsupportedSaveModeError(saveMode: String, pathExists: Boolean): Throwable = {
-    new IllegalStateException(s"unsupported save mode $saveMode ($pathExists)")
+    new SparkIllegalStateException(
+      errorClass = "UNSUPPORTED_SAVE_MODE",
+      messageParameters = Array(saveMode + " (" + pathExists.toString + ")"))
   }
 
   def cannotClearOutputDirectoryError(staticPrefixPath: Path): Throwable = {
-    new IOException(s"Unable to clear output directory $staticPrefixPath prior 
to writing to it")
+    new SparkIOException(
+      errorClass = "CANNOT_CLEAR_DIRECTORY",
+      messageParameters = Array("output", staticPrefixPath.toString))
   }
 
   def cannotClearPartitionDirectoryError(path: Path): Throwable = {
-    new IOException(s"Unable to clear partition directory $path prior to 
writing to it")
+    new SparkIOException(
+      errorClass = "CANNOT_CLEAR_DIRECTORY",
+      messageParameters = Array("partition", path.toString))
   }
 
   def failedToCastValueToDataTypeForPartitionColumnError(
       value: String, dataType: DataType, columnName: String): Throwable = {
-    new RuntimeException(s"Failed to cast value `$value` to " +
-      s"`$dataType` for partition column `$columnName`")
+    new SparkRuntimeException(
+      errorClass = "FAILED_CAST_VALUE_TO_DATATYPE_FOR_PARTITION_COLUMN",
+      messageParameters = Array(value, dataType.toString, columnName))
   }
 
   def endOfStreamError(): Throwable = {
-    new NoSuchElementException("End of stream")
+    new SparkNoSuchElementException(
+      errorClass = "END_OF_STREAM",
+      messageParameters = Array.empty)
   }
 
   def fallbackV1RelationReportsInconsistentSchemaError(
       v2Schema: StructType, v1Schema: StructType): Throwable = {
-    new IllegalArgumentException(
-      "The fallback v1 relation reports inconsistent schema:\n" +
-        "Schema of v2 scan:     " + v2Schema + "\n" +
-        "Schema of v1 relation: " + v1Schema)
+    new SparkIllegalArgumentException(
+      errorClass = "FAILED_FALLBACK_V1_BECAUSE_OF_INCONSISTENT_SCHEMA",

Review comment:
       This actually looks like it might be an internal error - @cloud-fan, can you confirm? If so, please use `INTERNAL_ERROR` instead.
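       For reference, a rough sketch of what that could look like, assuming `INTERNAL_ERROR` stays defined in error-classes.json and takes the formatted message as its single parameter (that signature is an assumption here, not something this PR establishes):

       ```scala
       // Sketch only: route the inconsistent-schema case through the generic
       // INTERNAL_ERROR class instead of a dedicated error class.
       new SparkIllegalArgumentException(
         errorClass = "INTERNAL_ERROR",
         messageParameters = Array(
           "The fallback v1 relation reports inconsistent schema:\n" +
             s"Schema of v2 scan:     $v2Schema\n" +
             s"Schema of v1 relation: $v1Schema"))
       ```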

##########
File path: core/src/main/resources/error/error-classes.json
##########
@@ -39,9 +57,31 @@
     "message" : [ "Found duplicate keys '%s'" ],
     "sqlState" : "23000"
   },
+  "END_OF_STREAM" : {
+    "message" : [ "End of stream" ]
+  },
+  "FAILED_CAST_VALUE_TO_DATATYPE_FOR_PARTITION_COLUMN" : {
+    "message" : [ "Failed to cast value `%s` to `%s` for partition column 
`%s`" ],
+    "sqlState" : "22023"
+  },
   "FAILED_EXECUTE_UDF" : {
     "message" : [ "Failed to execute user defined function (%s: (%s) => %s)" ]
   },
+  "FAILED_FALLBACK_V1_BECAUSE_OF_INCONSISTENT_SCHEMA" : {
+    "message" : [ "The fallback v1 relation reports inconsistent schema:\n 
Schema of v2 scan:     %s\nSchema of v1 relation: %s" ]
+  },
+  "FAILED_FIND_DATASOURCE" : {
+    "message" : [ "Failed to find data source: %s. Please find packages at 
http://spark.apache.org/third-party-projects.html"; ]

Review comment:
       Bump again

##########
File path: core/src/main/resources/error/error-classes.json
##########
@@ -11,19 +11,37 @@
     "message" : [ "%s cannot be represented as Decimal(%s, %s)." ],
     "sqlState" : "22005"
   },
+  "CANNOT_CLEAR_SOME_DIRECTORY" : {
+    "message" : [ "Unable to clear %s directory %s prior to writing to it" ]
+  },
+  "CANNOT_DROP_NONEMPTY_NAMESPACE" : {
+    "message" : [ "Cannot drop a non-empty namespace: %s. Use CASCADE option 
to drop a non-empty namespace." ]
+  },
   "CANNOT_EVALUATE_EXPRESSION" : {
     "message" : [ "Cannot evaluate expression: %s" ]
   },
+  "CANNOT_FIND_CLASS_IN_SPARK2" : {
+    "message" : [ "%s was removed in Spark 2.0. Please check if your library 
is compatible with Spark 2.0" ]
+  },
   "CANNOT_GENERATE_CODE_FOR_EXPRESSION" : {
     "message" : [ "Cannot generate code for expression: %s" ]
   },
   "CANNOT_PARSE_DECIMAL" : {
     "message" : [ "Cannot parse decimal" ],
     "sqlState" : "42000"
   },
+  "CANNOT_READ_CURRENT_FILE" : {
+    "message" : [ "%s \n It is possible the underlying files have been 
updated. You can explicitly invalidate the cache in Spark by running 'REFRESH 
TABLE tableName' command in SQL or by recreating the Dataset/DataFrame 
involved." ]
+  },
   "CANNOT_TERMINATE_GENERATOR" : {
     "message" : [ "Cannot terminate expression: %s" ]
   },
+  "CANNOT_UPGRADE_IN_READING_DATES" : {

Review comment:
       I believe this class is no longer used, can you remove it?

##########
File path: core/src/main/resources/error/error-classes.json
##########
@@ -11,19 +11,37 @@
     "message" : [ "%s cannot be represented as Decimal(%s, %s)." ],
     "sqlState" : "22005"
   },
+  "CANNOT_CLEAR_SOME_DIRECTORY" : {
+    "message" : [ "Unable to clear %s directory %s prior to writing to it" ]
+  },
+  "CANNOT_DROP_NONEMPTY_NAMESPACE" : {
+    "message" : [ "Cannot drop a non-empty namespace: %s. Use CASCADE option 
to drop a non-empty namespace." ]
+  },
   "CANNOT_EVALUATE_EXPRESSION" : {
     "message" : [ "Cannot evaluate expression: %s" ]
   },
+  "CANNOT_FIND_CLASS_IN_SPARK2" : {
+    "message" : [ "%s was removed in Spark 2.0. Please check if your library 
is compatible with Spark 2.0" ]
+  },
   "CANNOT_GENERATE_CODE_FOR_EXPRESSION" : {
     "message" : [ "Cannot generate code for expression: %s" ]
   },
   "CANNOT_PARSE_DECIMAL" : {
     "message" : [ "Cannot parse decimal" ],
     "sqlState" : "42000"
   },
+  "CANNOT_READ_CURRENT_FILE" : {
+    "message" : [ "%s \n It is possible the underlying files have been 
updated. You can explicitly invalidate the cache in Spark by running 'REFRESH 
TABLE tableName' command in SQL or by recreating the Dataset/DataFrame 
involved." ]
+  },
   "CANNOT_TERMINATE_GENERATOR" : {
     "message" : [ "Cannot terminate expression: %s" ]
   },
+  "CANNOT_UPGRADE_IN_READING_DATES" : {
+    "message" : [ "You may get a different result due to the upgrading of 
Spark %s reading dates before 1582-10-15 or timestamps before 
1900-01-01T00:00:00Z from %s files can be ambiguous, as the files may be 
written by Spark 2.x or legacy versions of Hive, which uses a legacy hybrid 
calendar that is different from Spark 3.0+'s Proleptic Gregorian calendar. See 
more details in SPARK-31404. You can set the SQL config '%s' or the datasource 
option '%s' to 'LEGACY' to rebase the datetime values w.r.t. the calendar 
difference during reading. To read the datetime values as it is, set the SQL 
config '%s' or the datasource option '%s' to 'CORRECTED'." ]
+  },
+  "CANNOT_UPGRADE_IN_WRITING_DATES" : {

Review comment:
       I believe this class is no longer used, can you remove it?

##########
File path: core/src/main/resources/error/error-classes.json
##########
@@ -121,13 +174,28 @@
     "message" : [ "The second argument of '%s' function needs to be an 
integer." ],
     "sqlState" : "22023"
   },
+  "SPECIFIED_MULTIPLE_PATHS" : {
+    "message" : [ "Expected exactly one path to be specified, but got: %s" ],
+    "sqlState" : "22023"
+  },
+  "TASK_FAILED_WRITING_ROWS" : {

Review comment:
       This grammar is a bit strange; maybe `FAILED_TASK_WHILE_WRITING_ROWS`?
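
       For example, the helper in QueryExecutionErrors.scala would then read roughly as follows (sketch only, assuming the entry in error-classes.json is renamed to match):

       ```scala
       // Sketch: same helper as in the diff, with the suggested error class name.
       def taskFailedWhileWritingRowsError(cause: Throwable): Throwable = {
         new SparkException(
           errorClass = "FAILED_TASK_WHILE_WRITING_ROWS",
           messageParameters = Array.empty, cause)
       }
       ```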

##########
File path: core/src/main/resources/error/error-classes.json
##########
@@ -39,9 +53,32 @@
     "message" : [ "Found duplicate keys '%s'" ],
     "sqlState" : "23000"
   },
+  "END_OF_STREAM" : {
+    "message" : [ "End of stream" ]
+  },
+  "FAILED_CAST_VALUE_TO_DATATYPE_FOR_PARTITION_COLUMN" : {
+    "message" : [ "Failed to cast value `%s` to `%s` for partition column 
`%s`" ],
+    "sqlState" : "22023"
+  },
   "FAILED_EXECUTE_UDF" : {
     "message" : [ "Failed to execute user defined function (%s: (%s) => %s)" ]
   },
+  "FAILED_FALLBACK_V1_BECAUSE_OF_INCONSISTENT_SCHEMA" : {
+    "message" : [ "The fallback v1 relation reports inconsistent schema:", 
"Schema of v2 scan:     %s", "Schema of v1 relation: %s" ],
+    "sqlState" : "22023"
+  },
+  "FAILED_FIND_DATA_SOURCE" : {
+    "message" : [ "Failed to find data source: %s. Please find packages at 
http://spark.apache.org/third-party-projects.html"; ]
+  },
+  "FAILED_FORMAT_DATETIME_IN_NEW_FORMATTER" : {
+    "message" : [ "Fail to format it to '%s' in the new formatter. You can set 
%s to LEGACY to restore the behavior before Spark 3.0, or set to CORRECTED and 
treat it as an invalid datetime string." ]
+  },
+  "FAILED_PARSE_DATETIME_IN_NEW_PARSER" : {
+    "message" : [ "Fail to parse '%s' in the new parser. You can set %s to 
LEGACY to restore the behavior before Spark 3.0, or set to CORRECTED and treat 
it as an invalid datetime string." ]
+  },
+  "FAILED_RECOGNIZE_PATTERN_AFTER_UPGRADE" : {

Review comment:
       Bump!




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


