This is an automated email from the ASF dual-hosted git repository.
maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 5d840eb4553 [SPARK-44299][SQL] Assign names to the error class _LEGACY_ERROR_TEMP_227[4-6,8]
5d840eb4553 is described below
commit 5d840eb455350ef3f6235a031a1689bf4a51007d
Author: panbingkun <[email protected]>
AuthorDate: Thu Jul 6 10:08:45 2023 +0300
[SPARK-44299][SQL] Assign names to the error class _LEGACY_ERROR_TEMP_227[4-6,8]
### What changes were proposed in this pull request?
This PR aims to assign proper names to the following error classes:
- _LEGACY_ERROR_TEMP_2274 => UNSUPPORTED_FEATURE.REPLACE_NESTED_COLUMN
- _LEGACY_ERROR_TEMP_2275 => CANNOT_INVOKE_IN_TRANSFORMATIONS
- _LEGACY_ERROR_TEMP_2276 => UNSUPPORTED_FEATURE.HIVE_WITH_ANSI_INTERVALS
- _LEGACY_ERROR_TEMP_2278 => INVALID_FORMAT.MISMATCH_INPUT
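As a hedged illustration (not part of the patch itself), this is roughly how two of the renamed conditions surface in spark-shell; the exact rendered text comes from error-classes.json, so the messages below are approximate:
```scala
// INVALID_FORMAT.MISMATCH_INPUT: '123456' lacks the thousands separator
// required by the '999G999' format, so to_number fails at execution time.
spark.sql("SELECT to_number('123456', '999G999')").collect()
// org.apache.spark.SparkIllegalArgumentException: [INVALID_FORMAT.MISMATCH_INPUT]
// with parameters: inputType = "STRING", input = 123456, format = 999G999

// try_to_number returns NULL instead of raising the error.
spark.sql("SELECT try_to_number('123456', '999G999')").show()

// UNSUPPORTED_FEATURE.REPLACE_NESTED_COLUMN: na.replace on a nested column
// (assumes df has a struct column c1 with a field c1-1, as in the updated suite).
df.na.replace("c1.c1-1", Map("b1" -> "a1"))
// org.apache.spark.SparkUnsupportedOperationException:
// [UNSUPPORTED_FEATURE.REPLACE_NESTED_COLUMN] with colName = `c1`.`c1-1`
```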
### Why are the changes needed?
The changes improve the error framework.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
- Update & Add new UT.
- Manually test.
- Pass GA.
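A sketch of the manual check for the remaining two conditions (assumes a Hive-enabled spark-shell; the table and DataFrame names are illustrative):
```scala
// UNSUPPORTED_FEATURE.HIVE_WITH_ANSI_INTERVALS: STORED AS makes this a
// Hive-provider table, which cannot hold ANSI interval columns.
spark.sql("CREATE TABLE tbl (ym INTERVAL YEAR) STORED AS PARQUET")
// org.apache.spark.SparkUnsupportedOperationException:
// [UNSUPPORTED_FEATURE.HIVE_WITH_ANSI_INTERVALS]
// with tableName = `spark_catalog`.`default`.`tbl`

// CANNOT_INVOKE_IN_TRANSFORMATIONS: invoking an action on df2 inside
// df1.map runs on executors and is rejected, mirroring the new UT.
import spark.implicits._
val df1 = Seq(1).toDF("a")
val df2 = Seq((4, 5)).toDF("e", "f")
df1.map(r => df2.count() * r.getInt(0)).collect()
// org.apache.spark.SparkException whose cause carries
// errorClass CANNOT_INVOKE_IN_TRANSFORMATIONS
```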
Closes #41858 from panbingkun/SPARK-44299.
Authored-by: panbingkun <[email protected]>
Signed-off-by: Max Gekk <[email protected]>
---
.../src/main/resources/error/error-classes.json | 40 +++++++++++-----------
...-error-conditions-invalid-format-error-class.md | 4 +++
...r-conditions-unsupported-feature-error-class.md | 8 +++++
docs/sql-error-conditions.md | 6 ++++
.../spark/sql/catalyst/util/ToNumberParser.scala | 4 +--
.../spark/sql/errors/QueryExecutionErrors.scala | 20 +++++------
.../expressions/StringExpressionsSuite.scala | 9 +++--
.../apache/spark/sql/execution/command/ddl.scala | 2 +-
.../sql-tests/results/postgreSQL/numeric.sql.out | 10 +++---
.../results/postgreSQL/numeric.sql.out.java21 | 10 +++---
.../apache/spark/sql/DataFrameFunctionsSuite.scala | 13 +++++++
.../spark/sql/DataFrameNaFunctionsSuite.scala | 12 ++++---
.../spark/sql/hive/execution/HiveDDLSuite.scala | 2 +-
.../command/AlterTableAddColumnsSuite.scala | 13 ++++---
14 files changed, 101 insertions(+), 52 deletions(-)
diff --git a/common/utils/src/main/resources/error/error-classes.json b/common/utils/src/main/resources/error/error-classes.json
index 8bdb02470ef..44bec5e8ced 100644
--- a/common/utils/src/main/resources/error/error-classes.json
+++ b/common/utils/src/main/resources/error/error-classes.json
@@ -128,6 +128,11 @@
],
"sqlState" : "22546"
},
+ "CANNOT_INVOKE_IN_TRANSFORMATIONS" : {
+ "message" : [
+ "Dataset transformations and actions can only be invoked by the driver,
not inside of other Dataset transformations; for example, dataset1.map(x =>
dataset2.values.count() * x) is invalid because the values transformation and
count action cannot be performed inside of the dataset1.map transformation. For
more information, see SPARK-28702."
+ ]
+ },
"CANNOT_LOAD_FUNCTION_CLASS" : {
"message" : [
"Cannot load class <className> when registering the function
<functionName>, please make sure it is on the classpath."
@@ -1192,6 +1197,11 @@
"The escape character is not allowed to precede <char>."
]
},
+ "MISMATCH_INPUT" : {
+ "message" : [
+ "The input <inputType> '<input>' does not match the format."
+ ]
+ },
"THOUSANDS_SEPS_MUST_BEFORE_DEC" : {
"message" : [
"Thousands separators (, or G) may not appear after the decimal
point in the number format."
@@ -2583,6 +2593,11 @@
"Drop the namespace <namespace>."
]
},
+ "HIVE_WITH_ANSI_INTERVALS" : {
+ "message" : [
+ "Hive table <tableName> with ANSI intervals."
+ ]
+ },
"INSERT_PARTITION_SPEC_IF_NOT_EXISTS" : {
"message" : [
"INSERT INTO <tableName> with IF NOT EXISTS in the PARTITION spec."
@@ -2663,6 +2678,11 @@
"Remove a comment from the namespace <namespace>."
]
},
+ "REPLACE_NESTED_COLUMN" : {
+ "message" : [
+ "The replace function does not support nested column <colName>."
+ ]
+ },
"SET_NAMESPACE_PROPERTY" : {
"message" : [
"<property> is a reserved namespace property, <msg>."
@@ -5627,31 +5647,11 @@
"<message>"
]
},
- "_LEGACY_ERROR_TEMP_2274" : {
- "message" : [
- "Nested field <colName> is not supported."
- ]
- },
- "_LEGACY_ERROR_TEMP_2275" : {
- "message" : [
- "Dataset transformations and actions can only be invoked by the driver,
not inside of other Dataset transformations; for example, dataset1.map(x =>
dataset2.values.count() * x) is invalid because the values transformation and
count action cannot be performed inside of the dataset1.map transformation. For
more information, see SPARK-28702."
- ]
- },
- "_LEGACY_ERROR_TEMP_2276" : {
- "message" : [
- "Hive table <tableName> with ANSI intervals is not supported."
- ]
- },
"_LEGACY_ERROR_TEMP_2277" : {
"message" : [
"Number of dynamic partitions created is <numWrittenParts>, which is
more than <maxDynamicPartitions>. To solve this try to set
<maxDynamicPartitionsKey> to at least <numWrittenParts>."
]
},
- "_LEGACY_ERROR_TEMP_2278" : {
- "message" : [
- "The input <valueType> '<input>' does not match the given number format:
'<format>'."
- ]
- },
"_LEGACY_ERROR_TEMP_2320" : {
"message" : [
"distinct aggregates are not allowed in observed metrics, but found:
<sqlExpr>."
diff --git a/docs/sql-error-conditions-invalid-format-error-class.md b/docs/sql-error-conditions-invalid-format-error-class.md
index 365296ba29f..6470883435c 100644
--- a/docs/sql-error-conditions-invalid-format-error-class.md
+++ b/docs/sql-error-conditions-invalid-format-error-class.md
@@ -49,6 +49,10 @@ The escape character is not allowed to end with.
The escape character is not allowed to precede `<char>`.
+## MISMATCH_INPUT
+
+The input <inputType> '<input>' does not match the format.
+
## THOUSANDS_SEPS_MUST_BEFORE_DEC
Thousands separators (, or G) may not appear after the decimal point in the number format.
diff --git a/docs/sql-error-conditions-unsupported-feature-error-class.md b/docs/sql-error-conditions-unsupported-feature-error-class.md
index 232338bb12b..25f09118f74 100644
--- a/docs/sql-error-conditions-unsupported-feature-error-class.md
+++ b/docs/sql-error-conditions-unsupported-feature-error-class.md
@@ -69,6 +69,10 @@ DESC TABLE COLUMN for a specific partition.
Drop the namespace `<namespace>`.
+## HIVE_WITH_ANSI_INTERVALS
+
+Hive table <tableName> with ANSI intervals.
+
## INSERT_PARTITION_SPEC_IF_NOT_EXISTS
INSERT INTO `<tableName>` with IF NOT EXISTS in the PARTITION spec.
@@ -117,6 +121,10 @@ Parameter markers are not allowed in `<statement>`.
Invalid partitioning: `<cols>` is missing or is in a map or array.
+## REPLACE_NESTED_COLUMN
+
+The replace function does not support nested column <colName>.
+
## PIVOT_AFTER_GROUP_BY
PIVOT clause following a GROUP BY clause. Consider pushing the GROUP BY into a subquery.
diff --git a/docs/sql-error-conditions.md b/docs/sql-error-conditions.md
index b069f694ff3..6396f7a08d3 100644
--- a/docs/sql-error-conditions.md
+++ b/docs/sql-error-conditions.md
@@ -153,6 +153,12 @@ Cannot convert SQL `<sqlColumn>` to Protobuf `<protobufColumn>` because `<data>`
Cannot decode url : `<url>`.
+### CANNOT_INVOKE_IN_TRANSFORMATIONS
+
+SQLSTATE: none assigned
+
+Dataset transformations and actions can only be invoked by the driver, not inside of other Dataset transformations; for example, dataset1.map(x => dataset2.values.count() * x) is invalid because the values transformation and count action cannot be performed inside of the dataset1.map transformation. For more information, see SPARK-28702.
+
### CANNOT_LOAD_FUNCTION_CLASS
SQLSTATE: none assigned
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ToNumberParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ToNumberParser.scala
index 5099b3fdb4b..d56bca30a05 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ToNumberParser.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ToNumberParser.scala
@@ -594,14 +594,14 @@ class ToNumberParser(numberFormat: String, errorOnFail: Boolean) extends Seriali
private def formatMatchFailure(input: UTF8String, originNumberFormat: String): Decimal = {
if (errorOnFail) {
throw QueryExecutionErrors.invalidNumberFormatError(
- "string", input.toString, originNumberFormat)
+ StringType, input.toString, originNumberFormat)
}
null
}
private def formatMatchFailure(input: Decimal, originNumberFormat: String): UTF8String = {
if (errorOnFail) {
throw QueryExecutionErrors.invalidNumberFormatError(
- "Decimal value", input.toString, originNumberFormat)
+ DecimalType.fromDecimal(input), input.toString, originNumberFormat)
}
null
}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
index eded5e6534f..157bb1b6192 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
@@ -2604,14 +2604,13 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase {
def nestedFieldUnsupportedError(colName: String): SparkUnsupportedOperationException = {
new SparkUnsupportedOperationException(
- errorClass = "_LEGACY_ERROR_TEMP_2274",
- messageParameters = Map(
- "colName" -> colName))
+ errorClass = "UNSUPPORTED_FEATURE.REPLACE_NESTED_COLUMN",
+ messageParameters = Map("colName" -> toSQLId(colName)))
}
def transformationsAndActionsNotInvokedByDriverError(): Throwable = {
new SparkException(
- errorClass = "_LEGACY_ERROR_TEMP_2275",
+ errorClass = "CANNOT_INVOKE_IN_TRANSFORMATIONS",
messageParameters = Map.empty,
cause = null)
}
@@ -2683,10 +2682,11 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase {
"functionName" -> toSQLId("aes_encrypt")))
}
- def hiveTableWithAnsiIntervalsError(tableName: String): SparkUnsupportedOperationException = {
+ def hiveTableWithAnsiIntervalsError(
+ table: TableIdentifier): SparkUnsupportedOperationException = {
new SparkUnsupportedOperationException(
- errorClass = "_LEGACY_ERROR_TEMP_2276",
- messageParameters = Map("tableName" -> tableName))
+ errorClass = "UNSUPPORTED_FEATURE.HIVE_WITH_ANSI_INTERVALS",
+ messageParameters = Map("tableName" -> toSQLId(table.nameParts)))
}
def cannotConvertOrcTimestampToTimestampNTZError(): Throwable = {
@@ -2720,11 +2720,11 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase {
}
def invalidNumberFormatError(
- valueType: String, input: String, format: String): SparkIllegalArgumentException = {
+ dataType: DataType, input: String, format: String): SparkIllegalArgumentException = {
new SparkIllegalArgumentException(
- errorClass = "_LEGACY_ERROR_TEMP_2278",
+ errorClass = "INVALID_FORMAT.MISMATCH_INPUT",
messageParameters = Map(
- "valueType" -> valueType,
+ "inputType" -> toSQLType(dataType),
"input" -> input,
"format" -> format))
}
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala
index f320012d131..006c4a78056 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala
@@ -1474,8 +1474,13 @@ class StringExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
val toNumberExpr = ToNumber(Literal(str), Literal(format))
assert(toNumberExpr.checkInputDataTypes() == TypeCheckResult.TypeCheckSuccess)
- checkExceptionInExpression[SparkIllegalArgumentException](
- toNumberExpr, "does not match the given number format")
+ checkErrorInExpression[SparkIllegalArgumentException](
+ toNumberExpr,
+ errorClass = "INVALID_FORMAT.MISMATCH_INPUT",
+ parameters = Map(
+ "inputType" -> "\"STRING\"",
+ "input" -> str,
+ "format" -> format))
val tryToNumberExpr = TryToNumber(Literal(str), Literal(format))
assert(tryToNumberExpr.checkInputDataTypes() == TypeCheckResult.TypeCheckSuccess)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala
index 8acf52b1250..bbe0d3c0c83 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala
@@ -994,7 +994,7 @@ object DDLUtils extends Logging {
case HIVE_PROVIDER =>
val serde = table.storage.serde
if (schema.exists(_.dataType.isInstanceOf[AnsiIntervalType])) {
- throw hiveTableWithAnsiIntervalsError(table.identifier.toString)
+ throw hiveTableWithAnsiIntervalsError(table.identifier)
} else if (serde == HiveSerDe.sourceToSerDe("orc").get.serde) {
checkDataColNames("orc", schema)
} else if (serde == HiveSerDe.sourceToSerDe("parquet").get.serde ||
diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/numeric.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/numeric.sql.out
index 61b7a07631c..98159c947a2 100644
--- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/numeric.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/numeric.sql.out
@@ -4697,11 +4697,12 @@ struct<>
-- !query output
org.apache.spark.SparkIllegalArgumentException
{
- "errorClass" : "_LEGACY_ERROR_TEMP_2278",
+ "errorClass" : "INVALID_FORMAT.MISMATCH_INPUT",
+ "sqlState" : "42601",
"messageParameters" : {
"format" : "99G999G999",
"input" : "-34,338,492",
- "valueType" : "string"
+ "inputType" : "\"STRING\""
}
}
@@ -4797,11 +4798,12 @@ struct<>
-- !query output
org.apache.spark.SparkIllegalArgumentException
{
- "errorClass" : "_LEGACY_ERROR_TEMP_2278",
+ "errorClass" : "INVALID_FORMAT.MISMATCH_INPUT",
+ "sqlState" : "42601",
"messageParameters" : {
"format" : "999G999",
"input" : "123456",
- "valueType" : "string"
+ "inputType" : "\"STRING\""
}
}
diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/numeric.sql.out.java21 b/sql/core/src/test/resources/sql-tests/results/postgreSQL/numeric.sql.out.java21
index a1d9dbbc830..ad27a63837e 100644
--- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/numeric.sql.out.java21
+++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/numeric.sql.out.java21
@@ -4697,11 +4697,12 @@ struct<>
-- !query output
org.apache.spark.SparkIllegalArgumentException
{
- "errorClass" : "_LEGACY_ERROR_TEMP_2278",
+ "errorClass" : "INVALID_FORMAT.MISMATCH_INPUT",
+ "sqlState" : "42601",
"messageParameters" : {
"format" : "99G999G999",
"input" : "-34,338,492",
- "valueType" : "string"
+ "inputType" : "\"STRING\""
}
}
@@ -4797,11 +4798,12 @@ struct<>
-- !query output
org.apache.spark.SparkIllegalArgumentException
{
- "errorClass" : "_LEGACY_ERROR_TEMP_2278",
+ "errorClass" : "INVALID_FORMAT.MISMATCH_INPUT",
+ "sqlState" : "42601",
"messageParameters" : {
"format" : "999G999",
"input" : "123456",
- "valueType" : "string"
+ "inputType" : "\"STRING\""
}
}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
index e7277451e04..c28ee3d8483 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
@@ -5901,6 +5901,19 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession {
checkAnswer(df2, Seq(Row(Row(1, 2))))
assert(df2.schema === expectedSchema)
}
+
+ test("CANNOT_INVOKE_IN_TRANSFORMATIONS - Dataset transformations and actions
" +
+ "can only be invoked by the driver, not inside of other Dataset
transformations") {
+ val df1 = Seq((1)).toDF("a")
+ val df2 = Seq((4, 5)).toDF("e", "f")
+ checkError(
+ exception = intercept[SparkException] {
+ df1.map(r => df2.count() * r.getInt(0)).collect()
+ }.getCause.asInstanceOf[SparkException],
+ errorClass = "CANNOT_INVOKE_IN_TRANSFORMATIONS",
+ parameters = Map.empty
+ )
+ }
}
object DataFrameFunctionsSuite {
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameNaFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameNaFunctionsSuite.scala
index 14baa75d81b..fb4bad5b9fa 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameNaFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameNaFunctionsSuite.scala
@@ -19,6 +19,7 @@ package org.apache.spark.sql
import scala.collection.JavaConverters._
+import org.apache.spark.SparkUnsupportedOperationException
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.test.SharedSparkSession
import org.apache.spark.sql.types.{StringType, StructType}
@@ -546,9 +547,12 @@ class DataFrameNaFunctionsSuite extends QueryTest with SharedSparkSession {
test("SPARK-34649: replace value of a nested column") {
val df = createDFWithNestedColumns
- val exception = intercept[UnsupportedOperationException] {
- df.na.replace("c1.c1-1", Map("b1" ->"a1"))
- }
- assert(exception.getMessage.equals("Nested field c1.c1-1 is not supported."))
+ checkError(
+ exception = intercept[SparkUnsupportedOperationException] {
+ df.na.replace("c1.c1-1", Map("b1" ->"a1"))
+ },
+ errorClass = "UNSUPPORTED_FEATURE.REPLACE_NESTED_COLUMN",
+ parameters = Map("colName" -> "`c1`.`c1-1`")
+ )
}
}
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
index 8e5f0b8b507..692c2215fde 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
@@ -3311,7 +3311,7 @@ class HiveDDLSuite
exception = intercept[SparkUnsupportedOperationException] {
sql(sqlCmd)
},
- errorClass = "_LEGACY_ERROR_TEMP_2276",
+ errorClass = "UNSUPPORTED_FEATURE.HIVE_WITH_ANSI_INTERVALS",
parameters = Map("tableName" ->
s"`$SESSION_CATALOG_NAME`.`default`.`$tbl`")
)
}
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/AlterTableAddColumnsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/AlterTableAddColumnsSuite.scala
index 2b28890eb45..3ae2ff562d1 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/AlterTableAddColumnsSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/AlterTableAddColumnsSuite.scala
@@ -17,6 +17,8 @@
package org.apache.spark.sql.hive.execution.command
+import org.apache.spark.SparkUnsupportedOperationException
+import org.apache.spark.sql.catalyst.util.TypeUtils.toSQLId
import org.apache.spark.sql.execution.command.v1
/**
@@ -29,10 +31,13 @@ class AlterTableAddColumnsSuite
test("SPARK-36949: Disallow tables with ANSI intervals when the provider is
Hive") {
def check(tbl: String): Unit = {
- val errMsg = intercept[UnsupportedOperationException] {
- sql(s"ALTER TABLE $tbl ADD COLUMNS (ym INTERVAL YEAR)")
- }.getMessage
- assert(errMsg.contains("ANSI intervals is not supported"))
+ checkError(
+ exception = intercept[SparkUnsupportedOperationException] {
+ sql(s"ALTER TABLE $tbl ADD COLUMNS (ym INTERVAL YEAR)")
+ },
+ errorClass = "UNSUPPORTED_FEATURE.HIVE_WITH_ANSI_INTERVALS",
+ parameters = Map("tableName" -> toSQLId(tbl))
+ )
}
withNamespaceAndTable("ns", "tbl") { tbl =>
sql(s"CREATE TABLE $tbl (id INT) $defaultUsing")
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]