This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 66c6e19aad1 [SPARK-38688][SQL][TESTS] Use error classes in the compilation errors of deserializer 66c6e19aad1 is described below commit 66c6e19aad1e42d404b70b7dcddf871f28c3774f Author: panbingkun <pbk1...@gmail.com> AuthorDate: Mon May 16 08:31:16 2022 +0300 [SPARK-38688][SQL][TESTS] Use error classes in the compilation errors of deserializer ### What changes were proposed in this pull request? Migrate the following errors in QueryCompilationErrors: * dataTypeMismatchForDeserializerError -> UNSUPPORTED_DESERIALIZER.DATA_TYPE_MISMATCH * fieldNumberMismatchForDeserializerError -> UNSUPPORTED_DESERIALIZER.FIELD_NUMBER_MISMATCH ### Why are the changes needed? Porting compilation errors of unsupported deserializer to new error framework. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Add new UT. Closes #36479 from panbingkun/SPARK-38688. Authored-by: panbingkun <pbk1...@gmail.com> Signed-off-by: Max Gekk <max.g...@gmail.com> --- core/src/main/resources/error/error-classes.json | 11 ++++++ .../spark/sql/errors/QueryCompilationErrors.scala | 9 +++-- .../apache/spark/sql/errors/QueryErrorsBase.scala | 4 +++ .../catalyst/encoders/EncoderResolutionSuite.scala | 26 ++++++++------ .../scala/org/apache/spark/sql/DatasetSuite.scala | 18 ---------- .../sql/errors/QueryCompilationErrorsSuite.scala | 40 +++++++++++++++++++++- 6 files changed, 76 insertions(+), 32 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 3a7bc757f73..f401ea8d29a 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -200,6 +200,17 @@ "message" : [ "Unsupported data type <typeName>" ], "sqlState" : "0A000" }, + "UNSUPPORTED_DESERIALIZER" : { + "message" : [ "The deserializer is not supported: " ], + "subClass" : { + "DATA_TYPE_MISMATCH" : { + "message" : [ "need <quantifier> <desiredType> field but got <dataType>." ] + }, + "FIELD_NUMBER_MISMATCH" : { + "message" : [ "try to map <schema> to Tuple<ordinal>, but failed as the number of fields does not line up." ] + } + } + }, "UNSUPPORTED_FEATURE" : { "message" : [ "The feature is not supported: " ], "subClass" : { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index efb4389ec50..d803cd23df6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -147,14 +147,17 @@ object QueryCompilationErrors extends QueryErrorsBase { dataType: DataType, desiredType: String): Throwable = { val quantifier = if (desiredType.equals("array")) "an" else "a" new AnalysisException( - s"need $quantifier $desiredType field but got " + dataType.catalogString) + errorClass = "UNSUPPORTED_DESERIALIZER", + messageParameters = + Array("DATA_TYPE_MISMATCH", quantifier, toSQLType(desiredType), toSQLType(dataType))) } def fieldNumberMismatchForDeserializerError( schema: StructType, maxOrdinal: Int): Throwable = { new AnalysisException( - s"Try to map ${schema.catalogString} to Tuple${maxOrdinal + 1}, " + - "but failed as the number of fields does not line up.") + errorClass = "UNSUPPORTED_DESERIALIZER", + messageParameters = + Array("FIELD_NUMBER_MISMATCH", toSQLType(schema), (maxOrdinal + 1).toString)) } def upCastFailureError( diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryErrorsBase.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryErrorsBase.scala index d51ee13acef..b47b9f12fb1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryErrorsBase.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryErrorsBase.scala @@ -60,6 +60,10 @@ trait QueryErrorsBase { quoteByDefault(t.sql) } + def toSQLType(text: String): String = { + quoteByDefault(text.toUpperCase(Locale.ROOT)) + } + def toSQLConf(conf: String): String = { quoteByDefault(conf) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/EncoderResolutionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/EncoderResolutionSuite.scala index dae7340ac08..e5a3a531059 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/EncoderResolutionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/EncoderResolutionSuite.scala @@ -118,7 +118,8 @@ class EncoderResolutionSuite extends PlanTest { val encoder = ExpressionEncoder[ArrayClass] val attrs = Seq($"arr".int) assert(intercept[AnalysisException](encoder.resolveAndBind(attrs)).message == - "need an array field but got int") + """[UNSUPPORTED_DESERIALIZER.DATA_TYPE_MISMATCH] """ + + """The deserializer is not supported: need an "ARRAY" field but got "INT".""") } test("the real type is not compatible with encoder schema: array element type") { @@ -134,7 +135,8 @@ class EncoderResolutionSuite extends PlanTest { withClue("inner element is not array") { val attrs = Seq($"nestedArr".array(new StructType().add("arr", "int"))) assert(intercept[AnalysisException](encoder.resolveAndBind(attrs)).message == - "need an array field but got int") + """[UNSUPPORTED_DESERIALIZER.DATA_TYPE_MISMATCH] """ + + """The deserializer is not supported: need an "ARRAY" field but got "INT".""") } withClue("nested array element type is not compatible") { @@ -168,15 +170,17 @@ class EncoderResolutionSuite extends PlanTest { { val attrs = Seq($"a".string, $"b".long, $"c".int) assert(intercept[AnalysisException](encoder.resolveAndBind(attrs)).message == - "Try to map struct<a:string,b:bigint,c:int> to Tuple2, " + - "but failed as the number of fields does not line up.") + """[UNSUPPORTED_DESERIALIZER.FIELD_NUMBER_MISMATCH] The deserializer is not supported: """ + + """try to map "STRUCT<a: STRING, b: BIGINT, c: INT>" to Tuple2, """ + + """but failed as the number of fields does not line up.""") } { val attrs = Seq($"a".string) assert(intercept[AnalysisException](encoder.resolveAndBind(attrs)).message == - "Try to map struct<a:string> to Tuple2, " + - "but failed as the number of fields does not line up.") + """[UNSUPPORTED_DESERIALIZER.FIELD_NUMBER_MISMATCH] """ + + """The deserializer is not supported: try to map "STRUCT<a: STRING>" to Tuple2, """ + + """but failed as the number of fields does not line up.""") } } @@ -186,15 +190,17 @@ class EncoderResolutionSuite extends PlanTest { { val attrs = Seq($"a".string, $"b".struct($"x".long, $"y".string, $"z".int)) assert(intercept[AnalysisException](encoder.resolveAndBind(attrs)).message == - "Try to map struct<x:bigint,y:string,z:int> to Tuple2, " + - "but failed as the number of fields does not line up.") + """[UNSUPPORTED_DESERIALIZER.FIELD_NUMBER_MISMATCH] The deserializer is not supported: """ + + """try to map "STRUCT<x: BIGINT, y: STRING, z: INT>" to Tuple2, """ + + """but failed as the number of fields does not line up.""") } { val attrs = Seq($"a".string, $"b".struct($"x".long)) assert(intercept[AnalysisException](encoder.resolveAndBind(attrs)).message == - "Try to map struct<x:bigint> to Tuple2, " + - "but failed as the number of fields does not line up.") + """[UNSUPPORTED_DESERIALIZER.FIELD_NUMBER_MISMATCH] The deserializer is not supported: """ + + """try to map "STRUCT<x: BIGINT>" to Tuple2, """ + + """but failed as the number of fields does not line up.""") } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala index 0dc97de50ff..51c8d2c351e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala @@ -1002,24 +1002,6 @@ class DatasetSuite extends QueryTest checkDataset(cogrouped, "a13", "b24") } - test("give nice error message when the real number of fields doesn't match encoder schema") { - val ds = Seq(ClassData("a", 1), ClassData("b", 2)).toDS() - - val message = intercept[AnalysisException] { - ds.as[(String, Int, Long)] - }.message - assert(message == - "Try to map struct<a:string,b:int> to Tuple3, " + - "but failed as the number of fields does not line up.") - - val message2 = intercept[AnalysisException] { - ds.as[Tuple1[String]] - }.message - assert(message2 == - "Try to map struct<a:string,b:int> to Tuple1, " + - "but failed as the number of fields does not line up.") - } - test("SPARK-13440: Resolving option fields") { val df = Seq(1, 2, 3).toDS() val ds = df.as[Option[Int]] diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala index 40b18ad3cc7..1169f55f888 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.errors -import org.apache.spark.sql.{AnalysisException, IntegratedUDFTestUtils, QueryTest, Row} +import org.apache.spark.sql.{AnalysisException, ClassData, IntegratedUDFTestUtils, QueryTest, Row} import org.apache.spark.sql.api.java.{UDF1, UDF2, UDF23Test} import org.apache.spark.sql.expressions.SparkUserDefinedFunction import org.apache.spark.sql.functions.{grouping, grouping_id, lit, struct, sum, udf} @@ -30,6 +30,8 @@ case class StringIntClass(a: String, b: Int) case class ComplexClass(a: Long, b: StringLongClass) +case class ArrayClass(arr: Seq[StringIntClass]) + class QueryCompilationErrorsSuite extends QueryTest with QueryErrorsSuiteBase { @@ -545,6 +547,42 @@ class QueryCompilationErrorsSuite msg = "Literal expressions required for pivot values, found 'earnings#\\w+'", matchMsg = true) } + + test("UNSUPPORTED_DESERIALIZER: data type mismatch") { + val e = intercept[AnalysisException] { + sql("select 1 as arr").as[ArrayClass] + } + checkErrorClass( + exception = e, + errorClass = "UNSUPPORTED_DESERIALIZER", + errorSubClass = Some("DATA_TYPE_MISMATCH"), + msg = """The deserializer is not supported: need an "ARRAY" field but got "INT".""") + } + + test("UNSUPPORTED_DESERIALIZER: " + + "the real number of fields doesn't match encoder schema") { + val ds = Seq(ClassData("a", 1), ClassData("b", 2)).toDS() + + val e1 = intercept[AnalysisException] { + ds.as[(String, Int, Long)] + } + checkErrorClass( + exception = e1, + errorClass = "UNSUPPORTED_DESERIALIZER", + errorSubClass = Some("FIELD_NUMBER_MISMATCH"), + msg = "The deserializer is not supported: try to map \"STRUCT<a: STRING, b: INT>\" " + + "to Tuple3, but failed as the number of fields does not line up.") + + val e2 = intercept[AnalysisException] { + ds.as[Tuple1[String]] + } + checkErrorClass( + exception = e2, + errorClass = "UNSUPPORTED_DESERIALIZER", + errorSubClass = Some("FIELD_NUMBER_MISMATCH"), + msg = "The deserializer is not supported: try to map \"STRUCT<a: STRING, b: INT>\" " + + "to Tuple1, but failed as the number of fields does not line up.") + } } class MyCastToString extends SparkUserDefinedFunction( --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org