This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 0cd5ca5a7b3 [SPARK-43913][SQL] Assign names to the error class _LEGACY_ERROR_TEMP_[2426-2432] 0cd5ca5a7b3 is described below commit 0cd5ca5a7b31f65a005c8ee2e90a6b4a29623ba7 Author: Jiaan Geng <belie...@163.com> AuthorDate: Tue Jun 6 10:28:48 2023 +0300 [SPARK-43913][SQL] Assign names to the error class _LEGACY_ERROR_TEMP_[2426-2432] ### What changes were proposed in this pull request? This PR aims to assign names to the error class `_LEGACY_ERROR_TEMP_[2426-2432]`. ### Why are the changes needed? Improve the error framework. ### Does this PR introduce _any_ user-facing change? 'No'. ### How was this patch tested? Existing test cases. Closes #41424 from beliefer/SPARK-43913. Authored-by: Jiaan Geng <belie...@163.com> Signed-off-by: Max Gekk <max.g...@gmail.com> --- core/src/main/resources/error/error-classes.json | 58 ++++++++++++---------- .../sql/catalyst/analysis/CheckAnalysis.scala | 51 +++++++++++-------- .../sql/catalyst/analysis/AnalysisErrorSuite.scala | 20 ++++---- .../CreateTablePartitioningValidationSuite.scala | 22 ++++---- .../negative-cases/invalid-correlation.sql.out | 6 ++- .../negative-cases/invalid-correlation.sql.out | 6 ++- 6 files changed, 93 insertions(+), 70 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index de80415d85b..8c3c076ce74 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -660,6 +660,11 @@ "The event time <eventName> has the invalid type <eventType>, but expected \"TIMESTAMP\"." ] }, + "EXPRESSION_TYPE_IS_NOT_ORDERABLE" : { + "message" : [ + "Column expression <expr> cannot be sorted because its type <exprType> is not orderable." + ] + }, "FAILED_EXECUTE_UDF" : { "message" : [ "Failed to execute user defined function (<functionName>: (<signature>) => <result>)." 
@@ -1541,6 +1546,24 @@ ], "sqlState" : "42803" }, + "MISSING_ATTRIBUTES" : { + "message" : [ + "Resolved attribute(s) <missingAttributes> missing from <input> in operator <operator>." + ], + "subClass" : { + "RESOLVED_ATTRIBUTE_APPEAR_IN_OPERATION" : { + "message" : [ + "Attribute(s) with the same name appear in the operation: <operation>.", + "Please check if the right attribute(s) are used." + ] + }, + "RESOLVED_ATTRIBUTE_MISSING_FROM_INPUT" : { + "message" : [ + "" + ] + } + } + }, "MISSING_GROUP_BY" : { "message" : [ "The query does not include a GROUP BY clause. Add GROUP BY or turn it into the window functions using OVER clauses." @@ -1945,6 +1968,11 @@ "Query [id = <id>, runId = <runId>] terminated with exception: <message>" ] }, + "SUM_OF_LIMIT_AND_OFFSET_EXCEEDS_MAX_INT" : { + "message" : [ + "The sum of the LIMIT clause and the OFFSET clause must not be greater than the maximum 32-bit integer value (2,147,483,647) but found limit = <limit>, offset = <offset>." + ] + }, "TABLE_OR_VIEW_ALREADY_EXISTS" : { "message" : [ "Cannot create table or view <relationName> because it already exists.", @@ -2310,6 +2338,11 @@ "Parameter markers in unexpected statement: <statement>. Parameter markers must only be used in a query, or DML statement." ] }, + "PARTITION_WITH_NESTED_COLUMN_IS_UNSUPPORTED" : { + "message" : [ + "Invalid partitioning: <cols> is missing or is in a map or array." + ] + }, "PIVOT_AFTER_GROUP_BY" : { "message" : [ "PIVOT clause following a GROUP BY clause. Consider pushing the GROUP BY into a subquery." @@ -5525,31 +5558,6 @@ "failed to evaluate expression <sqlExpr>: <msg>" ] }, - "_LEGACY_ERROR_TEMP_2426" : { - "message" : [ - "nondeterministic expression <sqlExpr> should not appear in grouping expression." - ] - }, - "_LEGACY_ERROR_TEMP_2427" : { - "message" : [ - "sorting is not supported for columns of type <type>." 
- ] - }, - "_LEGACY_ERROR_TEMP_2428" : { - "message" : [ - "The sum of the LIMIT clause and the OFFSET clause must not be greater than the maximum 32-bit integer value (2,147,483,647) but found limit = <limit>, offset = <offset>." - ] - }, - "_LEGACY_ERROR_TEMP_2431" : { - "message" : [ - "Invalid partitioning: <cols> is missing or is in a map or array." - ] - }, - "_LEGACY_ERROR_TEMP_2432" : { - "message" : [ - "<msg>" - ] - }, "_LEGACY_ERROR_TEMP_2433" : { "message" : [ "Only a single table generating function is allowed in a SELECT clause, found:", diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 594c0b666e8..9124890d4af 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -474,9 +474,11 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB // This is just a sanity check, our analysis rule PullOutNondeterministic should // already pull out those nondeterministic expressions and evaluate them in // a Project node. 
- expr.failAnalysis( - errorClass = "_LEGACY_ERROR_TEMP_2426", - messageParameters = Map("sqlExpr" -> expr.sql)) + throw SparkException.internalError( + msg = s"Non-deterministic expression '${toSQLExpr(expr)}' should not appear in " + + "grouping expression.", + context = expr.origin.getQueryContext, + summary = expr.origin.context.summary) } } @@ -545,8 +547,8 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB orders.foreach { order => if (!RowOrdering.isOrderable(order.dataType)) { order.failAnalysis( - errorClass = "_LEGACY_ERROR_TEMP_2427", - messageParameters = Map("type" -> order.dataType.catalogString)) + errorClass = "EXPRESSION_TYPE_IS_NOT_ORDERABLE", + messageParameters = Map("exprType" -> toSQLType(order.dataType))) } } @@ -560,7 +562,7 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB val offset = offsetExpr.eval().asInstanceOf[Int] if (Int.MaxValue - limit < offset) { child.failAnalysis( - errorClass = "_LEGACY_ERROR_TEMP_2428", + errorClass = "SUM_OF_LIMIT_AND_OFFSET_EXCEEDS_MAX_INT", messageParameters = Map( "limit" -> limit.toString, "offset" -> offset.toString)) @@ -624,8 +626,9 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB if (badReferences.nonEmpty) { create.failAnalysis( - errorClass = "_LEGACY_ERROR_TEMP_2431", - messageParameters = Map("cols" -> badReferences.mkString(", "))) + errorClass = "UNSUPPORTED_FEATURE.PARTITION_WITH_NESTED_COLUMN_IS_UNSUPPORTED", + messageParameters = Map( + "cols" -> badReferences.map(r => toSQLId(r)).mkString(", "))) } create.tableSchema.foreach(f => TypeUtils.failWithIntervalType(f.dataType)) @@ -641,28 +644,34 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB operator match { case o if o.children.nonEmpty && o.missingInput.nonEmpty => - val missingAttributes = o.missingInput.mkString(",") - val input = o.inputSet.mkString(",") - val msgForMissingAttributes = s"Resolved 
attribute(s) $missingAttributes missing " + - s"from $input in operator ${operator.simpleString(SQLConf.get.maxToStringFields)}." + val missingAttributes = o.missingInput.map(attr => toSQLExpr(attr)).mkString(", ") + val input = o.inputSet.map(attr => toSQLExpr(attr)).mkString(", ") val resolver = plan.conf.resolver val attrsWithSameName = o.missingInput.filter { missing => o.inputSet.exists(input => resolver(missing.name, input.name)) } - val msg = if (attrsWithSameName.nonEmpty) { - val sameNames = attrsWithSameName.map(_.name).mkString(",") - s"$msgForMissingAttributes Attribute(s) with the same name appear in the " + - s"operation: $sameNames. Please check if the right attribute(s) are used." + if (attrsWithSameName.nonEmpty) { + val sameNames = attrsWithSameName.map(attr => toSQLExpr(attr)).mkString(", ") + o.failAnalysis( + errorClass = "MISSING_ATTRIBUTES.RESOLVED_ATTRIBUTE_APPEAR_IN_OPERATION", + messageParameters = Map( + "missingAttributes" -> missingAttributes, + "input" -> input, + "operator" -> operator.simpleString(SQLConf.get.maxToStringFields), + "operation" -> sameNames + )) } else { - msgForMissingAttributes + o.failAnalysis( + errorClass = "MISSING_ATTRIBUTES.RESOLVED_ATTRIBUTE_MISSING_FROM_INPUT", + messageParameters = Map( + "missingAttributes" -> missingAttributes, + "input" -> input, + "operator" -> operator.simpleString(SQLConf.get.maxToStringFields) + )) } - o.failAnalysis( - errorClass = "_LEGACY_ERROR_TEMP_2432", - messageParameters = Map("msg" -> msg)) - case p @ Project(exprs, _) if containsMultipleGenerators(exprs) => p.failAnalysis( errorClass = "_LEGACY_ERROR_TEMP_2433", diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala index 5f6256881a4..b657dd55eb7 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala +++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala @@ -721,7 +721,7 @@ class AnalysisErrorSuite extends AnalysisTest { errorClassTest( "the sum of num_rows in limit clause and num_rows in offset clause less than Int.MaxValue", testRelation.offset(Literal(2000000000, IntegerType)).limit(Literal(1000000000, IntegerType)), - "_LEGACY_ERROR_TEMP_2428", + "SUM_OF_LIMIT_AND_OFFSET_EXCEEDS_MAX_INT", Map("limit" -> "1000000000", "offset" -> "2000000000")) errorTest( @@ -770,14 +770,16 @@ class AnalysisErrorSuite extends AnalysisTest { assert(plan.resolved) - val resolved = s"${attrA.toString},${attrC.toString}" - - val errorMsg = s"Resolved attribute(s) $resolved missing from ${otherA.toString} " + - s"in operator !Aggregate [${aliases.mkString(", ")}]. " + - s"Attribute(s) with the same name appear in the operation: a. " + - "Please check if the right attribute(s) are used." - - assertAnalysisError(plan, errorMsg :: Nil) + assertAnalysisErrorClass( + inputPlan = plan, + expectedErrorClass = "MISSING_ATTRIBUTES.RESOLVED_ATTRIBUTE_APPEAR_IN_OPERATION", + expectedMessageParameters = Map( + "missingAttributes" -> "\"a\", \"c\"", + "input" -> "\"a\"", + "operator" -> s"!Aggregate [${aliases.mkString(", ")}]", + "operation" -> "\"a\"" + ) + ) } test("error test for self-join") { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/CreateTablePartitioningValidationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/CreateTablePartitioningValidationSuite.scala index 67441e18b0f..ba312ddbc49 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/CreateTablePartitioningValidationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/CreateTablePartitioningValidationSuite.scala @@ -40,9 +40,9 @@ class CreateTablePartitioningValidationSuite extends AnalysisTest { ignoreIfExists = false) assert(!plan.resolved) - 
assertAnalysisError(plan, Seq( - "Invalid partitioning", - "does_not_exist is missing or is in a map or array")) + assertAnalysisErrorClass(plan, + expectedErrorClass = "UNSUPPORTED_FEATURE.PARTITION_WITH_NESTED_COLUMN_IS_UNSUPPORTED", + expectedMessageParameters = Map("cols" -> "`does_not_exist`")) } test("CreateTableAsSelect: fail missing top-level column nested reference") { @@ -57,9 +57,9 @@ class CreateTablePartitioningValidationSuite extends AnalysisTest { ignoreIfExists = false) assert(!plan.resolved) - assertAnalysisError(plan, Seq( - "Invalid partitioning", - "does_not_exist.z is missing or is in a map or array")) + assertAnalysisErrorClass(plan, + expectedErrorClass = "UNSUPPORTED_FEATURE.PARTITION_WITH_NESTED_COLUMN_IS_UNSUPPORTED", + expectedMessageParameters = Map("cols" -> "`does_not_exist`.`z`")) } test("CreateTableAsSelect: fail missing nested column") { @@ -74,9 +74,9 @@ class CreateTablePartitioningValidationSuite extends AnalysisTest { ignoreIfExists = false) assert(!plan.resolved) - assertAnalysisError(plan, Seq( - "Invalid partitioning", - "point.z is missing or is in a map or array")) + assertAnalysisErrorClass(plan, + expectedErrorClass = "UNSUPPORTED_FEATURE.PARTITION_WITH_NESTED_COLUMN_IS_UNSUPPORTED", + expectedMessageParameters = Map("cols" -> "`point`.`z`")) } test("CreateTableAsSelect: fail with multiple errors") { @@ -92,8 +92,8 @@ class CreateTablePartitioningValidationSuite extends AnalysisTest { assert(!plan.resolved) assertAnalysisErrorClass(plan, - expectedErrorClass = "_LEGACY_ERROR_TEMP_2431", - expectedMessageParameters = Map("cols" -> "does_not_exist, point.z")) + expectedErrorClass = "UNSUPPORTED_FEATURE.PARTITION_WITH_NESTED_COLUMN_IS_UNSUPPORTED", + expectedMessageParameters = Map("cols" -> "`does_not_exist`, `point`.`z`")) } test("CreateTableAsSelect: success with top-level column") { diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/negative-cases/invalid-correlation.sql.out 
b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/negative-cases/invalid-correlation.sql.out index 08ddc2cfcd2..2992bc6c9a1 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/negative-cases/invalid-correlation.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/negative-cases/invalid-correlation.sql.out @@ -76,9 +76,11 @@ WHERE t1a IN (SELECT min(t2a) -- !query analysis org.apache.spark.sql.AnalysisException { - "errorClass" : "_LEGACY_ERROR_TEMP_2432", + "errorClass" : "MISSING_ATTRIBUTES.RESOLVED_ATTRIBUTE_MISSING_FROM_INPUT", "messageParameters" : { - "msg" : "Resolved attribute(s) t2b#x missing from min(t2a)#x,t2c#x in operator !Filter t2c#x IN (list#x [t2b#x])." + "input" : "\"min(t2a)\", \"t2c\"", + "missingAttributes" : "\"t2b\"", + "operator" : "!Filter t2c#x IN (list#x [t2b#x])" }, "queryContext" : [ { "objectType" : "", diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out index 39b4f87bb1b..d1eb86a2d33 100644 --- a/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out @@ -71,9 +71,11 @@ struct<> -- !query output org.apache.spark.sql.AnalysisException { - "errorClass" : "_LEGACY_ERROR_TEMP_2432", + "errorClass" : "MISSING_ATTRIBUTES.RESOLVED_ATTRIBUTE_MISSING_FROM_INPUT", "messageParameters" : { - "msg" : "Resolved attribute(s) t2b#x missing from min(t2a)#x,t2c#x in operator !Filter t2c#x IN (list#x [t2b#x])." 
+ "input" : "\"min(t2a)\", \"t2c\"", + "missingAttributes" : "\"t2b\"", + "operator" : "!Filter t2c#x IN (list#x [t2b#x])" }, "queryContext" : [ { "objectType" : "", --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org