This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 066870d938c [SPARK-41518][SQL] Assign a name to the error class `_LEGACY_ERROR_TEMP_2422` 066870d938c is described below commit 066870d938cf7fb2f088c2a7f6a036de6fb5b7d2 Author: Max Gekk <max.g...@gmail.com> AuthorDate: Fri Dec 16 10:20:38 2022 +0300 [SPARK-41518][SQL] Assign a name to the error class `_LEGACY_ERROR_TEMP_2422` ### What changes were proposed in this pull request? In the PR, I propose to assign the new name `MISSING_GROUP_BY` to the legacy error class `_LEGACY_ERROR_TEMP_2422`, improve its error message format, and regenerate the SQL golden files. ### Why are the changes needed? To improve user experience with Spark SQL, and unify the representation of error messages. ### Does this PR introduce _any_ user-facing change? Yes, it changes a user-facing error message. ### How was this patch tested? By running the affected test suites: ``` $ PYSPARK_PYTHON=python3 build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite" ``` Closes #39061 from MaxGekk/error-class-_LEGACY_ERROR_TEMP_2422. 
Authored-by: Max Gekk <max.g...@gmail.com> Signed-off-by: Max Gekk <max.g...@gmail.com> --- core/src/main/resources/error/error-classes.json | 10 ++++----- .../sql/catalyst/analysis/CheckAnalysis.scala | 14 ++++--------- .../sql-tests/results/group-by-filter.sql.out | 12 ++++------- .../resources/sql-tests/results/group-by.sql.out | 24 ++++++++-------------- .../results/postgreSQL/select_having.sql.out | 6 +----- .../negative-cases/invalid-correlation.sql.out | 12 ++++------- .../results/udaf/udaf-group-by-ordinal.sql.out | 6 +----- .../sql-tests/results/udaf/udaf-group-by.sql.out | 12 ++++------- .../udf/postgreSQL/udf-select_having.sql.out | 6 +----- .../sql-tests/results/udf/udf-group-by.sql.out | 18 +++++----------- 10 files changed, 37 insertions(+), 83 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index ab4a93798a7..7af794b9ef9 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -863,6 +863,11 @@ ], "sqlState" : "42000" }, + "MISSING_GROUP_BY" : { + "message" : [ + "The query does not include a GROUP BY clause. Add GROUP BY or turn it into the window functions using OVER clauses." + ] + }, "MISSING_STATIC_PARTITION_COLUMN" : { "message" : [ "Unknown static partition column: <columnName>" @@ -5116,11 +5121,6 @@ "nondeterministic expression <sqlExpr> should not appear in the arguments of an aggregate function." ] }, - "_LEGACY_ERROR_TEMP_2422" : { - "message" : [ - "grouping expressions sequence is empty, and '<sqlExpr>' is not an aggregate function. Wrap '<aggExprs>' in windowing function(s) or wrap '<sqlExpr>' in first() (or first_value) if you don't care which value you get." - ] - }, "_LEGACY_ERROR_TEMP_2423" : { "message" : [ "Correlated scalar subquery '<sqlExpr>' is neither present in the group by, nor in an aggregate function. 
Add it to group by using ordinal position or wrap it in first() (or first_value) if you don't care which value you get." diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 11b2d6671c7..2c57c2b9bab 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -402,16 +402,10 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB messageParameters = Map("sqlExpr" -> expr.sql)) } } - case e: Attribute if groupingExprs.isEmpty => - // Collect all [[AggregateExpressions]]s. - val aggExprs = aggregateExprs.filter(_.collect { - case a: AggregateExpression => a - }.nonEmpty) - e.failAnalysis( - errorClass = "_LEGACY_ERROR_TEMP_2422", - messageParameters = Map( - "sqlExpr" -> e.sql, - "aggExprs" -> aggExprs.map(_.sql).mkString("(", ", ", ")"))) + case _: Attribute if groupingExprs.isEmpty => + operator.failAnalysis( + errorClass = "MISSING_GROUP_BY", + messageParameters = Map.empty) case e: Attribute if !groupingExprs.exists(_.semanticEquals(e)) => throw QueryCompilationErrors.columnNotInGroupByClauseError(e) case s: ScalarSubquery diff --git a/sql/core/src/test/resources/sql-tests/results/group-by-filter.sql.out b/sql/core/src/test/resources/sql-tests/results/group-by-filter.sql.out index 5b0231809e8..0a5a2ccb356 100644 --- a/sql/core/src/test/resources/sql-tests/results/group-by-filter.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/group-by-filter.sql.out @@ -49,17 +49,13 @@ struct<> -- !query output org.apache.spark.sql.AnalysisException { - "errorClass" : "_LEGACY_ERROR_TEMP_2422", - "messageParameters" : { - "aggExprs" : "(count(testdata.b) FILTER (WHERE (testdata.a >= 2)) AS `count(b) FILTER (WHERE (a >= 2))`)", - "sqlExpr" : "testdata.a" - }, + 
"errorClass" : "MISSING_GROUP_BY", "queryContext" : [ { "objectType" : "", "objectName" : "", - "startIndex" : 8, - "stopIndex" : 8, - "fragment" : "a" + "startIndex" : 1, + "stopIndex" : 54, + "fragment" : "SELECT a, COUNT(b) FILTER (WHERE a >= 2) FROM testData" } ] } diff --git a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out index a6202b0c1d7..177d6b07ca3 100644 --- a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out @@ -16,17 +16,13 @@ struct<> -- !query output org.apache.spark.sql.AnalysisException { - "errorClass" : "_LEGACY_ERROR_TEMP_2422", - "messageParameters" : { - "aggExprs" : "(count(testdata.b) AS `count(b)`)", - "sqlExpr" : "testdata.a" - }, + "errorClass" : "MISSING_GROUP_BY", "queryContext" : [ { "objectType" : "", "objectName" : "", - "startIndex" : 8, - "stopIndex" : 8, - "fragment" : "a" + "startIndex" : 1, + "stopIndex" : 32, + "fragment" : "SELECT a, COUNT(b) FROM testData" } ] } @@ -355,17 +351,13 @@ struct<> -- !query output org.apache.spark.sql.AnalysisException { - "errorClass" : "_LEGACY_ERROR_TEMP_2422", - "messageParameters" : { - "aggExprs" : "()", - "sqlExpr" : "id" - }, + "errorClass" : "MISSING_GROUP_BY", "queryContext" : [ { "objectType" : "", "objectName" : "", - "startIndex" : 16, - "stopIndex" : 24, - "fragment" : "range(10)" + "startIndex" : 1, + "stopIndex" : 38, + "fragment" : "SELECT id FROM range(10) HAVING id > 0" } ] } diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/select_having.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/select_having.sql.out index ef7e8ef4e26..fbd7737ab61 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/select_having.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/select_having.sql.out @@ -141,11 +141,7 @@ struct<> -- !query output 
org.apache.spark.sql.AnalysisException { - "errorClass" : "_LEGACY_ERROR_TEMP_2422", - "messageParameters" : { - "aggExprs" : "(min(spark_catalog.default.test_having.a) AS `min(a#x)`, max(spark_catalog.default.test_having.a) AS `max(a#x)`)", - "sqlExpr" : "spark_catalog.default.test_having.a" - }, + "errorClass" : "MISSING_GROUP_BY", "queryContext" : [ { "objectType" : "", "objectName" : "", diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out index 26d402479de..395c8a71821 100644 --- a/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out @@ -44,17 +44,13 @@ struct<> -- !query output org.apache.spark.sql.AnalysisException { - "errorClass" : "_LEGACY_ERROR_TEMP_2422", - "messageParameters" : { - "aggExprs" : "(avg(t2.t2b) AS avg)", - "sqlExpr" : "t2.t2b" - }, + "errorClass" : "MISSING_GROUP_BY", "queryContext" : [ { "objectType" : "", "objectName" : "", - "startIndex" : 109, - "stopIndex" : 111, - "fragment" : "t2b" + "startIndex" : 100, + "stopIndex" : 203, + "fragment" : "SELECT t2b, avg(t2b) avg\n FROM t2\n WHERE t2a = t1.t1b" } ] } diff --git a/sql/core/src/test/resources/sql-tests/results/udaf/udaf-group-by-ordinal.sql.out b/sql/core/src/test/resources/sql-tests/results/udaf/udaf-group-by-ordinal.sql.out index a1b9a3e91c5..e0d139d99d6 100644 --- a/sql/core/src/test/resources/sql-tests/results/udaf/udaf-group-by-ordinal.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udaf/udaf-group-by-ordinal.sql.out @@ -390,11 +390,7 @@ struct<> -- !query output org.apache.spark.sql.AnalysisException { - "errorClass" : "_LEGACY_ERROR_TEMP_2422", - "messageParameters" : { - "aggExprs" : "()", - "sqlExpr" : "data.a" - }, + "errorClass" : "MISSING_GROUP_BY", 
"queryContext" : [ { "objectType" : "", "objectName" : "", diff --git a/sql/core/src/test/resources/sql-tests/results/udaf/udaf-group-by.sql.out b/sql/core/src/test/resources/sql-tests/results/udaf/udaf-group-by.sql.out index 8ed164820c6..bceb50696b4 100644 --- a/sql/core/src/test/resources/sql-tests/results/udaf/udaf-group-by.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udaf/udaf-group-by.sql.out @@ -16,17 +16,13 @@ struct<> -- !query output org.apache.spark.sql.AnalysisException { - "errorClass" : "_LEGACY_ERROR_TEMP_2422", - "messageParameters" : { - "aggExprs" : "()", - "sqlExpr" : "testdata.a" - }, + "errorClass" : "MISSING_GROUP_BY", "queryContext" : [ { "objectType" : "", "objectName" : "", - "startIndex" : 8, - "stopIndex" : 8, - "fragment" : "a" + "startIndex" : 1, + "stopIndex" : 31, + "fragment" : "SELECT a, udaf(b) FROM testData" } ] } diff --git a/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-select_having.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-select_having.sql.out index 54f7a538832..fa93fbf2def 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-select_having.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-select_having.sql.out @@ -141,11 +141,7 @@ struct<> -- !query output org.apache.spark.sql.AnalysisException { - "errorClass" : "_LEGACY_ERROR_TEMP_2422", - "messageParameters" : { - "aggExprs" : "(min(spark_catalog.default.test_having.a) AS `min(a#x)`, max(spark_catalog.default.test_having.a) AS `max(a#x)`)", - "sqlExpr" : "spark_catalog.default.test_having.a" - }, + "errorClass" : "MISSING_GROUP_BY", "queryContext" : [ { "objectType" : "", "objectName" : "", diff --git a/sql/core/src/test/resources/sql-tests/results/udf/udf-group-by.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-group-by.sql.out index 61c47255eda..d1e1e50bde0 100644 --- 
a/sql/core/src/test/resources/sql-tests/results/udf/udf-group-by.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-group-by.sql.out @@ -16,11 +16,7 @@ struct<> -- !query output org.apache.spark.sql.AnalysisException { - "errorClass" : "_LEGACY_ERROR_TEMP_2422", - "messageParameters" : { - "aggExprs" : "(CAST(udf(cast(count(b) as string)) AS BIGINT) AS `udf(count(b))`)", - "sqlExpr" : "testdata.a" - }, + "errorClass" : "MISSING_GROUP_BY", "queryContext" : [ { "objectType" : "", "objectName" : "", @@ -332,17 +328,13 @@ struct<> -- !query output org.apache.spark.sql.AnalysisException { - "errorClass" : "_LEGACY_ERROR_TEMP_2422", - "messageParameters" : { - "aggExprs" : "()", - "sqlExpr" : "id" - }, + "errorClass" : "MISSING_GROUP_BY", "queryContext" : [ { "objectType" : "", "objectName" : "", - "startIndex" : 21, - "stopIndex" : 29, - "fragment" : "range(10)" + "startIndex" : 1, + "stopIndex" : 43, + "fragment" : "SELECT udf(id) FROM range(10) HAVING id > 0" } ] } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org