This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 066870d938c [SPARK-41518][SQL] Assign a name to the error class `_LEGACY_ERROR_TEMP_2422` 066870d938c is described below commit 066870d938cf7fb2f088c2a7f6a036de6fb5b7d2 Author: Max Gekk <max.g...@gmail.com> AuthorDate: Fri Dec 16 10:20:38 2022 +0300 [SPARK-41518][SQL] Assign a name to the error class `_LEGACY_ERROR_TEMP_2422` ### What changes were proposed in this pull request? In the PR, I propose to assign the new name `MISSING_GROUP_BY` to the legacy error class `_LEGACY_ERROR_TEMP_2422`, improve its error message format, and regenerate the SQL golden files. ### Why are the changes needed? To improve user experience with Spark SQL, and unify the representation of error messages. ### Does this PR introduce _any_ user-facing change? Yes, it changes a user-facing error message. ### How was this patch tested? By running the affected test suites: ``` $ PYSPARK_PYTHON=python3 build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite" ``` Closes #39061 from MaxGekk/error-class-_LEGACY_ERROR_TEMP_2422. 
Authored-by: Max Gekk <max.g...@gmail.com> Signed-off-by: Max Gekk <max.g...@gmail.com> --- core/src/main/resources/error/error-classes.json | 10 ++++----- .../sql/catalyst/analysis/CheckAnalysis.scala | 14 ++++--------- .../sql-tests/results/group-by-filter.sql.out | 12 ++++------- .../resources/sql-tests/results/group-by.sql.out | 24 ++++++++-------------- .../results/postgreSQL/select_having.sql.out | 6 +----- .../negative-cases/invalid-correlation.sql.out | 12 ++++------- .../results/udaf/udaf-group-by-ordinal.sql.out | 6 +----- .../sql-tests/results/udaf/udaf-group-by.sql.out | 12 ++++------- .../udf/postgreSQL/udf-select_having.sql.out | 6 +----- .../sql-tests/results/udf/udf-group-by.sql.out | 18 +++++----------- 10 files changed, 37 insertions(+), 83 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index ab4a93798a7..7af794b9ef9 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -863,6 +863,11 @@ ], "sqlState" : "42000" }, + "MISSING_GROUP_BY" : { + "message" : [ + "The query does not include a GROUP BY clause. Add GROUP BY or turn it into the window functions using OVER clauses." + ] + }, "MISSING_STATIC_PARTITION_COLUMN" : { "message" : [ "Unknown static partition column: <columnName>" @@ -5116,11 +5121,6 @@ "nondeterministic expression <sqlExpr> should not appear in the arguments of an aggregate function." ] }, - "_LEGACY_ERROR_TEMP_2422" : { - "message" : [ - "grouping expressions sequence is empty, and '<sqlExpr>' is not an aggregate function. Wrap '<aggExprs>' in windowing function(s) or wrap '<sqlExpr>' in first() (or first_value) if you don't care which value you get." - ] - }, "_LEGACY_ERROR_TEMP_2423" : { "message" : [ "Correlated scalar subquery '<sqlExpr>' is neither present in the group by, nor in an aggregate function. 
Add it to group by using ordinal position or wrap it in first() (or first_value) if you don't care which value you get." diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 11b2d6671c7..2c57c2b9bab 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -402,16 +402,10 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB messageParameters = Map("sqlExpr" -> expr.sql)) } } - case e: Attribute if groupingExprs.isEmpty => - // Collect all [[AggregateExpressions]]s. - val aggExprs = aggregateExprs.filter(_.collect { - case a: AggregateExpression => a - }.nonEmpty) - e.failAnalysis( - errorClass = "_LEGACY_ERROR_TEMP_2422", - messageParameters = Map( - "sqlExpr" -> e.sql, - "aggExprs" -> aggExprs.map(_.sql).mkString("(", ", ", ")"))) + case _: Attribute if groupingExprs.isEmpty => + operator.failAnalysis( + errorClass = "MISSING_GROUP_BY", + messageParameters = Map.empty) case e: Attribute if !groupingExprs.exists(_.semanticEquals(e)) => throw QueryCompilationErrors.columnNotInGroupByClauseError(e) case s: ScalarSubquery diff --git a/sql/core/src/test/resources/sql-tests/results/group-by-filter.sql.out b/sql/core/src/test/resources/sql-tests/results/group-by-filter.sql.out index 5b0231809e8..0a5a2ccb356 100644 --- a/sql/core/src/test/resources/sql-tests/results/group-by-filter.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/group-by-filter.sql.out @@ -49,17 +49,13 @@ struct<> -- !query output org.apache.spark.sql.AnalysisException { - "errorClass" : "_LEGACY_ERROR_TEMP_2422", - "messageParameters" : { - "aggExprs" : "(count(testdata.b) FILTER (WHERE (testdata.a >= 2)) AS `count(b) FILTER (WHERE (a >= 2))`)", - "sqlExpr" : "testdata.a" - }, + 
"errorClass" : "MISSING_GROUP_BY", "queryContext" : [ { "objectType" : "", "objectName" : "", - "startIndex" : 8, - "stopIndex" : 8, - "fragment" : "a" + "startIndex" : 1, + "stopIndex" : 54, + "fragment" : "SELECT a, COUNT(b) FILTER (WHERE a >= 2) FROM testData" } ] } diff --git a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out index a6202b0c1d7..177d6b07ca3 100644 --- a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out @@ -16,17 +16,13 @@ struct<> -- !query output org.apache.spark.sql.AnalysisException { - "errorClass" : "_LEGACY_ERROR_TEMP_2422", - "messageParameters" : { - "aggExprs" : "(count(testdata.b) AS `count(b)`)", - "sqlExpr" : "testdata.a" - }, + "errorClass" : "MISSING_GROUP_BY", "queryContext" : [ { "objectType" : "", "objectName" : "", - "startIndex" : 8, - "stopIndex" : 8, - "fragment" : "a" + "startIndex" : 1, + "stopIndex" : 32, + "fragment" : "SELECT a, COUNT(b) FROM testData" } ] } @@ -355,17 +351,13 @@ struct<> -- !query output org.apache.spark.sql.AnalysisException { - "errorClass" : "_LEGACY_ERROR_TEMP_2422", - "messageParameters" : { - "aggExprs" : "()", - "sqlExpr" : "id" - }, + "errorClass" : "MISSING_GROUP_BY", "queryContext" : [ { "objectType" : "", "objectName" : "", - "startIndex" : 16, - "stopIndex" : 24, - "fragment" : "range(10)" + "startIndex" : 1, + "stopIndex" : 38, + "fragment" : "SELECT id FROM range(10) HAVING id > 0" } ] } diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/select_having.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/select_having.sql.out index ef7e8ef4e26..fbd7737ab61 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/select_having.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/select_having.sql.out @@ -141,11 +141,7 @@ struct<> -- !query output 
org.apache.spark.sql.AnalysisException { - "errorClass" : "_LEGACY_ERROR_TEMP_2422", - "messageParameters" : { - "aggExprs" : "(min(spark_catalog.default.test_having.a) AS `min(a#x)`, max(spark_catalog.default.test_having.a) AS `max(a#x)`)", - "sqlExpr" : "spark_catalog.default.test_having.a" - }, + "errorClass" : "MISSING_GROUP_BY", "queryContext" : [ { "objectType" : "", "objectName" : "", diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out index 26d402479de..395c8a71821 100644 --- a/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out @@ -44,17 +44,13 @@ struct<> -- !query output org.apache.spark.sql.AnalysisException { - "errorClass" : "_LEGACY_ERROR_TEMP_2422", - "messageParameters" : { - "aggExprs" : "(avg(t2.t2b) AS avg)", - "sqlExpr" : "t2.t2b" - }, + "errorClass" : "MISSING_GROUP_BY", "queryContext" : [ { "objectType" : "", "objectName" : "", - "startIndex" : 109, - "stopIndex" : 111, - "fragment" : "t2b" + "startIndex" : 100, + "stopIndex" : 203, + "fragment" : "SELECT t2b, avg(t2b) avg\n FROM t2\n WHERE t2a = t1.t1b" } ] } diff --git a/sql/core/src/test/resources/sql-tests/results/udaf/udaf-group-by-ordinal.sql.out b/sql/core/src/test/resources/sql-tests/results/udaf/udaf-group-by-ordinal.sql.out index a1b9a3e91c5..e0d139d99d6 100644 --- a/sql/core/src/test/resources/sql-tests/results/udaf/udaf-group-by-ordinal.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udaf/udaf-group-by-ordinal.sql.out @@ -390,11 +390,7 @@ struct<> -- !query output org.apache.spark.sql.AnalysisException { - "errorClass" : "_LEGACY_ERROR_TEMP_2422", - "messageParameters" : { - "aggExprs" : "()", - "sqlExpr" : "data.a" - }, + "errorClass" : "MISSING_GROUP_BY", 
"queryContext" : [ { "objectType" : "", "objectName" : "", diff --git a/sql/core/src/test/resources/sql-tests/results/udaf/udaf-group-by.sql.out b/sql/core/src/test/resources/sql-tests/results/udaf/udaf-group-by.sql.out index 8ed164820c6..bceb50696b4 100644 --- a/sql/core/src/test/resources/sql-tests/results/udaf/udaf-group-by.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udaf/udaf-group-by.sql.out @@ -16,17 +16,13 @@ struct<> -- !query output org.apache.spark.sql.AnalysisException { - "errorClass" : "_LEGACY_ERROR_TEMP_2422", - "messageParameters" : { - "aggExprs" : "()", - "sqlExpr" : "testdata.a" - }, + "errorClass" : "MISSING_GROUP_BY", "queryContext" : [ { "objectType" : "", "objectName" : "", - "startIndex" : 8, - "stopIndex" : 8, - "fragment" : "a" + "startIndex" : 1, + "stopIndex" : 31, + "fragment" : "SELECT a, udaf(b) FROM testData" } ] } diff --git a/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-select_having.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-select_having.sql.out index 54f7a538832..fa93fbf2def 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-select_having.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-select_having.sql.out @@ -141,11 +141,7 @@ struct<> -- !query output org.apache.spark.sql.AnalysisException { - "errorClass" : "_LEGACY_ERROR_TEMP_2422", - "messageParameters" : { - "aggExprs" : "(min(spark_catalog.default.test_having.a) AS `min(a#x)`, max(spark_catalog.default.test_having.a) AS `max(a#x)`)", - "sqlExpr" : "spark_catalog.default.test_having.a" - }, + "errorClass" : "MISSING_GROUP_BY", "queryContext" : [ { "objectType" : "", "objectName" : "", diff --git a/sql/core/src/test/resources/sql-tests/results/udf/udf-group-by.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-group-by.sql.out index 61c47255eda..d1e1e50bde0 100644 --- 
a/sql/core/src/test/resources/sql-tests/results/udf/udf-group-by.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-group-by.sql.out @@ -16,11 +16,7 @@ struct<> -- !query output org.apache.spark.sql.AnalysisException { - "errorClass" : "_LEGACY_ERROR_TEMP_2422", - "messageParameters" : { - "aggExprs" : "(CAST(udf(cast(count(b) as string)) AS BIGINT) AS `udf(count(b))`)", - "sqlExpr" : "testdata.a" - }, + "errorClass" : "MISSING_GROUP_BY", "queryContext" : [ { "objectType" : "", "objectName" : "", @@ -332,17 +328,13 @@ struct<> -- !query output org.apache.spark.sql.AnalysisException { - "errorClass" : "_LEGACY_ERROR_TEMP_2422", - "messageParameters" : { - "aggExprs" : "()", - "sqlExpr" : "id" - }, + "errorClass" : "MISSING_GROUP_BY", "queryContext" : [ { "objectType" : "", "objectName" : "", - "startIndex" : 21, - "stopIndex" : 29, - "fragment" : "range(10)" + "startIndex" : 1, + "stopIndex" : 43, + "fragment" : "SELECT udf(id) FROM range(10) HAVING id > 0" } ] } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org