This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 224ba162b5d6 [SPARK-48556][SQL] Fix incorrect error message pointing
to UNSUPPORTED_GROUPING_EXPRESSION
224ba162b5d6 is described below
commit 224ba162b5d6e0b8956c423f0cb097d32f1aad4d
Author: Nikola Mandic <[email protected]>
AuthorDate: Tue Jun 11 10:01:32 2024 -0700
[SPARK-48556][SQL] Fix incorrect error message pointing to
UNSUPPORTED_GROUPING_EXPRESSION
### What changes were proposed in this pull request?
The following sequence of queries produces the `UNSUPPORTED_GROUPING_EXPRESSION`
error:
```
create table t1(a int, b int) using parquet;
select grouping(a), dummy from t1 group by a with rollup;
```
However, the appropriate error should point the user to the invalid `dummy`
column name.
Fix the problem by deprioritizing `Grouping` and `GroupingID` nodes in the plan
which were not resolved and thus caused the unwanted error.
### Why are the changes needed?
To fix the described issue.
### Does this PR introduce _any_ user-facing change?
Yes, it displays proper error message to user instead of misleading one.
### How was this patch tested?
Added test to `QueryCompilationErrorsSuite`.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #46900 from nikolamand-db/SPARK-48556.
Authored-by: Nikola Mandic <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
---
.../sql/catalyst/analysis/CheckAnalysis.scala | 12 ++++++---
.../sql/errors/QueryCompilationErrorsSuite.scala | 31 ++++++++++++++++++++++
2 files changed, 39 insertions(+), 4 deletions(-)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
index f4408220ac93..bd8f8fe9f652 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
@@ -267,6 +267,7 @@ trait CheckAnalysis extends PredicateHelper with
LookupCatalog with QueryErrorsB
// Early checks for column definitions, to produce better error
messages
ColumnDefinition.checkColumnDefinitions(operator)
+ var stagedError: Option[() => Unit] = None
getAllExpressions(operator).foreach(_.foreachUp {
case a: Attribute if !a.resolved =>
failUnresolvedAttribute(operator, a, "UNRESOLVED_COLUMN")
@@ -305,12 +306,14 @@ trait CheckAnalysis extends PredicateHelper with
LookupCatalog with QueryErrorsB
s"Cannot resolve the runtime replaceable expression
${toSQLExpr(e)}. " +
s"The replacement is unresolved: ${toSQLExpr(e.replacement)}.")
+ // `Grouping` and `GroupingID` are considered as of having lower
priority than the other
+ // nodes which cause errors.
case g: Grouping =>
- g.failAnalysis(
- errorClass = "UNSUPPORTED_GROUPING_EXPRESSION",
messageParameters = Map.empty)
+ if (stagedError.isEmpty) stagedError = Some(() => g.failAnalysis(
+ errorClass = "UNSUPPORTED_GROUPING_EXPRESSION",
messageParameters = Map.empty))
case g: GroupingID =>
- g.failAnalysis(
- errorClass = "UNSUPPORTED_GROUPING_EXPRESSION",
messageParameters = Map.empty)
+ if (stagedError.isEmpty) stagedError = Some(() => g.failAnalysis(
+ errorClass = "UNSUPPORTED_GROUPING_EXPRESSION",
messageParameters = Map.empty))
case e: Expression if
e.children.exists(_.isInstanceOf[WindowFunction]) &&
!e.isInstanceOf[WindowExpression] && e.resolved =>
@@ -369,6 +372,7 @@ trait CheckAnalysis extends PredicateHelper with
LookupCatalog with QueryErrorsB
case _ =>
})
+ if (stagedError.isDefined) stagedError.get.apply()
operator match {
case RelationTimeTravel(u: UnresolvedRelation, _, _) =>
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala
index 4574d3328d48..958d2b0130d8 100644
---
a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala
+++
b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala
@@ -926,6 +926,37 @@ class QueryCompilationErrorsSuite
parameters = Map("message" -> "Cannot convert Spark data type \"DUMMY\"
to any Parquet type.")
)
}
+
+ test("SPARK-48556: Ensure UNRESOLVED_COLUMN is thrown when query has
grouping expressions " +
+ "with invalid column name") {
+ case class UnresolvedDummyColumnTest(query: String, pos: Int)
+
+ withTable("t1") {
+ sql("create table t1(a int, b int) using parquet")
+ val tests = Seq(
+ UnresolvedDummyColumnTest("select grouping(a), dummy from t1 group by
a with rollup", 20),
+ UnresolvedDummyColumnTest("select dummy, grouping(a) from t1 group by
a with rollup", 7),
+ UnresolvedDummyColumnTest(
+ "select a, case when grouping(a) = 1 then 0 else b end, count(dummy)
from t1 " +
+ "group by 1 with rollup",
+ 61),
+ UnresolvedDummyColumnTest(
+ "select a, max(dummy), case when grouping(a) = 1 then 0 else b end "
+
+ "from t1 group by 1 with rollup",
+ 14)
+ )
+ tests.foreach(test => {
+ checkError(
+ exception = intercept[AnalysisException] {
+ sql(test.query)
+ },
+ errorClass = "UNRESOLVED_COLUMN.WITH_SUGGESTION",
+ parameters = Map("objectName" -> "`dummy`", "proposal" -> "`a`,
`b`"),
+ context = ExpectedContext(fragment = "dummy", start = test.pos, stop
= test.pos + 4)
+ )
+ })
+ }
+ }
}
class MyCastToString extends SparkUserDefinedFunction(
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]