This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 224ba162b5d6 [SPARK-48556][SQL] Fix incorrect error message pointing
to UNSUPPORTED_GROUPING_EXPRESSION
224ba162b5d6 is described below
commit 224ba162b5d6e0b8956c423f0cb097d32f1aad4d
Author: Nikola Mandic <[email protected]>
AuthorDate: Tue Jun 11 10:01:32 2024 -0700
[SPARK-48556][SQL] Fix incorrect error message pointing to
UNSUPPORTED_GROUPING_EXPRESSION
### What changes were proposed in this pull request?
The following sequence of queries produces the `UNSUPPORTED_GROUPING_EXPRESSION`
error:
```
create table t1(a int, b int) using parquet;
select grouping(a), dummy from t1 group by a with rollup;
```
However, the appropriate error should point the user to the invalid `dummy`
column name.
Fix the problem by deprioritizing `Grouping` and `GroupingID` nodes in the plan
which were not resolved and thus caused the unwanted error.
### Why are the changes needed?
To fix the described issue.
### Does this PR introduce _any_ user-facing change?
Yes, it displays proper error message to user instead of misleading one.
### How was this patch tested?
Added test to `QueryCompilationErrorsSuite`.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #46900 from nikolamand-db/SPARK-48556.
Authored-by: Nikola Mandic <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
---
.../sql/catalyst/analysis/CheckAnalysis.scala | 12 ++++++---
.../sql/errors/QueryCompilationErrorsSuite.scala | 31 ++++++++++++++++++++++
2 files changed, 39 insertions(+), 4 deletions(-)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
index f4408220ac93..bd8f8fe9f652 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
@@ -267,6 +267,7 @@ trait CheckAnalysis extends PredicateHelper with
LookupCatalog with QueryErrorsB
// Early checks for column definitions, to produce better error
messages
ColumnDefinition.checkColumnDefinitions(operator)
+ var stagedError: Option[() => Unit] = None
getAllExpressions(operator).foreach(_.foreachUp {
case a: Attribute if !a.resolved =>
failUnresolvedAttribute(operator, a, "UNRESOLVED_COLUMN")
@@ -305,12 +306,14 @@ trait CheckAnalysis extends PredicateHelper with
LookupCatalog with QueryErrorsB
s"Cannot resolve the runtime replaceable expression
${toSQLExpr(e)}. " +
s"The replacement is unresolved: ${toSQLExpr(e.replacement)}.")
+ // `Grouping` and `GroupingID` are considered as of having lower
priority than the other
+ // nodes which cause errors.
case g: Grouping =>
- g.failAnalysis(
- errorClass = "UNSUPPORTED_GROUPING_EXPRESSION",
messageParameters = Map.empty)
+ if (stagedError.isEmpty) stagedError = Some(() => g.failAnalysis(
+ errorClass = "UNSUPPORTED_GROUPING_EXPRESSION",
messageParameters = Map.empty))
case g: GroupingID =>
- g.failAnalysis(
- errorClass = "UNSUPPORTED_GROUPING_EXPRESSION",
messageParameters = Map.empty)
+ if (stagedError.isEmpty) stagedError = Some(() => g.failAnalysis(
+ errorClass = "UNSUPPORTED_GROUPING_EXPRESSION",
messageParameters = Map.empty))
case e: Expression if
e.children.exists(_.isInstanceOf[WindowFunction]) &&
!e.isInstanceOf[WindowExpression] && e.resolved =>
@@ -369,6 +372,7 @@ trait CheckAnalysis extends PredicateHelper with
LookupCatalog with QueryErrorsB
case _ =>
})
+ if (stagedError.isDefined) stagedError.get.apply()
operator match {
case RelationTimeTravel(u: UnresolvedRelation, _, _) =>
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala
index 4574d3328d48..958d2b0130d8 100644
---
a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala
+++
b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala
@@ -926,6 +926,37 @@ class QueryCompilationErrorsSuite
parameters = Map("message" -> "Cannot convert Spark data type \"DUMMY\"
to any Parquet type.")
)
}
+
+ test("SPARK-48556: Ensure UNRESOLVED_COLUMN is thrown when query has
grouping expressions " +
+ "with invalid column name") {
+ case class UnresolvedDummyColumnTest(query: String, pos: Int)
+
+ withTable("t1") {
+ sql("create table t1(a int, b int) using parquet")
+ val tests = Seq(
+ UnresolvedDummyColumnTest("select grouping(a), dummy from t1 group by
a with rollup", 20),
+ UnresolvedDummyColumnTest("select dummy, grouping(a) from t1 group by
a with rollup", 7),
+ UnresolvedDummyColumnTest(
+ "select a, case when grouping(a) = 1 then 0 else b end, count(dummy)
from t1 " +
+ "group by 1 with rollup",
+ 61),
+ UnresolvedDummyColumnTest(
+ "select a, max(dummy), case when grouping(a) = 1 then 0 else b end "
+
+ "from t1 group by 1 with rollup",
+ 14)
+ )
+ tests.foreach(test => {
+ checkError(
+ exception = intercept[AnalysisException] {
+ sql(test.query)
+ },
+ errorClass = "UNRESOLVED_COLUMN.WITH_SUGGESTION",
+ parameters = Map("objectName" -> "`dummy`", "proposal" -> "`a`,
`b`"),
+ context = ExpectedContext(fragment = "dummy", start = test.pos, stop
= test.pos + 4)
+ )
+ })
+ }
+ }
}
class MyCastToString extends SparkUserDefinedFunction(
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]