This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch branch-3.4 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.4 by this push: new 681a1de72bdf [SPARK-48081][SQL][3.4] Fix ClassCastException in NTile.checkInputDataTypes() when argument is non-foldable or of wrong type 681a1de72bdf is described below commit 681a1de72bdf749e0a0782dde9bddfcbb3248d99 Author: Josh Rosen <joshro...@databricks.com> AuthorDate: Thu May 2 12:50:54 2024 -0700 [SPARK-48081][SQL][3.4] Fix ClassCastException in NTile.checkInputDataTypes() when argument is non-foldable or of wrong type branch-3.4 pick of PR https://github.com/apache/spark/pull/46333 , fixing test issue due to difference in expected error message parameter formatting across branches; original description follows below: --- ### What changes were proposed in this pull request? While migrating the `NTile` expression's type check failures to the new error class framework, PR https://github.com/apache/spark/pull/38457 removed a pair of necessary `return` statements and thus caused certain branches' values to be discarded rather than returned. As a result, invalid usages like ``` select ntile(99.9) OVER (order by id) from range(10) ``` trigger internal errors like ``` java.lang.ClassCastException: class org.apache.spark.sql.types.Decimal cannot be cast to class java.lang.Integer (org.apache.spark.sql.types.Decimal is in unnamed module of loader 'app'; java.lang.Integer is in module java.base of loader 'bootstrap') at scala.runtime.BoxesRunTime.unboxToInt(BoxesRunTime.java:99) at org.apache.spark.sql.catalyst.expressions.NTile.checkInputDataTypes(windowExpressions.scala:877) ``` instead of clear error framework errors like ``` org.apache.spark.sql.catalyst.ExtendedAnalysisException: [DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "ntile(99.9)" due to data type mismatch: The first parameter requires the "INT" type, however "99.9" has the type "DECIMAL(3,1)". 
SQLSTATE: 42K09; line 1 pos 7; 'Project [unresolvedalias(ntile(99.9) windowspecdefinition(id#0L ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())))] +- Range (0, 10, step=1, splits=None) at org.apache.spark.sql.catalyst.analysis.package$AnalysisErrorAt.dataTypeMismatch(package.scala:73) at org.apache.spark.sql.catalyst.analysis.CheckAnalysis.$anonfun$checkAnalysis0$7(CheckAnalysis.scala:315) ``` ### Why are the changes needed? Improve error messages. ### Does this PR introduce _any_ user-facing change? Yes, it improves an error message. ### How was this patch tested? Added a new test case to AnalysisErrorSuite. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #46337 from JoshRosen/SPARK-48081-branch-3.4. Authored-by: Josh Rosen <joshro...@databricks.com> Signed-off-by: Dongjoon Hyun <dh...@apple.com> --- .../catalyst/expressions/windowExpressions.scala | 4 +-- .../sql/catalyst/analysis/AnalysisErrorSuite.scala | 34 ++++++++++++++++++++++ 2 files changed, 36 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala index 2d11b581ee4c..adc32866f58d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala @@ -848,7 +848,7 @@ case class NTile(buckets: Expression) extends RowNumberLike with SizeBasedWindow // for each partition. 
override def checkInputDataTypes(): TypeCheckResult = { if (!buckets.foldable) { - DataTypeMismatch( + return DataTypeMismatch( errorSubClass = "NON_FOLDABLE_INPUT", messageParameters = Map( "inputName" -> "buckets", @@ -859,7 +859,7 @@ case class NTile(buckets: Expression) extends RowNumberLike with SizeBasedWindow } if (buckets.dataType != IntegerType) { - DataTypeMismatch( + return DataTypeMismatch( errorSubClass = "UNEXPECTED_INPUT_TYPE", messageParameters = Map( "paramIndex" -> "1", diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala index cbd6749807f7..ebc133719238 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala @@ -316,6 +316,40 @@ class AnalysisErrorSuite extends AnalysisTest { listRelation.select(Explode($"list").as("a"), Explode($"list").as("b")), "only one generator" :: "explode" :: Nil) + errorClassTest( + "the buckets of ntile window function is not foldable", + testRelation2.select( + WindowExpression( + NTile(Literal(99.9f)), + WindowSpecDefinition( + UnresolvedAttribute("a") :: Nil, + SortOrder(UnresolvedAttribute("b"), Ascending) :: Nil, + UnspecifiedFrame)).as("window")), + errorClass = "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE", + messageParameters = Map( + "sqlExpr" -> "\"ntile(99.9)\"", + "paramIndex" -> "1", + "inputSql" -> "\"99.9\"", + "inputType" -> "\"FLOAT\"", + "requiredType" -> "\"INT\"")) + + + errorClassTest( + "the buckets of ntile window function is not int literal", + testRelation2.select( + WindowExpression( + NTile(AttributeReference("b", IntegerType)()), + WindowSpecDefinition( + UnresolvedAttribute("a") :: Nil, + SortOrder(UnresolvedAttribute("b"), Ascending) :: Nil, + UnspecifiedFrame)).as("window")), + errorClass = 
"DATATYPE_MISMATCH.NON_FOLDABLE_INPUT", + messageParameters = Map( + "sqlExpr" -> "\"ntile(b)\"", + "inputName" -> "buckets", + "inputExpr" -> "\"b\"", + "inputType" -> "\"INT\"")) + errorClassTest( "unresolved attributes", testRelation.select($"abcd"), --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org