This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new b99a64b0fd1c [SPARK-48081] Fix ClassCastException in NTile.checkInputDataTypes() when argument is non-foldable or of wrong type
b99a64b0fd1c is described below

commit b99a64b0fd1cf4b32dd2f17423775db87bae20a6
Author: Josh Rosen <joshro...@databricks.com>
AuthorDate: Thu May 2 07:22:44 2024 -0700

    [SPARK-48081] Fix ClassCastException in NTile.checkInputDataTypes() when argument is non-foldable or of wrong type
    
    ### What changes were proposed in this pull request?
    
    While migrating the `NTile` expression's type check failures to the new
    error class framework, PR https://github.com/apache/spark/pull/38457 removed
    a pair of necessary `return` statements, so the `DataTypeMismatch` results
    built in those branches are discarded rather than returned (a standalone
    sketch of the resulting fall-through appears at the end of this section).
    
    As a result, invalid usages like
    
    ```
    select ntile(99.9) OVER (order by id) from range(10)
    ```
    
    trigger internal errors like
    
    ```
     java.lang.ClassCastException: class org.apache.spark.sql.types.Decimal cannot be cast to class java.lang.Integer (org.apache.spark.sql.types.Decimal is in unnamed module of loader 'app'; java.lang.Integer is in module java.base of loader 'bootstrap')
      at scala.runtime.BoxesRunTime.unboxToInt(BoxesRunTime.java:99)
      at org.apache.spark.sql.catalyst.expressions.NTile.checkInputDataTypes(windowExpressions.scala:877)
    ```
    
    instead of a clear error from the error class framework, such as
    
    ```
    org.apache.spark.sql.catalyst.ExtendedAnalysisException: [DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "ntile(99.9)" due to data type mismatch: The first parameter requires the "INT" type, however "99.9" has the type "DECIMAL(3,1)". SQLSTATE: 42K09; line 1 pos 7;
    'Project [unresolvedalias(ntile(99.9) windowspecdefinition(id#0L ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())))]
    +- Range (0, 10, step=1, splits=None)

      at org.apache.spark.sql.catalyst.analysis.package$AnalysisErrorAt.dataTypeMismatch(package.scala:73)
      at org.apache.spark.sql.catalyst.analysis.CheckAnalysis.$anonfun$checkAnalysis0$7(CheckAnalysis.scala:315)
    ```
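
    For illustration, here is a minimal standalone Scala sketch of the
    control-flow issue (the names `CheckResult`, `checkBuggy` and `checkFixed`
    are hypothetical, not the actual Spark code): in Scala, an `if` expression
    whose value is neither returned nor otherwise used is silently discarded,
    so without the `return` the check falls through to the unboxing code below
    it and fails in the same way as the stack trace above.

    ```
    import scala.util.Try

    object ReturnDemo {
      sealed trait CheckResult
      case object Passed extends CheckResult
      case class Mismatch(message: String) extends CheckResult

      // Buggy shape: the Mismatch built in the `if` branch is discarded because
      // nothing returns or uses it, so execution continues past the check.
      def checkBuggy(buckets: Any): CheckResult = {
        if (!buckets.isInstanceOf[Int]) {
          Mismatch("expected INT")  // value silently dropped
        }
        // Falls through and unboxes, throwing ClassCastException for non-Int
        // input (the analogue of BoxesRunTime.unboxToInt above).
        if (buckets.asInstanceOf[Int] > 0) Passed else Mismatch("must be positive")
      }

      // Fixed shape: `return` short-circuits with the Mismatch, which is what
      // this patch restores.
      def checkFixed(buckets: Any): CheckResult = {
        if (!buckets.isInstanceOf[Int]) {
          return Mismatch("expected INT")
        }
        if (buckets.asInstanceOf[Int] > 0) Passed else Mismatch("must be positive")
      }

      def main(args: Array[String]): Unit = {
        println(Try(checkBuggy(BigDecimal("99.9"))))  // Failure(java.lang.ClassCastException: ...)
        println(checkFixed(BigDecimal("99.9")))       // Mismatch(expected INT)
      }
    }
    ```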
    
    ### Why are the changes needed?
    
    Improve error messages.
    
    ### Does this PR introduce _any_ user-facing change?
    
    Yes, it improves an error message.
    
    ### How was this patch tested?
    
    Added new test cases to AnalysisErrorSuite.
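
    A quick manual check is also possible; this is a sketch, assuming a
    `spark-shell` session where `spark` is the active `SparkSession`:

    ```
    // Analysis runs eagerly, so with this fix the DATATYPE_MISMATCH error shown
    // above (rather than a ClassCastException) is thrown by spark.sql directly.
    spark.sql("select ntile(99.9) OVER (order by id) from range(10)")
    ```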
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    No.
    
    Closes #46333 from JoshRosen/SPARK-48081.
    
    Authored-by: Josh Rosen <joshro...@databricks.com>
    Signed-off-by: Dongjoon Hyun <dh...@apple.com>
---
 .../catalyst/expressions/windowExpressions.scala   |  4 +--
 .../sql/catalyst/analysis/AnalysisErrorSuite.scala | 34 ++++++++++++++++++++++
 2 files changed, 36 insertions(+), 2 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala
index 00711332350c..5881c456f6e8 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala
@@ -853,7 +853,7 @@ case class NTile(buckets: Expression) extends RowNumberLike with SizeBasedWindow
   // for each partition.
   override def checkInputDataTypes(): TypeCheckResult = {
     if (!buckets.foldable) {
-      DataTypeMismatch(
+      return DataTypeMismatch(
         errorSubClass = "NON_FOLDABLE_INPUT",
         messageParameters = Map(
           "inputName" -> toSQLId("buckets"),
@@ -864,7 +864,7 @@ case class NTile(buckets: Expression) extends RowNumberLike with SizeBasedWindow
     }
 
     if (buckets.dataType != IntegerType) {
-      DataTypeMismatch(
+      return DataTypeMismatch(
         errorSubClass = "UNEXPECTED_INPUT_TYPE",
         messageParameters = Map(
           "paramIndex" -> ordinalNumber(0),
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
index f12d22409691..19eb3a418543 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
@@ -360,6 +360,34 @@ class AnalysisErrorSuite extends AnalysisTest with DataTypeErrorsBase {
       "inputType" -> "\"BOOLEAN\"",
       "requiredType" -> "\"INT\""))
 
+  errorClassTest(
+    "the buckets of ntile window function is not int literal",
+    testRelation2.select(
+      WindowExpression(
+        NTile(Literal(99.9f)),
+        WindowSpecDefinition(
+          UnresolvedAttribute("a") :: Nil,
+          SortOrder(UnresolvedAttribute("b"), Ascending) :: Nil,
+          UnspecifiedFrame)).as("window")),
+    errorClass = "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE",
+    messageParameters = Map(
+      "sqlExpr" -> "\"ntile(99.9)\"",
+      "paramIndex" -> "first",
+      "inputSql" -> "\"99.9\"",
+      "inputType" -> "\"FLOAT\"",
+      "requiredType" -> "\"INT\""))
+
+
+  errorClassTest(
+    "the buckets of ntile window function is not foldable",
+    testRelation2.select(
+      WindowExpression(
+        NTile(AttributeReference("b", IntegerType)()),
+        WindowSpecDefinition(
+          UnresolvedAttribute("a") :: Nil,
+          SortOrder(UnresolvedAttribute("b"), Ascending) :: Nil,
+          UnspecifiedFrame)).as("window")),
+    errorClass = "DATATYPE_MISMATCH.NON_FOLDABLE_INPUT",
+    messageParameters = Map(
+      "sqlExpr" -> "\"ntile(b)\"",
+      "inputName" -> "`buckets`",
+      "inputExpr" -> "\"b\"",
+      "inputType" -> "\"INT\""))
+
   errorClassTest(
     "unresolved attributes",
     testRelation.select($"abcd"),


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
