This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch branch-3.5
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.5 by this push:
     new 19ad7f7135fb [SPARK-50769][SQL] Fix ClassCastException in 
HistogramNumeric
19ad7f7135fb is described below

commit 19ad7f7135fb2629e9be660d9046361ace191a99
Author: Linhong Liu <[email protected]>
AuthorDate: Thu Jan 9 12:08:16 2025 +0800

    [SPARK-50769][SQL] Fix ClassCastException in HistogramNumeric
    
    ### What changes were proposed in this pull request?
    `HistogramNumeric` accepts `NumericType` input, but it does not properly
    handle `DecimalType` during execution. As a result, a `ClassCastException`
    is thrown when it tries to convert a `Decimal` value to a `Double`.
    
    ### Why are the changes needed?
    bug fix
    
    ### Does this PR introduce _any_ user-facing change?
    No
    
    ### How was this patch tested?
    Unit test: added a `DECIMAL(4, 2)` case for `histogram_numeric` to `group-by.sql`. Run with:
    ```
     build/sbt "sql/testOnly *SQLQueryTestSuite -- -z group-by.sql"
    ```
    
    ### Was this patch authored or co-authored using generative AI tooling?
    No
    
    Closes #49418 from linhongliu-db/SPARK-50769.
    
    Authored-by: Linhong Liu <[email protected]>
    Signed-off-by: Wenchen Fan <[email protected]>
    (cherry picked from commit b84dc909a8856388faddc154c6a1d3aba271474e)
    Signed-off-by: Wenchen Fan <[email protected]>
---
 .../sql/catalyst/expressions/aggregate/HistogramNumeric.scala  | 10 +++++++++-
 .../test/resources/sql-tests/analyzer-results/group-by.sql.out |  9 +++++++++
 sql/core/src/test/resources/sql-tests/inputs/group-by.sql      |  2 ++
 sql/core/src/test/resources/sql-tests/results/group-by.sql.out |  9 +++++++++
 4 files changed, 29 insertions(+), 1 deletion(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/HistogramNumeric.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/HistogramNumeric.scala
index 7b548ab930c1..9f9e070e4d80 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/HistogramNumeric.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/HistogramNumeric.scala
@@ -126,7 +126,10 @@ case class HistogramNumeric(
     // Ignore empty rows, for example: histogram_numeric(null)
     if (value != null) {
       // Convert the value to a double value
-      val doubleValue = value.asInstanceOf[Number].doubleValue
+      val doubleValue = value match {
+        case d: Decimal => d.toDouble
+        case o => o.asInstanceOf[Number].doubleValue()
+      }
       buffer.add(doubleValue)
     }
     buffer
@@ -161,6 +164,11 @@ case class HistogramNumeric(
             case ShortType => coord.x.toShort
             case _: DayTimeIntervalType | LongType | TimestampType | 
TimestampNTZType =>
               coord.x.toLong
+            case d: DecimalType =>
+              val bigDecimal = BigDecimal
+                .decimal(coord.x, new java.math.MathContext(d.precision))
+                .setScale(d.scale, BigDecimal.RoundingMode.HALF_UP)
+              Decimal(bigDecimal)
             case _ => coord.x
           }
           InternalRow.apply(result, coord.y)
diff --git 
a/sql/core/src/test/resources/sql-tests/analyzer-results/group-by.sql.out 
b/sql/core/src/test/resources/sql-tests/analyzer-results/group-by.sql.out
index 93c463575dc1..057cbe9054bc 100644
--- a/sql/core/src/test/resources/sql-tests/analyzer-results/group-by.sql.out
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/group-by.sql.out
@@ -1072,6 +1072,15 @@ Aggregate [histogram_numeric(col#xL, 3, 0, 0) AS 
histogram_numeric(col, 3)#x]
    +- LocalRelation [col#xL]
 
 
+-- !query
+SELECT histogram_numeric(col, 3) FROM VALUES
+  (CAST(1 AS DECIMAL(4, 2))), (CAST(2 AS DECIMAL(4, 2))), (CAST(3 AS 
DECIMAL(4, 2))) AS tab(col)
+-- !query analysis
+Aggregate [histogram_numeric(col#x, 3, 0, 0) AS histogram_numeric(col, 3)#x]
++- SubqueryAlias tab
+   +- LocalRelation [col#x]
+
+
 -- !query
 SELECT histogram_numeric(col, 3) FROM VALUES (TIMESTAMP '2017-03-01 00:00:00'),
   (TIMESTAMP '2017-04-01 00:00:00'), (TIMESTAMP '2017-05-01 00:00:00') AS 
tab(col)
diff --git a/sql/core/src/test/resources/sql-tests/inputs/group-by.sql 
b/sql/core/src/test/resources/sql-tests/inputs/group-by.sql
index ce1b422de319..f8e7d8432295 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/group-by.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/group-by.sql
@@ -221,6 +221,8 @@ SELECT histogram_numeric(col, 3) FROM VALUES
   (CAST(1 AS SMALLINT)), (CAST(2 AS SMALLINT)), (CAST(3 AS SMALLINT)) AS 
tab(col);
 SELECT histogram_numeric(col, 3) FROM VALUES
   (CAST(1 AS BIGINT)), (CAST(2 AS BIGINT)), (CAST(3 AS BIGINT)) AS tab(col);
+SELECT histogram_numeric(col, 3) FROM VALUES
+  (CAST(1 AS DECIMAL(4, 2))), (CAST(2 AS DECIMAL(4, 2))), (CAST(3 AS 
DECIMAL(4, 2))) AS tab(col);
 SELECT histogram_numeric(col, 3) FROM VALUES (TIMESTAMP '2017-03-01 00:00:00'),
   (TIMESTAMP '2017-04-01 00:00:00'), (TIMESTAMP '2017-05-01 00:00:00') AS 
tab(col);
 SELECT histogram_numeric(col, 3) FROM VALUES (INTERVAL '100-00' YEAR TO MONTH),
diff --git a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out 
b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out
index 548917ef79b2..7c5f77565773 100644
--- a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out
@@ -1005,6 +1005,15 @@ struct<histogram_numeric(col, 
3):array<struct<x:bigint,y:double>>>
 [{"x":1,"y":1.0},{"x":2,"y":1.0},{"x":3,"y":1.0}]
 
 
+-- !query
+SELECT histogram_numeric(col, 3) FROM VALUES
+  (CAST(1 AS DECIMAL(4, 2))), (CAST(2 AS DECIMAL(4, 2))), (CAST(3 AS 
DECIMAL(4, 2))) AS tab(col)
+-- !query schema
+struct<histogram_numeric(col, 3):array<struct<x:decimal(4,2),y:double>>>
+-- !query output
+[{"x":1.00,"y":1.0},{"x":2.00,"y":1.0},{"x":3.00,"y":1.0}]
+
+
 -- !query
 SELECT histogram_numeric(col, 3) FROM VALUES (TIMESTAMP '2017-03-01 00:00:00'),
   (TIMESTAMP '2017-04-01 00:00:00'), (TIMESTAMP '2017-05-01 00:00:00') AS 
tab(col)


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to