This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch branch-3.4
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.4 by this push:
     new f16dd05620ab [SPARK-47646][SQL] Make try_to_number return NULL for 
malformed input
f16dd05620ab is described below

commit f16dd05620ab24357d2417458ab89c8ee51e67bf
Author: Hyukjin Kwon <gurwls...@apache.org>
AuthorDate: Fri Mar 29 17:38:10 2024 +0900

    [SPARK-47646][SQL] Make try_to_number return NULL for malformed input
    
    This PR proposes to add a NULL check after parsing the number so that the 
output of the `try_to_number` expression can safely be null.
    
    ```scala
    import org.apache.spark.sql.functions._
    val df = spark.createDataset(spark.sparkContext.parallelize(Seq("11")))
    df.select(try_to_number($"value", lit("$99.99"))).show()
    ```
    Before this fix, the snippet above failed with:
    ```
    java.lang.NullPointerException: Cannot invoke 
"org.apache.spark.sql.types.Decimal.toPlainString()" because "<local7>" is null
            at 
org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.serializefromobject_doConsume_0$(Unknown
 Source)
            at 
org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown
 Source)
            at 
org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
            at 
org.apache.spark.sql.execution.WholeStageCodegenEvaluatorFactory$WholeStageCodegenPartitionEvaluator$$anon$1.hasNext(WholeStageCodegenEvaluatorFactory.scala:50)
            at 
org.apache.spark.sql.execution.SparkPlan.$anonfun$getByteArrayRdd$1(SparkPlan.scala:388)
            at 
org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2(RDD.scala:894)
            at 
org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2$adapted(RDD.scala:894)
            at 
org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
            at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:368)
            at org.apache.spark.rdd.RDD.iterator(RDD.scala:332)
    ```
    
    This change is needed to fix the bug and let `try_to_number` return `NULL` 
for malformed input, as designed.
    
    Yes, it fixes a bug. Previously, `try_to_number` failed with NPE.
    
    A unit test was added.
    
    No.
    
    Closes #45771 from HyukjinKwon/SPARK-47646.
    
    Authored-by: Hyukjin Kwon <gurwls...@apache.org>
    Signed-off-by: Hyukjin Kwon <gurwls...@apache.org>
---
 .../spark/sql/catalyst/expressions/numberFormatExpressions.scala     | 1 +
 .../src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala   | 5 +++++
 2 files changed, 6 insertions(+)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/numberFormatExpressions.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/numberFormatExpressions.scala
index 2d4f0438db76..9dcca65efe5a 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/numberFormatExpressions.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/numberFormatExpressions.scala
@@ -86,6 +86,7 @@ abstract class ToNumberBase(left: Expression, right: 
Expression, errorOnFail: Bo
         |${CodeGenerator.javaType(dataType)} ${ev.value} = 
${CodeGenerator.defaultValue(dataType)};
         |if (!${ev.isNull}) {
         |  ${ev.value} = $builder.parse(${eval.value});
+        |  ${ev.isNull} = ${ev.isNull} || (${ev.value} == null);
         |}
       """.stripMargin)
   }
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala
index 23e71bb2f49f..404a1e742d19 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala
@@ -714,4 +714,9 @@ class StringFunctionsSuite extends QueryTest with 
SharedSparkSession {
       Row("QqQQdddoooo") :: Row(null) :: Nil
     )
   }
+
+  test("SPARK-47646: try_to_number should return NULL for malformed input") {
+    val df = spark.createDataset(spark.sparkContext.parallelize(Seq("11")))
+    checkAnswer(df.select(try_to_number($"value", lit("$99.99"))), 
Seq(Row(null)))
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to