This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-3.5
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.5 by this push:
     new 8f52fd55d42 [SPARK-44973][SQL] Fix `ArrayIndexOutOfBoundsException` in `conv()`
8f52fd55d42 is described below

commit 8f52fd55d42045d4aadb2cb18c7c3f99ad75eb35
Author: Mark Jarvin <mark.jar...@databricks.com>
AuthorDate: Tue Nov 21 11:38:31 2023 -0800

    [SPARK-44973][SQL] Fix `ArrayIndexOutOfBoundsException` in `conv()`
    
    ### What changes were proposed in this pull request?
    
    Increase the size of the buffer allocated for the result of base conversion in
    `NumberConverter` to prevent ArrayIndexOutOfBoundsException when evaluating
    `conv(s"${Long.MinValue}", 10, -2)`.
    
    ### Why are the changes needed?
    
    I don't think the ArrayIndexOutOfBoundsException is intended behaviour.
    
    ### Does this PR introduce _any_ user-facing change?
    
    Users will no longer experience an ArrayIndexOutOfBoundsException for this specific set of
    arguments and will instead receive the expected base conversion.
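    
    A sketch of the expected result after this fix, based on the new test cases (this assumes
    the `NumberConverter.convert(Array[Byte], Int, Int)` helper exercised by
    `NumberConverterSuite`):
    
    ```scala
    import org.apache.spark.sql.catalyst.util.NumberConverter
    import org.apache.spark.unsafe.types.UTF8String
    
    // Previously threw ArrayIndexOutOfBoundsException; now returns the signed binary string.
    val result = NumberConverter.convert(
      UTF8String.fromString(Long.MinValue.toString).getBytes, 10, -2)
    assert(result.toString == BigInt(Long.MinValue).toString(2))  // "-1" followed by 63 zeros
    ```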
    
    ### How was this patch tested?
    
    New unit test cases
    
    ### Was this patch authored or co-authored using generative AI tooling?
    No
    
    Closes #43880 from markj-db/SPARK-44973.
    
    Authored-by: Mark Jarvin <mark.jar...@databricks.com>
    Signed-off-by: Dongjoon Hyun <dh...@apple.com>
    (cherry picked from commit 2ac8ff76a5169fe1f6cf130cc82738ba78bd8c65)
    Signed-off-by: Dongjoon Hyun <dh...@apple.com>
---
 .../org/apache/spark/sql/catalyst/util/NumberConverter.scala  |  9 ++++++++-
 .../apache/spark/sql/catalyst/util/NumberConverterSuite.scala |  6 ++++++
 .../test/scala/org/apache/spark/sql/MathFunctionsSuite.scala  | 11 +++++++++++
 3 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/NumberConverter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/NumberConverter.scala
index 59765cde1f9..06d3910311b 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/NumberConverter.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/NumberConverter.scala
@@ -23,6 +23,13 @@ import org.apache.spark.unsafe.types.UTF8String
 
 object NumberConverter {
 
+  /**
+   * The output string has a max length of one char per bit in the 64-bit `Long` intermediate
+   * representation plus one char for the '-' sign.  This happens in practice when converting
+   * `Long.MinValue` with `toBase` equal to -2.
+   */
+  private final val MAX_OUTPUT_LENGTH = java.lang.Long.SIZE + 1
+
   /**
    * Decode v into value[].
    *
@@ -148,7 +155,7 @@ object NumberConverter {
     var (negative, first) = if (n(0) == '-') (true, 1) else (false, 0)
 
     // Copy the digits in the right side of the array
-    val temp = new Array[Byte](Math.max(n.length, 64))
+    val temp = new Array[Byte](Math.max(n.length, MAX_OUTPUT_LENGTH))
     var v: Long = -1
 
     System.arraycopy(n, first, temp, temp.length - n.length + first, n.length - first)
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/NumberConverterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/NumberConverterSuite.scala
index c634c5b739b..3de331f90a6 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/NumberConverterSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/NumberConverterSuite.scala
@@ -55,6 +55,12 @@ class NumberConverterSuite extends SparkFunSuite {
     checkConv("-10", 11, 7, "45012021522523134134555")
   }
 
+  test("SPARK-44973: conv must allocate enough space for all digits plus 
negative sign") {
+    checkConv(s"${Long.MinValue}", 10, -2, BigInt(Long.MinValue).toString(2))
+    checkConv((BigInt(Long.MaxValue) + 1).toString(16), 16, -2, BigInt(Long.MinValue).toString(2))
+    checkConv(BigInt(Long.MinValue).toString(16), 16, -2, BigInt(Long.MinValue).toString(2))
+  }
+
   test("byte to binary") {
     checkToBinary(0.toByte)
     checkToBinary(1.toByte)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala
index 0adb89c3a9e..ba04e3b691a 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala
@@ -262,6 +262,17 @@ class MathFunctionsSuite extends QueryTest with SharedSparkSession {
     }
   }
 
+  test("SPARK-44973: conv must allocate enough space for all digits plus 
negative sign") {
+    withSQLConf(SQLConf.ANSI_ENABLED.key -> false.toString) {
+      val df = Seq(
+        ((BigInt(Long.MaxValue) + 1).toString(16)),
+        (BigInt(Long.MinValue).toString(16))
+      ).toDF("num")
+      checkAnswer(df.select(conv($"num", 16, -2)),
+        Seq(Row(BigInt(Long.MinValue).toString(2)), Row(BigInt(Long.MinValue).toString(2))))
+    }
+  }
+
   test("floor") {
     testOneToOneMathFunction(floor, (d: Double) => math.floor(d).toLong)
     // testOneToOneMathFunction does not validate the resulting data type


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
