This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch branch-3.5 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.5 by this push: new 8f52fd55d42 [SPARK-44973][SQL] Fix `ArrayIndexOutOfBoundsException` in `conv()` 8f52fd55d42 is described below commit 8f52fd55d42045d4aadb2cb18c7c3f99ad75eb35 Author: Mark Jarvin <mark.jar...@databricks.com> AuthorDate: Tue Nov 21 11:38:31 2023 -0800 [SPARK-44973][SQL] Fix `ArrayIndexOutOfBoundsException` in `conv()` ### What changes were proposed in this pull request? Increase the size of the buffer allocated for the result of base conversion in `NumberConverter` to prevent ArrayIndexOutOfBoundsException when evaluating `conv(s"${Long.MinValue}", 10, -2)`. ### Why are the changes needed? I don't think the ArrayIndexOutOfBoundsException is intended behaviour. ### Does this PR introduce _any_ user-facing change? Users will no longer experience an ArrayIndexOutOfBoundsException for this specific set of arguments and will instead receive the expected base conversion. ### How was this patch tested? New unit test cases ### Was this patch authored or co-authored using generative AI tooling? No Closes #43880 from markj-db/SPARK-44973. Authored-by: Mark Jarvin <mark.jar...@databricks.com> Signed-off-by: Dongjoon Hyun <dh...@apple.com> (cherry picked from commit 2ac8ff76a5169fe1f6cf130cc82738ba78bd8c65) Signed-off-by: Dongjoon Hyun <dh...@apple.com> --- .../org/apache/spark/sql/catalyst/util/NumberConverter.scala | 9 ++++++++- .../apache/spark/sql/catalyst/util/NumberConverterSuite.scala | 6 ++++++ .../test/scala/org/apache/spark/sql/MathFunctionsSuite.scala | 11 +++++++++++ 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/NumberConverter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/NumberConverter.scala index 59765cde1f9..06d3910311b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/NumberConverter.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/NumberConverter.scala @@ -23,6 +23,13 @@ import org.apache.spark.unsafe.types.UTF8String object NumberConverter { + /** + * The output string has a max length of one char per bit in the 64-bit `Long` intermediate + * representation plus one char for the '-' sign. This happens in practice when converting + * `Long.MinValue` with `toBase` equal to -2. + */ + private final val MAX_OUTPUT_LENGTH = java.lang.Long.SIZE + 1 + /** * Decode v into value[]. * @@ -148,7 +155,7 @@ object NumberConverter { var (negative, first) = if (n(0) == '-') (true, 1) else (false, 0) // Copy the digits in the right side of the array - val temp = new Array[Byte](Math.max(n.length, 64)) + val temp = new Array[Byte](Math.max(n.length, MAX_OUTPUT_LENGTH)) var v: Long = -1 System.arraycopy(n, first, temp, temp.length - n.length + first, n.length - first) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/NumberConverterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/NumberConverterSuite.scala index c634c5b739b..3de331f90a6 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/NumberConverterSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/NumberConverterSuite.scala @@ -55,6 +55,12 @@ class NumberConverterSuite extends SparkFunSuite { checkConv("-10", 11, 7, "45012021522523134134555") } + test("SPARK-44973: conv must allocate enough space for all digits plus negative sign") { + checkConv(s"${Long.MinValue}", 10, -2, BigInt(Long.MinValue).toString(2)) + checkConv((BigInt(Long.MaxValue) + 1).toString(16), 16, -2, BigInt(Long.MinValue).toString(2)) + checkConv(BigInt(Long.MinValue).toString(16), 16, -2, BigInt(Long.MinValue).toString(2)) + } + test("byte to binary") { checkToBinary(0.toByte) checkToBinary(1.toByte) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala index 0adb89c3a9e..ba04e3b691a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala @@ -262,6 +262,17 @@ class MathFunctionsSuite extends QueryTest with SharedSparkSession { } } + test("SPARK-44973: conv must allocate enough space for all digits plus negative sign") { + withSQLConf(SQLConf.ANSI_ENABLED.key -> false.toString) { + val df = Seq( + ((BigInt(Long.MaxValue) + 1).toString(16)), + (BigInt(Long.MinValue).toString(16)) + ).toDF("num") + checkAnswer(df.select(conv($"num", 16, -2)), + Seq(Row(BigInt(Long.MinValue).toString(2)), Row(BigInt(Long.MinValue).toString(2)))) + } + } + test("floor") { testOneToOneMathFunction(floor, (d: Double) => math.floor(d).toLong) // testOneToOneMathFunction does not validate the resulting data type --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org