This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch branch-3.5
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.5 by this push:
new 8f52fd55d42 [SPARK-44973][SQL] Fix `ArrayIndexOutOfBoundsException` in `conv()`
8f52fd55d42 is described below
commit 8f52fd55d42045d4aadb2cb18c7c3f99ad75eb35
Author: Mark Jarvin <[email protected]>
AuthorDate: Tue Nov 21 11:38:31 2023 -0800
[SPARK-44973][SQL] Fix `ArrayIndexOutOfBoundsException` in `conv()`
### What changes were proposed in this pull request?
Increase the size of the buffer allocated for the result of base conversion
in `NumberConverter` to prevent ArrayIndexOutOfBoundsException when evaluating
`conv(s"${Long.MinValue}", 10, -2)`.
### Why are the changes needed?
I don't think the ArrayIndexOutOfBoundsException is intended behaviour.
### Does this PR introduce _any_ user-facing change?
Users will no longer experience an ArrayIndexOutOfBoundsException for this
specific set of arguments and will instead receive the expected base conversion.
### How was this patch tested?
New unit test cases
### Was this patch authored or co-authored using generative AI tooling?
No
Closes #43880 from markj-db/SPARK-44973.
Authored-by: Mark Jarvin <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
(cherry picked from commit 2ac8ff76a5169fe1f6cf130cc82738ba78bd8c65)
Signed-off-by: Dongjoon Hyun <[email protected]>
---
.../org/apache/spark/sql/catalyst/util/NumberConverter.scala | 9 ++++++++-
.../apache/spark/sql/catalyst/util/NumberConverterSuite.scala | 6 ++++++
.../test/scala/org/apache/spark/sql/MathFunctionsSuite.scala | 11 +++++++++++
3 files changed, 25 insertions(+), 1 deletion(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/NumberConverter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/NumberConverter.scala
index 59765cde1f9..06d3910311b 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/NumberConverter.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/NumberConverter.scala
@@ -23,6 +23,13 @@ import org.apache.spark.unsafe.types.UTF8String
object NumberConverter {
+ /**
+ * The output string has a max length of one char per bit in the 64-bit
`Long` intermediate
+ * representation plus one char for the '-' sign. This happens in practice when converting
+ * `Long.MinValue` with `toBase` equal to -2.
+ */
+ private final val MAX_OUTPUT_LENGTH = java.lang.Long.SIZE + 1
+
/**
* Decode v into value[].
*
@@ -148,7 +155,7 @@ object NumberConverter {
var (negative, first) = if (n(0) == '-') (true, 1) else (false, 0)
// Copy the digits in the right side of the array
- val temp = new Array[Byte](Math.max(n.length, 64))
+ val temp = new Array[Byte](Math.max(n.length, MAX_OUTPUT_LENGTH))
var v: Long = -1
System.arraycopy(n, first, temp, temp.length - n.length + first, n.length - first)
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/NumberConverterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/NumberConverterSuite.scala
index c634c5b739b..3de331f90a6 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/NumberConverterSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/NumberConverterSuite.scala
@@ -55,6 +55,12 @@ class NumberConverterSuite extends SparkFunSuite {
checkConv("-10", 11, 7, "45012021522523134134555")
}
+ test("SPARK-44973: conv must allocate enough space for all digits plus negative sign") {
+ checkConv(s"${Long.MinValue}", 10, -2, BigInt(Long.MinValue).toString(2))
+ checkConv((BigInt(Long.MaxValue) + 1).toString(16), 16, -2, BigInt(Long.MinValue).toString(2))
+ checkConv(BigInt(Long.MinValue).toString(16), 16, -2, BigInt(Long.MinValue).toString(2))
+ }
+
test("byte to binary") {
checkToBinary(0.toByte)
checkToBinary(1.toByte)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala
index 0adb89c3a9e..ba04e3b691a 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala
@@ -262,6 +262,17 @@ class MathFunctionsSuite extends QueryTest with SharedSparkSession {
}
}
+ test("SPARK-44973: conv must allocate enough space for all digits plus negative sign") {
+ withSQLConf(SQLConf.ANSI_ENABLED.key -> false.toString) {
+ val df = Seq(
+ ((BigInt(Long.MaxValue) + 1).toString(16)),
+ (BigInt(Long.MinValue).toString(16))
+ ).toDF("num")
+ checkAnswer(df.select(conv($"num", 16, -2)),
+ Seq(Row(BigInt(Long.MinValue).toString(2)), Row(BigInt(Long.MinValue).toString(2))))
+ }
+ }
+
test("floor") {
testOneToOneMathFunction(floor, (d: Double) => math.floor(d).toLong)
// testOneToOneMathFunction does not validate the resulting data type
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]