This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch branch-3.3
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.3 by this push:
new 44808c1a385 [SPARK-44973][SQL] Fix `ArrayIndexOutOfBoundsException` in
`conv()`
44808c1a385 is described below
commit 44808c1a385a03d0bd9b838eee5aca71180278c8
Author: Mark Jarvin <[email protected]>
AuthorDate: Tue Nov 21 11:38:31 2023 -0800
[SPARK-44973][SQL] Fix `ArrayIndexOutOfBoundsException` in `conv()`
### What changes were proposed in this pull request?
Increase the size of the buffer allocated for the result of base conversion
in `NumberConverter` to prevent ArrayIndexOutOfBoundsException when evaluating
`conv(s"${Long.MinValue}", 10, -2)`.
### Why are the changes needed?
The ArrayIndexOutOfBoundsException is not intended behaviour: the output buffer
was sized at 64 bytes, but converting `Long.MinValue` with `toBase` equal to -2
produces 64 digits plus a leading '-' sign, i.e. 65 characters.
### Does this PR introduce _any_ user-facing change?
Users will no longer experience an ArrayIndexOutOfBoundsException for this
specific set of arguments and will instead receive the expected base conversion.
### How was this patch tested?
New unit test cases
### Was this patch authored or co-authored using generative AI tooling?
No
Closes #43880 from markj-db/SPARK-44973.
Authored-by: Mark Jarvin <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
(cherry picked from commit 2ac8ff76a5169fe1f6cf130cc82738ba78bd8c65)
Signed-off-by: Dongjoon Hyun <[email protected]>
---
.../org/apache/spark/sql/catalyst/util/NumberConverter.scala | 9 ++++++++-
.../apache/spark/sql/catalyst/util/NumberConverterSuite.scala | 6 ++++++
.../test/scala/org/apache/spark/sql/MathFunctionsSuite.scala | 11 +++++++++++
3 files changed, 25 insertions(+), 1 deletion(-)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/NumberConverter.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/NumberConverter.scala
index 68a1ba25423..052ff3d370e 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/NumberConverter.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/NumberConverter.scala
@@ -21,6 +21,13 @@ import org.apache.spark.unsafe.types.UTF8String
object NumberConverter {
+ /**
+ * The output string has a max length of one char per bit in the 64-bit
`Long` intermediate
+ * representation plus one char for the '-' sign. This happens in practice
when converting
+ * `Long.MinValue` with `toBase` equal to -2.
+ */
+ private final val MAX_OUTPUT_LENGTH = java.lang.Long.SIZE + 1
+
/**
* Decode v into value[].
*
@@ -128,7 +135,7 @@ object NumberConverter {
var (negative, first) = if (n(0) == '-') (true, 1) else (false, 0)
// Copy the digits in the right side of the array
- val temp = new Array[Byte](Math.max(n.length, 64))
+ val temp = new Array[Byte](Math.max(n.length, MAX_OUTPUT_LENGTH))
var v: Long = -1
System.arraycopy(n, first, temp, temp.length - n.length + first, n.length
- first)
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/NumberConverterSuite.scala
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/NumberConverterSuite.scala
index eb257b79756..9145dc9eabd 100644
---
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/NumberConverterSuite.scala
+++
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/NumberConverterSuite.scala
@@ -44,6 +44,12 @@ class NumberConverterSuite extends SparkFunSuite {
checkConv("-10", 11, 7, "45012021522523134134555")
}
+ test("SPARK-44973: conv must allocate enough space for all digits plus
negative sign") {
+ checkConv(s"${Long.MinValue}", 10, -2, BigInt(Long.MinValue).toString(2))
+ checkConv((BigInt(Long.MaxValue) + 1).toString(16), 16, -2,
BigInt(Long.MinValue).toString(2))
+ checkConv(BigInt(Long.MinValue).toString(16), 16, -2,
BigInt(Long.MinValue).toString(2))
+ }
+
test("byte to binary") {
checkToBinary(0.toByte)
checkToBinary(1.toByte)
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala
index 88071da293a..d365ed6b965 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala
@@ -255,6 +255,17 @@ class MathFunctionsSuite extends QueryTest with
SharedSparkSession {
checkAnswer(df.select(conv('num, 16, -10)), Seq(Row("-1"),
Row("-6148914734186190176")))
}
+ test("SPARK-44973: conv must allocate enough space for all digits plus
negative sign") {
+ withSQLConf(SQLConf.ANSI_ENABLED.key -> false.toString) {
+ val df = Seq(
+ ((BigInt(Long.MaxValue) + 1).toString(16)),
+ (BigInt(Long.MinValue).toString(16))
+ ).toDF("num")
+ checkAnswer(df.select(conv($"num", 16, -2)),
+ Seq(Row(BigInt(Long.MinValue).toString(2)),
Row(BigInt(Long.MinValue).toString(2))))
+ }
+ }
+
test("floor") {
testOneToOneMathFunction(floor, (d: Double) => math.floor(d).toLong)
// testOneToOneMathFunction does not validate the resulting data type
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]