This is an automated email from the ASF dual-hosted git repository.
yao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 313479c45421 [SPARK-48713][SQL] Add index range check for
UnsafeRow.pointTo when baseObject is byte array
313479c45421 is described below
commit 313479c454216085abb2dd4da011eb39b216cb40
Author: Yi Wu <[email protected]>
AuthorDate: Wed Jun 26 17:33:56 2024 +0800
[SPARK-48713][SQL] Add index range check for UnsafeRow.pointTo when
baseObject is byte array
### What changes were proposed in this pull request?
This PR proposes to add an index range check for `UnsafeRow.pointTo()` when
`baseObject` is a byte array.
### Why are the changes needed?
All the other places like `readExternal()` and `read()` ensure `sizeInBytes`
can't be larger than the length of `baseObject` when it is a byte array, except
`pointTo()`. So adding this check helps us get better error stack info in
the first place when the index goes wrong.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Added unit test.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #47087 from Ngone51/ur.
Authored-by: Yi Wu <[email protected]>
Signed-off-by: Kent Yao <[email protected]>
---
.../spark/sql/catalyst/expressions/UnsafeRow.java | 13 ++++++++
.../org/apache/spark/sql/UnsafeRowSuite.scala | 38 +++++++++++++++++++++-
2 files changed, 50 insertions(+), 1 deletion(-)
diff --git
a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java
b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java
index 6325ba68af5b..8741c206f2bb 100644
---
a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java
+++
b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java
@@ -21,12 +21,14 @@ import java.io.*;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.nio.ByteBuffer;
+import java.util.Map;
import com.esotericsoftware.kryo.Kryo;
import com.esotericsoftware.kryo.KryoSerializable;
import com.esotericsoftware.kryo.io.Input;
import com.esotericsoftware.kryo.io.Output;
+import org.apache.spark.SparkIllegalArgumentException;
import org.apache.spark.SparkUnsupportedOperationException;
import org.apache.spark.sql.catalyst.InternalRow;
import org.apache.spark.sql.catalyst.types.*;
@@ -155,6 +157,17 @@ public final class UnsafeRow extends InternalRow
implements Externalizable, Kryo
public void pointTo(Object baseObject, long baseOffset, int sizeInBytes) {
assert numFields >= 0 : "numFields (" + numFields + ") should >= 0";
assert sizeInBytes % 8 == 0 : "sizeInBytes (" + sizeInBytes + ") should be
a multiple of 8";
+ if (baseObject instanceof byte[] bytes) {
+ int offsetInByteArray = (int) (baseOffset - Platform.BYTE_ARRAY_OFFSET);
+ if (offsetInByteArray < 0 || sizeInBytes < 0 ||
+ bytes.length < offsetInByteArray + sizeInBytes) {
+ throw new SparkIllegalArgumentException(
+ "INTERNAL_ERROR",
+ Map.of("message", "Invalid byte array backed UnsafeRow: byte array
length=" +
+ bytes.length + ", offset=" + offsetInByteArray + ", byte size=" +
sizeInBytes)
+ );
+ }
+ }
this.baseObject = baseObject;
this.baseOffset = baseOffset;
this.sizeInBytes = sizeInBytes;
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/UnsafeRowSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/UnsafeRowSuite.scala
index 9daa69ce9f15..18a6c538e0a8 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/UnsafeRowSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/UnsafeRowSuite.scala
@@ -19,7 +19,7 @@ package org.apache.spark.sql
import java.io.ByteArrayOutputStream
-import org.apache.spark.{SparkConf, SparkFunSuite}
+import org.apache.spark.{SparkConf, SparkFunSuite,
SparkIllegalArgumentException}
import org.apache.spark.serializer.{JavaSerializer, KryoSerializer}
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.{UnsafeProjection, UnsafeRow}
@@ -188,4 +188,40 @@ class UnsafeRowSuite extends SparkFunSuite {
unsafeRow.setDecimal(0, d2, 38)
assert(unsafeRow.getDecimal(0, 38, 18) === null)
}
+
+ test("SPARK-48713: throw SparkIllegalArgumentException for illegal
UnsafeRow.pointTo") {
+ val emptyRow = UnsafeRow.createFromByteArray(64, 2)
+ val byteArray = new Array[Byte](64)
+
+ // Out of bounds
+ var errorMsg = intercept[SparkIllegalArgumentException] {
+ emptyRow.pointTo(byteArray, Platform.BYTE_ARRAY_OFFSET + 50, 32)
+ }.getMessage
+ assert(
+ errorMsg.contains(
+ "Invalid byte array backed UnsafeRow: byte array length=64, offset=50,
byte size=32"
+ )
+ )
+
+ // Negative size
+ errorMsg = intercept[SparkIllegalArgumentException] {
+ emptyRow.pointTo(byteArray, Platform.BYTE_ARRAY_OFFSET + 50, -32)
+ }.getMessage
+ assert(
+ errorMsg.contains(
+ "Invalid byte array backed UnsafeRow: byte array length=64, offset=50,
byte size=-32"
+ )
+ )
+
+ // Negative offset
+ errorMsg = intercept[SparkIllegalArgumentException] {
+ emptyRow.pointTo(byteArray, -5, 32)
+ }.getMessage
+ assert(
+ errorMsg.contains(
+ s"Invalid byte array backed UnsafeRow: byte array length=64, " +
+ s"offset=${-5 - Platform.BYTE_ARRAY_OFFSET}, byte size=32"
+ )
+ )
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]