This is an automated email from the ASF dual-hosted git repository.

yao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 313479c45421 [SPARK-48713][SQL] Add index range check for 
UnsafeRow.pointTo when baseObject is byte array
313479c45421 is described below

commit 313479c454216085abb2dd4da011eb39b216cb40
Author: Yi Wu <[email protected]>
AuthorDate: Wed Jun 26 17:33:56 2024 +0800

    [SPARK-48713][SQL] Add index range check for UnsafeRow.pointTo when 
baseObject is byte array
    
    ### What changes were proposed in this pull request?
    
    This PR proposes to add an index range check for `UnsafeRow.pointTo()` when 
`baseObject` is a byte array.
    
    ### Why are the changes needed?
    
    All the other places like `readExternal()`, `read()` ensure `sizeInBytes` 
can't be larger than the length of `baseObject` when it is a byte array, except 
`pointTo()`. So adding this check helps us to get better error stack info in 
the first place when the index goes wrong.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No.
    
    ### How was this patch tested?
    
    Added unit test.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    No.
    
    Closes #47087 from Ngone51/ur.
    
    Authored-by: Yi Wu <[email protected]>
    Signed-off-by: Kent Yao <[email protected]>
---
 .../spark/sql/catalyst/expressions/UnsafeRow.java  | 13 ++++++++
 .../org/apache/spark/sql/UnsafeRowSuite.scala      | 38 +++++++++++++++++++++-
 2 files changed, 50 insertions(+), 1 deletion(-)

diff --git 
a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java
 
b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java
index 6325ba68af5b..8741c206f2bb 100644
--- 
a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java
+++ 
b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java
@@ -21,12 +21,14 @@ import java.io.*;
 import java.math.BigDecimal;
 import java.math.BigInteger;
 import java.nio.ByteBuffer;
+import java.util.Map;
 
 import com.esotericsoftware.kryo.Kryo;
 import com.esotericsoftware.kryo.KryoSerializable;
 import com.esotericsoftware.kryo.io.Input;
 import com.esotericsoftware.kryo.io.Output;
 
+import org.apache.spark.SparkIllegalArgumentException;
 import org.apache.spark.SparkUnsupportedOperationException;
 import org.apache.spark.sql.catalyst.InternalRow;
 import org.apache.spark.sql.catalyst.types.*;
@@ -155,6 +157,17 @@ public final class UnsafeRow extends InternalRow 
implements Externalizable, Kryo
   public void pointTo(Object baseObject, long baseOffset, int sizeInBytes) {
     assert numFields >= 0 : "numFields (" + numFields + ") should >= 0";
     assert sizeInBytes % 8 == 0 : "sizeInBytes (" + sizeInBytes + ") should be 
a multiple of 8";
+    if (baseObject instanceof byte[] bytes) {
+      int offsetInByteArray = (int) (baseOffset - Platform.BYTE_ARRAY_OFFSET);
+      if (offsetInByteArray < 0 || sizeInBytes < 0 ||
+          bytes.length < offsetInByteArray + sizeInBytes) {
+        throw new SparkIllegalArgumentException(
+          "INTERNAL_ERROR",
+          Map.of("message", "Invalid byte array backed UnsafeRow: byte array 
length=" +
+            bytes.length + ", offset=" + offsetInByteArray + ", byte size=" + 
sizeInBytes)
+        );
+      }
+    }
     this.baseObject = baseObject;
     this.baseOffset = baseOffset;
     this.sizeInBytes = sizeInBytes;
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/UnsafeRowSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/UnsafeRowSuite.scala
index 9daa69ce9f15..18a6c538e0a8 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/UnsafeRowSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/UnsafeRowSuite.scala
@@ -19,7 +19,7 @@ package org.apache.spark.sql
 
 import java.io.ByteArrayOutputStream
 
-import org.apache.spark.{SparkConf, SparkFunSuite}
+import org.apache.spark.{SparkConf, SparkFunSuite, 
SparkIllegalArgumentException}
 import org.apache.spark.serializer.{JavaSerializer, KryoSerializer}
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.expressions.{UnsafeProjection, UnsafeRow}
@@ -188,4 +188,40 @@ class UnsafeRowSuite extends SparkFunSuite {
     unsafeRow.setDecimal(0, d2, 38)
     assert(unsafeRow.getDecimal(0, 38, 18) === null)
   }
+
+  test("SPARK-48713: throw SparkIllegalArgumentException for illegal 
UnsafeRow.pointTo") {
+    val emptyRow = UnsafeRow.createFromByteArray(64, 2)
+    val byteArray = new Array[Byte](64)
+
+    // Out of bounds
+    var errorMsg = intercept[SparkIllegalArgumentException] {
+      emptyRow.pointTo(byteArray, Platform.BYTE_ARRAY_OFFSET + 50, 32)
+    }.getMessage
+    assert(
+      errorMsg.contains(
+        "Invalid byte array backed UnsafeRow: byte array length=64, offset=50, 
byte size=32"
+      )
+    )
+
+    // Negative size
+    errorMsg = intercept[SparkIllegalArgumentException] {
+      emptyRow.pointTo(byteArray, Platform.BYTE_ARRAY_OFFSET + 50, -32)
+    }.getMessage
+    assert(
+      errorMsg.contains(
+        "Invalid byte array backed UnsafeRow: byte array length=64, offset=50, 
byte size=-32"
+      )
+    )
+
+    // Negative offset
+    errorMsg = intercept[SparkIllegalArgumentException] {
+      emptyRow.pointTo(byteArray, -5, 32)
+    }.getMessage
+    assert(
+      errorMsg.contains(
+        s"Invalid byte array backed UnsafeRow: byte array length=64, " +
+          s"offset=${-5 - Platform.BYTE_ARRAY_OFFSET}, byte size=32"
+      )
+    )
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to