This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new 75f7916d2a Prevent `FixedSizeBinaryArray` `i32` offset overflows (try 
2) (#9872)
75f7916d2a is described below

commit 75f7916d2ae8d11320451f1c3e05680d69865d5f
Author: Andrew Lamb <[email protected]>
AuthorDate: Mon May 4 16:39:38 2026 -0400

    Prevent `FixedSizeBinaryArray` `i32` offset overflows (try 2) (#9872)
    
    # Which issue does this PR close?
    
    - Closes https://github.com/apache/arrow-rs/pull/9850
    
    # Rationale for this change
    
    `FixedSizeBinaryArray::value_offset_at` works use `i32` arithmetic which
    can overflow. For offsets beyond `i32::MAX`, that can be bad
    
    # What changes are included in this PR?
    
    1. Prevent any FixedSizedBinaryArrays from being constructed where the
    offset calculation could overflow
    2. Add some other overflow checks
    
    As @adamreeve [pointed
    out](https://github.com/apache/arrow-rs/pull/9850#discussion_r3164949680)
    on https://github.com/apache/arrow-rs/pull/9850 there are several places
    where the `i32` arithmetic is problematic in `FixedSizeBinaryArray`. I
    will fix them for real in a different, follow on PR, by switching to
    entirely `usize` based arithmetic for offset calculations
    
    However, since I hope to backport this PR to older releases, I would
    like something that is easy to review and has the least potential for
    unintended consequences.
    
    # Are these changes tested?
    
    I added unit tests. However, I can't find any way to fully trigger the
    actual paths short of trying to allocate very large arrays, which I
    don't think is appropriate for unit tests.
    
    # Are there any user-facing changes?
    Better limit checking
---
 arrow-array/src/array/fixed_size_binary_array.rs | 120 ++++++++++++++++++++---
 1 file changed, 107 insertions(+), 13 deletions(-)

diff --git a/arrow-array/src/array/fixed_size_binary_array.rs 
b/arrow-array/src/array/fixed_size_binary_array.rs
index 928aa38a27..3c3dc8f589 100644
--- a/arrow-array/src/array/fixed_size_binary_array.rs
+++ b/arrow-array/src/array/fixed_size_binary_array.rs
@@ -108,7 +108,8 @@ impl FixedSizeBinaryArray {
     /// Create a new [`Scalar`] from `value`
     pub fn new_scalar(value: impl AsRef<[u8]>) -> Scalar<Self> {
         let v = value.as_ref();
-        Scalar::new(Self::new(v.len() as _, Buffer::from(v), None))
+        let size = i32::try_from(v.len()).expect("FixedSizeBinaryArray value 
length exceeds i32");
+        Scalar::new(Self::new(size, Buffer::from(v), None))
     }
 
     /// Create a new [`FixedSizeBinaryArray`] from the provided parts, 
returning an error on failure
@@ -121,6 +122,7 @@ impl FixedSizeBinaryArray {
     /// * `size < 0`
     /// * `values.len() / size != nulls.len()`
     /// * `size == 0 && values.len() != 0`
+    /// * `len * size > i32::MAX`
     pub fn try_new(
         size: i32,
         values: Buffer,
@@ -157,6 +159,8 @@ impl FixedSizeBinaryArray {
             }
         };
 
+        Self::validate_lengths(s, len)?;
+
         Ok(Self {
             data_type,
             value_data: values,
@@ -166,6 +170,33 @@ impl FixedSizeBinaryArray {
         })
     }
 
+    /// Some calculations below use i32 arithmetic which can overflow when
+    /// valid offsets are past i32::MAX. Until that is solved for real do not
+    /// permit constructing any FixedSizeBinaryArray that has a valid offset
+    /// past i32::MAX
+    fn validate_lengths(value_size: usize, len: usize) -> Result<(), 
ArrowError> {
+        // the offset is also calculated for the next element (i + 1) so
+        // check `len` (not last element index) to ensure that all offsets are 
valid
+        let max_offset = value_size.checked_mul(len).ok_or_else(|| {
+            ArrowError::InvalidArgumentError(format!(
+                "FixedSizeBinaryArray error: value size {value_size} * len 
{len} exceeds maximum valid offset"
+            ))
+        })?;
+
+        let max_valid_offset: usize = i32::MAX.try_into().map_err(|_| {
+            ArrowError::InvalidArgumentError(format!(
+                "FixedSizeBinaryArray error: maximum valid offset exceeds 
i32::MAX, got {max_offset}"
+            ))
+        })?;
+
+        if max_offset > max_valid_offset {
+            return Err(ArrowError::InvalidArgumentError(format!(
+                "FixedSizeBinaryArray error: value size {value_size} * length 
{len} exceeds maximum valid offset of {max_valid_offset}"
+            )));
+        };
+        Ok(())
+    }
+
     /// Create a new [`FixedSizeBinaryArray`] of length `len` where all values 
are null
     ///
     /// # Panics
@@ -174,12 +205,17 @@ impl FixedSizeBinaryArray {
     ///
     /// * `size < 0`
     /// * `size * len` would overflow `usize`
+    /// * `size * len > i32::MAX`
+    /// * `size * len * 8` would overflow `usize`
     pub fn new_null(size: i32, len: usize) -> Self {
         const BITS_IN_A_BYTE: usize = 8;
-        let capacity_in_bytes = 
size.to_usize().unwrap().checked_mul(len).unwrap();
+        let size_usize = size.to_usize().unwrap();
+        Self::validate_lengths(size_usize, len).unwrap();
+        let capacity_in_bytes = size_usize.checked_mul(len).unwrap();
+        let capacity_in_bits = 
capacity_in_bytes.checked_mul(BITS_IN_A_BYTE).unwrap();
         Self {
             data_type: DataType::FixedSizeBinary(size),
-            value_data: MutableBuffer::new_null(capacity_in_bytes * 
BITS_IN_A_BYTE).into(),
+            value_data: MutableBuffer::new_null(capacity_in_bits).into(),
             nulls: Some(NullBuffer::new_null(len)),
             value_length: size,
             len,
@@ -347,8 +383,16 @@ impl FixedSizeBinaryArray {
                     // Now that we know how large each element is we can 
reserve
                     // sufficient capacity in the underlying mutable buffer for
                     // the data.
-                    buffer.reserve(iter_size_hint * len);
-                    buffer.extend_zeros(slice.len() * prepend);
+                    if let Some(capacity) = iter_size_hint.checked_mul(len) {
+                        buffer.reserve(capacity);
+                    }
+                    let prepend_zeros = 
slice.len().checked_mul(prepend).ok_or_else(|| {
+                        ArrowError::InvalidArgumentError(format!(
+                            "FixedSizeBinaryArray error: value size {} * 
prepend {prepend} exceeds usize",
+                            slice.len()
+                        ))
+                    })?;
+                    buffer.extend_zeros(prepend_zeros);
                 }
                 bit_util::set_bit(null_buf.as_slice_mut(), len);
                 buffer.extend_from_slice(slice);
@@ -371,7 +415,13 @@ impl FixedSizeBinaryArray {
 
         let nulls = NullBuffer::from_unsliced_buffer(null_buf, len);
 
-        let size = size.unwrap_or(0) as i32;
+        let size = size.unwrap_or(0);
+        Self::validate_lengths(size, len)?;
+        let size = size.try_into().map_err(|_| {
+            ArrowError::InvalidArgumentError(format!(
+                "FixedSizeBinaryArray value length exceeds i32, got {size}"
+            ))
+        })?;
         Ok(Self {
             data_type: DataType::FixedSizeBinary(size),
             value_data: buffer.into(),
@@ -410,12 +460,20 @@ impl FixedSizeBinaryArray {
         T: Iterator<Item = Option<U>>,
         U: AsRef<[u8]>,
     {
+        let size_usize = size.to_usize().ok_or_else(|| {
+            ArrowError::InvalidArgumentError(format!("Size cannot be negative, 
got {size}"))
+        })?;
         let mut len = 0;
         let mut byte = 0;
 
         let iter_size_hint = iter.size_hint().0;
         let mut null_buf = MutableBuffer::new(bit_util::ceil(iter_size_hint, 
8));
-        let mut buffer = MutableBuffer::new(iter_size_hint * (size as usize));
+        let capacity = iter_size_hint.checked_mul(size_usize).ok_or_else(|| {
+            ArrowError::InvalidArgumentError(format!(
+                "FixedSizeBinaryArray error: value size {size_usize} * len 
hint {iter_size_hint} exceeds usize"
+            ))
+        })?;
+        let mut buffer = MutableBuffer::new(capacity);
 
         iter.try_for_each(|item| -> Result<(), ArrowError> {
             // extend null bitmask by one byte per each 8 items
@@ -427,7 +485,7 @@ impl FixedSizeBinaryArray {
 
             if let Some(slice) = item {
                 let slice = slice.as_ref();
-                if size as usize != slice.len() {
+                if size_usize != slice.len() {
                     return Err(ArrowError::InvalidArgumentError(format!(
                         "Nested array size mismatch: one is {}, and the other 
is {}",
                         size,
@@ -438,7 +496,7 @@ impl FixedSizeBinaryArray {
                 bit_util::set_bit(null_buf.as_slice_mut(), len);
                 buffer.extend_from_slice(slice);
             } else {
-                buffer.extend_zeros(size as usize);
+                buffer.extend_zeros(size_usize);
             }
 
             len += 1;
@@ -447,6 +505,7 @@ impl FixedSizeBinaryArray {
         })?;
 
         let nulls = NullBuffer::from_unsliced_buffer(null_buf, len);
+        Self::validate_lengths(size_usize, len)?;
 
         Ok(Self {
             data_type: DataType::FixedSizeBinary(size),
@@ -497,7 +556,9 @@ impl FixedSizeBinaryArray {
             } else {
                 let len = slice.len();
                 size = Some(len);
-                buffer.reserve(iter_size_hint * len);
+                if let Some(capacity) = iter_size_hint.checked_mul(len) {
+                    buffer.reserve(capacity);
+                }
             }
 
             buffer.extend_from_slice(slice);
@@ -513,7 +574,13 @@ impl FixedSizeBinaryArray {
             ));
         }
 
-        let size = size.unwrap_or(0).try_into().unwrap();
+        let size = size.unwrap_or(0);
+        Self::validate_lengths(size, len)?;
+        let size = size.try_into().map_err(|_| {
+            ArrowError::InvalidArgumentError(format!(
+                "FixedSizeBinaryArray value length exceeds i32, got {size}"
+            ))
+        })?;
         Ok(Self {
             data_type: DataType::FixedSizeBinary(size),
             value_data: buffer.into(),
@@ -548,8 +615,15 @@ impl From<ArrayData> for FixedSizeBinaryArray {
             _ => panic!("Expected data type to be FixedSizeBinary"),
         };
 
-        let size = value_length as usize;
-        let value_data = buffers[0].slice_with_length(offset * size, len * 
size);
+        let size = value_length
+            .to_usize()
+            .expect("FixedSizeBinaryArray value length must be non-negative");
+        Self::validate_lengths(size, len)
+            .expect("FixedSizeBinaryArray offsets must fit within i32");
+        let value_data = buffers[0].slice_with_length(
+            offset.checked_mul(size).expect("offset overflow"),
+            len.checked_mul(size).expect("length overflow"),
+        );
 
         Self {
             data_type,
@@ -1040,6 +1114,26 @@ mod tests {
         array.value(4);
     }
 
+    #[test]
+    fn test_validate_lengths_allows_empty_array() {
+        FixedSizeBinaryArray::validate_lengths(1024, 0).unwrap();
+    }
+
+    #[test]
+    fn test_validate_lengths_allows_i32_max_offset() {
+        FixedSizeBinaryArray::validate_lengths(1, i32::MAX as usize).unwrap();
+        FixedSizeBinaryArray::validate_lengths(262_176, 8191).unwrap();
+    }
+
+    #[test]
+    fn test_validate_lengths_rejects_offset_past_i32_max() {
+        let err = FixedSizeBinaryArray::validate_lengths(262_177, 
8192).unwrap_err();
+        assert_eq!(
+            err.to_string(),
+            "Invalid argument error: FixedSizeBinaryArray error: value size 
262177 * length 8192 exceeds maximum valid offset of 2147483647",
+        );
+    }
+
     #[test]
     fn test_constructors() {
         let buffer = Buffer::from_vec(vec![0_u8; 10]);

Reply via email to