This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch 56_maintenance
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/56_maintenance by this push:
     new 1e7097ff91 [56_maintenance] Prevent FixedSizeBinaryArray i32 offset 
overflows (#9872) (#9917)
1e7097ff91 is described below

commit 1e7097ff910a61066531dfbb1db803db77acd6e0
Author: Andrew Lamb <[email protected]>
AuthorDate: Wed May 6 11:12:49 2026 -0400

    [56_maintenance] Prevent FixedSizeBinaryArray i32 offset overflows (#9872) 
(#9917)
    
    - Part of https://github.com/apache/arrow-rs/issues/9857
    - Fixes https://github.com/apache/arrow-rs/issues/9898 in 56.x releases
    
    This PR:
    - Backports https://github.com/apache/arrow-rs/pull/9872 from @alamb to
    the `56_maintenance` line
    - Supersedes the earlier closed attempt
    https://github.com/apache/arrow-rs/pull/9850 referenced by the issue
---
 arrow-array/src/array/fixed_size_binary_array.rs | 122 ++++++++++++++++++++---
 1 file changed, 108 insertions(+), 14 deletions(-)

diff --git a/arrow-array/src/array/fixed_size_binary_array.rs 
b/arrow-array/src/array/fixed_size_binary_array.rs
index 76d9db0470..42691eeafa 100644
--- a/arrow-array/src/array/fixed_size_binary_array.rs
+++ b/arrow-array/src/array/fixed_size_binary_array.rs
@@ -71,7 +71,8 @@ impl FixedSizeBinaryArray {
     /// Create a new [`Scalar`] from `value`
     pub fn new_scalar(value: impl AsRef<[u8]>) -> Scalar<Self> {
         let v = value.as_ref();
-        Scalar::new(Self::new(v.len() as _, Buffer::from(v), None))
+        let size = i32::try_from(v.len()).expect("FixedSizeBinaryArray value 
length exceeds i32");
+        Scalar::new(Self::new(size, Buffer::from(v), None))
     }
 
     /// Create a new [`FixedSizeBinaryArray`] from the provided parts, 
returning an error on failure
@@ -80,6 +81,7 @@ impl FixedSizeBinaryArray {
     ///
     /// * `size < 0`
     /// * `values.len() / size != nulls.len()`
+    /// * `len * size > i32::MAX`
     pub fn try_new(
         size: i32,
         values: Buffer,
@@ -101,6 +103,8 @@ impl FixedSizeBinaryArray {
             }
         }
 
+        Self::validate_lengths(s, len)?;
+
         Ok(Self {
             data_type,
             value_data: values,
@@ -110,6 +114,33 @@ impl FixedSizeBinaryArray {
         })
     }
 
+    /// Some calculations below use i32 arithmetic which can overflow when
+    /// valid offsets are past i32::MAX. Until that is solved for real do not
+    /// permit constructing any FixedSizeBinaryArray that has a valid offset
+    /// past i32::MAX
+    fn validate_lengths(value_size: usize, len: usize) -> Result<(), 
ArrowError> {
+        // the offset is also calculated for the next element (i + 1) so
+        // check `len` (not last element index) to ensure that all offsets are 
valid
+        let max_offset = value_size.checked_mul(len).ok_or_else(|| {
+            ArrowError::InvalidArgumentError(format!(
+                "FixedSizeBinaryArray error: value size {value_size} * len 
{len} exceeds maximum valid offset"
+            ))
+        })?;
+
+        let max_valid_offset: usize = i32::MAX.try_into().map_err(|_| {
+            ArrowError::InvalidArgumentError(format!(
+                "FixedSizeBinaryArray error: maximum valid offset exceeds 
i32::MAX, got {max_offset}"
+            ))
+        })?;
+
+        if max_offset > max_valid_offset {
+            return Err(ArrowError::InvalidArgumentError(format!(
+                "FixedSizeBinaryArray error: value size {value_size} * length 
{len} exceeds maximum valid offset of {max_valid_offset}"
+            )));
+        };
+        Ok(())
+    }
+
     /// Create a new [`FixedSizeBinaryArray`] of length `len` where all values 
are null
     ///
     /// # Panics
@@ -118,11 +149,17 @@ impl FixedSizeBinaryArray {
     ///
     /// * `size < 0`
     /// * `size * len` would overflow `usize`
+    /// * `size * len > i32::MAX`
+    /// * `size * len * 8` would overflow `usize`
     pub fn new_null(size: i32, len: usize) -> Self {
-        let capacity = size.to_usize().unwrap().checked_mul(len).unwrap();
+        const BITS_IN_A_BYTE: usize = 8;
+        let size_usize = size.to_usize().unwrap();
+        Self::validate_lengths(size_usize, len).unwrap();
+        let capacity_in_bytes = size_usize.checked_mul(len).unwrap();
+        let capacity_in_bits = 
capacity_in_bytes.checked_mul(BITS_IN_A_BYTE).unwrap();
         Self {
             data_type: DataType::FixedSizeBinary(size),
-            value_data: MutableBuffer::new(capacity).into(),
+            value_data: MutableBuffer::new_null(capacity_in_bits).into(),
             nulls: Some(NullBuffer::new_null(len)),
             value_length: size,
             len,
@@ -288,8 +325,16 @@ impl FixedSizeBinaryArray {
                     // Now that we know how large each element is we can 
reserve
                     // sufficient capacity in the underlying mutable buffer for
                     // the data.
-                    buffer.reserve(iter_size_hint * len);
-                    buffer.extend_zeros(slice.len() * prepend);
+                    if let Some(capacity) = iter_size_hint.checked_mul(len) {
+                        buffer.reserve(capacity);
+                    }
+                    let prepend_zeros = 
slice.len().checked_mul(prepend).ok_or_else(|| {
+                        ArrowError::InvalidArgumentError(format!(
+                            "FixedSizeBinaryArray error: value size {} * 
prepend {prepend} exceeds usize",
+                            slice.len()
+                        ))
+                    })?;
+                    buffer.extend_zeros(prepend_zeros);
                 }
                 bit_util::set_bit(null_buf.as_slice_mut(), len);
                 buffer.extend_from_slice(slice);
@@ -313,7 +358,13 @@ impl FixedSizeBinaryArray {
         let null_buf = BooleanBuffer::new(null_buf.into(), 0, len);
         let nulls = Some(NullBuffer::new(null_buf)).filter(|n| n.null_count() 
> 0);
 
-        let size = size.unwrap_or(0) as i32;
+        let size = size.unwrap_or(0);
+        Self::validate_lengths(size, len)?;
+        let size = size.try_into().map_err(|_| {
+            ArrowError::InvalidArgumentError(format!(
+                "FixedSizeBinaryArray value length exceeds i32, got {size}"
+            ))
+        })?;
         Ok(Self {
             data_type: DataType::FixedSizeBinary(size),
             value_data: buffer.into(),
@@ -352,12 +403,20 @@ impl FixedSizeBinaryArray {
         T: Iterator<Item = Option<U>>,
         U: AsRef<[u8]>,
     {
+        let size_usize = size.to_usize().ok_or_else(|| {
+            ArrowError::InvalidArgumentError(format!("Size cannot be negative, 
got {size}"))
+        })?;
         let mut len = 0;
         let mut byte = 0;
 
         let iter_size_hint = iter.size_hint().0;
         let mut null_buf = MutableBuffer::new(bit_util::ceil(iter_size_hint, 
8));
-        let mut buffer = MutableBuffer::new(iter_size_hint * (size as usize));
+        let capacity = iter_size_hint.checked_mul(size_usize).ok_or_else(|| {
+            ArrowError::InvalidArgumentError(format!(
+                "FixedSizeBinaryArray error: value size {size_usize} * len 
hint {iter_size_hint} exceeds usize"
+            ))
+        })?;
+        let mut buffer = MutableBuffer::new(capacity);
 
         iter.try_for_each(|item| -> Result<(), ArrowError> {
             // extend null bitmask by one byte per each 8 items
@@ -369,7 +428,7 @@ impl FixedSizeBinaryArray {
 
             if let Some(slice) = item {
                 let slice = slice.as_ref();
-                if size as usize != slice.len() {
+                if size_usize != slice.len() {
                     return Err(ArrowError::InvalidArgumentError(format!(
                         "Nested array size mismatch: one is {}, and the other 
is {}",
                         size,
@@ -380,7 +439,7 @@ impl FixedSizeBinaryArray {
                 bit_util::set_bit(null_buf.as_slice_mut(), len);
                 buffer.extend_from_slice(slice);
             } else {
-                buffer.extend_zeros(size as usize);
+                buffer.extend_zeros(size_usize);
             }
 
             len += 1;
@@ -390,6 +449,7 @@ impl FixedSizeBinaryArray {
 
         let null_buf = BooleanBuffer::new(null_buf.into(), 0, len);
         let nulls = Some(NullBuffer::new(null_buf)).filter(|n| n.null_count() 
> 0);
+        Self::validate_lengths(size_usize, len)?;
 
         Ok(Self {
             data_type: DataType::FixedSizeBinary(size),
@@ -440,7 +500,9 @@ impl FixedSizeBinaryArray {
             } else {
                 let len = slice.len();
                 size = Some(len);
-                buffer.reserve(iter_size_hint * len);
+                if let Some(capacity) = iter_size_hint.checked_mul(len) {
+                    buffer.reserve(capacity);
+                }
             }
 
             buffer.extend_from_slice(slice);
@@ -456,7 +518,13 @@ impl FixedSizeBinaryArray {
             ));
         }
 
-        let size = size.unwrap_or(0).try_into().unwrap();
+        let size = size.unwrap_or(0);
+        Self::validate_lengths(size, len)?;
+        let size = size.try_into().map_err(|_| {
+            ArrowError::InvalidArgumentError(format!(
+                "FixedSizeBinaryArray value length exceeds i32, got {size}"
+            ))
+        })?;
         Ok(Self {
             data_type: DataType::FixedSizeBinary(size),
             value_data: buffer.into(),
@@ -489,9 +557,15 @@ impl From<ArrayData> for FixedSizeBinaryArray {
             _ => panic!("Expected data type to be FixedSizeBinary"),
         };
 
-        let size = value_length as usize;
-        let value_data =
-            data.buffers()[0].slice_with_length(data.offset() * size, 
data.len() * size);
+        let size = value_length
+            .to_usize()
+            .expect("FixedSizeBinaryArray value length must be non-negative");
+        Self::validate_lengths(size, data.len())
+            .expect("FixedSizeBinaryArray offsets must fit within i32");
+        let value_data = data.buffers()[0].slice_with_length(
+            data.offset().checked_mul(size).expect("offset overflow"),
+            data.len().checked_mul(size).expect("length overflow"),
+        );
 
         Self {
             data_type: data.data_type().clone(),
@@ -972,6 +1046,26 @@ mod tests {
         array.value(4);
     }
 
+    #[test]
+    fn test_validate_lengths_allows_empty_array() {
+        FixedSizeBinaryArray::validate_lengths(1024, 0).unwrap();
+    }
+
+    #[test]
+    fn test_validate_lengths_allows_i32_max_offset() {
+        FixedSizeBinaryArray::validate_lengths(1, i32::MAX as usize).unwrap();
+        FixedSizeBinaryArray::validate_lengths(262_176, 8191).unwrap();
+    }
+
+    #[test]
+    fn test_validate_lengths_rejects_offset_past_i32_max() {
+        let err = FixedSizeBinaryArray::validate_lengths(262_177, 
8192).unwrap_err();
+        assert_eq!(
+            err.to_string(),
+            "Invalid argument error: FixedSizeBinaryArray error: value size 
262177 * length 8192 exceeds maximum valid offset of 2147483647",
+        );
+    }
+
     #[test]
     fn test_constructors() {
         let buffer = Buffer::from_vec(vec![0_u8; 10]);

Reply via email to