This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch 57_maintenance
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/57_maintenance by this push:
     new 2c35fceb3a [57_maintenance] Prevent buffer builder length overflow in 
MutableBuffer::extend_zeros (#9820) (#9926)
2c35fceb3a is described below

commit 2c35fceb3a73a31154deaa2912bbf23c740b4f05
Author: Andrew Lamb <[email protected]>
AuthorDate: Wed May 6 11:03:15 2026 -0400

    [57_maintenance] Prevent buffer builder length overflow in 
MutableBuffer::extend_zeros (#9820) (#9926)
    
    - Part of https://github.com/apache/arrow-rs/issues/9858
    - Fixes https://github.com/apache/arrow-rs/issues/9897 in 57.x releases
    
    This PR:
    - Backports https://github.com/apache/arrow-rs/pull/9820 from @alamb to
    the `57_maintenance` line
    
    Note: `MutableBuffer::extend_bool_trusted_len` is not present on the
    `57_maintenance` line, so this backport omits the hunk for that API.
---
 arrow-buffer/src/buffer/mutable.rs | 101 ++++++++++++++++++++++++++++++++++---
 arrow-buffer/src/builder/mod.rs    |  24 +++++++++
 2 files changed, 119 insertions(+), 6 deletions(-)

diff --git a/arrow-buffer/src/buffer/mutable.rs 
b/arrow-buffer/src/buffer/mutable.rs
index 75f77242b9..926a874219 100644
--- a/arrow-buffer/src/buffer/mutable.rs
+++ b/arrow-buffer/src/buffer/mutable.rs
@@ -112,6 +112,10 @@ impl MutableBuffer {
     /// Allocate a new [MutableBuffer] with initial capacity to be at least 
`capacity`.
     ///
     /// See [`MutableBuffer::with_capacity`].
+    ///
+    /// # Panics
+    ///
+    /// See [`MutableBuffer::with_capacity`].
     #[inline]
     pub fn new(capacity: usize) -> Self {
         Self::with_capacity(capacity)
@@ -147,6 +151,7 @@ impl MutableBuffer {
 
     /// Allocates a new [MutableBuffer] with `len` and capacity to be at least 
`len` where
     /// all bytes are guaranteed to be `0u8`.
+    ///
     /// # Example
     /// ```
     /// # use arrow_buffer::buffer::{Buffer, MutableBuffer};
@@ -156,6 +161,10 @@ impl MutableBuffer {
     /// let data = buffer.as_slice_mut();
     /// assert_eq!(data[126], 0u8);
     /// ```
+    ///
+    /// # Panics
+    ///
+    /// Panics if `len` is too large to construct a valid allocation [`Layout`]
     pub fn from_len_zeroed(len: usize) -> Self {
         let layout = Layout::from_size_align(len, ALIGNMENT).unwrap();
         let data = match layout.size() {
@@ -199,6 +208,10 @@ impl MutableBuffer {
 
     /// creates a new [MutableBuffer] with capacity and length capable of 
holding `len` bits.
     /// This is useful to create a buffer for packed bitmaps.
+    ///
+    /// # Panics
+    ///
+    /// See [`MutableBuffer::from_len_zeroed`].
     pub fn new_null(len: usize) -> Self {
         let num_bytes = bit_util::ceil(len, 8);
         MutableBuffer::from_len_zeroed(num_bytes)
@@ -210,6 +223,10 @@ impl MutableBuffer {
     /// This is useful when one wants to clear (or set) the bits and then 
manipulate
     /// the buffer directly (e.g., modifying the buffer by holding a mutable 
reference
     /// from `data_mut()`).
+    ///
+    /// # Panics
+    ///
+    /// Panics if `end` exceeds the buffer capacity.
     pub fn with_bitset(mut self, end: usize, val: bool) -> Self {
         assert!(end <= self.layout.size());
         let v = if val { 255 } else { 0 };
@@ -225,6 +242,10 @@ impl MutableBuffer {
     /// This is used to initialize the bits in a buffer, however, it has no 
impact on the
     /// `len` of the buffer and so can be used to initialize the memory region 
from
     /// `len` to `capacity`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the byte range `start..start + count` exceeds the buffer 
capacity.
     pub fn set_null_bits(&mut self, start: usize, count: usize) {
         assert!(
             start.saturating_add(count) <= self.layout.size(),
@@ -250,11 +271,19 @@ impl MutableBuffer {
     /// let buffer: Buffer = buffer.into();
     /// assert_eq!(buffer.len(), 253);
     /// ```
+    ///
+    /// # Panics
+    ///
+    /// Panics if `self.len + additional` overflows `usize`, or if the 
required capacity is too
+    /// large to round up to the next 64-byte boundary and construct a valid 
allocation layout.
     // For performance reasons, this must be inlined so that the `if` is 
executed inside the caller, and not as an extra call that just
     // exits.
     #[inline(always)]
     pub fn reserve(&mut self, additional: usize) {
-        let required_cap = self.len + additional;
+        let required_cap = self
+            .len
+            .checked_add(additional)
+            .expect("buffer length overflow");
         if required_cap > self.layout.size() {
             let new_capacity = 
bit_util::round_upto_multiple_of_64(required_cap);
             let new_capacity = std::cmp::max(new_capacity, self.layout.size() 
* 2);
@@ -274,6 +303,12 @@ impl MutableBuffer {
     /// buffer.repeat_slice_n_times(bytes_to_repeat, 3);
     /// assert_eq!(buffer.as_slice(), b"ababab");
     /// ```
+    ///
+    /// # Panics
+    ///
+    /// Panics if the repeated slice byte length overflows `usize`, if the 
resulting buffer
+    /// length overflows `usize`, or if reserving the required capacity fails 
for the same
+    /// reasons as [`MutableBuffer::reserve`].
     pub fn repeat_slice_n_times<T: ArrowNativeType>(
         &mut self,
         slice_to_repeat: &[T],
@@ -385,6 +420,11 @@ impl MutableBuffer {
     /// buffer.resize(253, 2); // allocates for the first time
     /// assert_eq!(buffer.as_slice()[252], 2u8);
     /// ```
+    ///
+    /// # Panics
+    ///
+    /// Panics if growing the buffer requires reserving a capacity that fails 
for the same
+    /// reasons as [`MutableBuffer::reserve`].
     // For performance reasons, this must be inlined so that the `if` is 
executed inside the caller, and not as an extra call that just
     // exits.
     #[inline(always)]
@@ -420,6 +460,11 @@ impl MutableBuffer {
     /// buffer.shrink_to_fit();
     /// assert!(buffer.capacity() >= 64 && buffer.capacity() < 128);
     /// ```
+    ///
+    /// # Panics
+    ///
+    /// Panics if the current length is too large to round up to the next 
64-byte boundary and
+    /// construct a valid allocation layout.
     pub fn shrink_to_fit(&mut self) {
         let new_capacity = bit_util::round_upto_multiple_of_64(self.len);
         if new_capacity < self.layout.size() {
@@ -493,8 +538,8 @@ impl MutableBuffer {
     ///
     /// # Panics
     ///
-    /// This function panics if the underlying buffer is not aligned
-    /// correctly for type `T`.
+    /// This function panics if the underlying buffer is not aligned correctly 
for type `T`, or
+    /// if its length is not a multiple of `size_of::<T>()`.
     pub fn typed_data_mut<T: ArrowNativeType>(&mut self) -> &mut [T] {
         // SAFETY
         // ArrowNativeType is trivially transmutable, is sealed to prevent 
potentially incorrect
@@ -508,8 +553,8 @@ impl MutableBuffer {
     ///
     /// # Panics
     ///
-    /// This function panics if the underlying buffer is not aligned
-    /// correctly for type `T`.
+    /// This function panics if the underlying buffer is not aligned correctly 
for type `T`, or
+    /// if its length is not a multiple of `size_of::<T>()`.
     pub fn typed_data<T: ArrowNativeType>(&self) -> &[T] {
         // SAFETY
         // ArrowNativeType is trivially transmutable, is sealed to prevent 
potentially incorrect
@@ -527,6 +572,11 @@ impl MutableBuffer {
     /// buffer.extend_from_slice(&[2u32, 0]);
     /// assert_eq!(buffer.len(), 8) // u32 has 4 bytes
     /// ```
+    ///
+    /// # Panics
+    ///
+    /// Panics if extending the buffer requires reserving a capacity that 
fails for the same
+    /// reasons as [`MutableBuffer::reserve`].
     #[inline]
     pub fn extend_from_slice<T: ArrowNativeType>(&mut self, items: &[T]) {
         let additional = mem::size_of_val(items);
@@ -550,6 +600,11 @@ impl MutableBuffer {
     /// buffer.push(256u32);
     /// assert_eq!(buffer.len(), 4) // u32 has 4 bytes
     /// ```
+    ///
+    /// # Panics
+    ///
+    /// Panics if extending the buffer requires reserving a capacity that 
fails for the same
+    /// reasons as [`MutableBuffer::reserve`].
     #[inline]
     pub fn push<T: ToByteSlice>(&mut self, item: T) {
         let additional = std::mem::size_of::<T>();
@@ -575,13 +630,26 @@ impl MutableBuffer {
     }
 
     /// Extends the buffer by `additional` bytes equal to `0u8`, incrementing 
its capacity if needed.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `self.len + additional` overflows `usize`, or if growing the 
buffer requires
+    /// reserving a capacity that fails for the same reasons as 
[`MutableBuffer::reserve`].
     #[inline]
     pub fn extend_zeros(&mut self, additional: usize) {
-        self.resize(self.len + additional, 0);
+        let new_len = self
+            .len
+            .checked_add(additional)
+            .expect("buffer length overflow");
+        self.resize(new_len, 0);
     }
 
     /// # Safety
     /// The caller must ensure that the buffer was properly initialized up to 
`len`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `len` exceeds the buffer capacity.
     #[inline]
     pub unsafe fn set_len(&mut self, len: usize) {
         assert!(len <= self.capacity());
@@ -726,6 +794,13 @@ impl MutableBuffer {
     /// let buffer = unsafe { MutableBuffer::from_trusted_len_iter(iter) };
     /// assert_eq!(buffer.len(), 4) // u32 has 4 bytes
     /// ```
+    ///
+    /// # Panics
+    ///
+    /// Panics if the iterator does not report an upper bound via `size_hint`, 
or if the
+    /// reported length does not match the number of items produced, or if 
allocating the
+    /// required buffer fails for the same reasons as [`MutableBuffer::new`].
+    ///
     /// # Safety
     /// This method assumes that the iterator's size is correct and is 
undefined behavior
     /// to use it on an iterator that reports an incorrect length.
@@ -770,6 +845,12 @@ impl MutableBuffer {
     /// let buffer = unsafe { MutableBuffer::from_trusted_len_iter_bool(iter) 
};
     /// assert_eq!(buffer.len(), 1) // 3 booleans have 1 byte
     /// ```
+    ///
+    /// # Panics
+    ///
+    /// Panics if the iterator does not report an upper bound via `size_hint`, 
or if it yields
+    /// fewer items than reported.
+    ///
     /// # Safety
     /// This method assumes that the iterator's size is correct and is 
undefined behavior
     /// to use it on an iterator that reports an incorrect length.
@@ -788,6 +869,14 @@ impl MutableBuffer {
     /// Creates a [`MutableBuffer`] from an [`Iterator`] with a trusted 
(upper) length or errors
     /// if any of the items of the iterator is an error.
     /// Prefer this to `collect` whenever possible, as it is faster ~60% 
faster.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the iterator does not report an upper bound via `size_hint`, 
or if the
+    /// reported length does not match the number of items produced before an 
error-free finish,
+    /// or if allocating the required buffer fails for the same reasons as
+    /// [`MutableBuffer::new`].
+    ///
     /// # Safety
     /// This method assumes that the iterator's size is correct and is 
undefined behavior
     /// to use it on an iterator that reports an incorrect length.
diff --git a/arrow-buffer/src/builder/mod.rs b/arrow-buffer/src/builder/mod.rs
index e44397258f..0c58f2c5c7 100644
--- a/arrow-buffer/src/builder/mod.rs
+++ b/arrow-buffer/src/builder/mod.rs
@@ -426,4 +426,28 @@ mod tests {
         let slice = builder.as_slice_mut();
         assert_eq!(slice.len(), 222);
     }
+
+    #[test]
+    #[should_panic(expected = "buffer length overflow")]
+    fn reserve_length_overflow() {
+        let mut builder = BufferBuilder::<u8>::new(1);
+        builder.append(0);
+        builder.reserve(usize::MAX);
+    }
+
+    #[test]
+    #[should_panic(expected = "buffer length overflow")]
+    fn append_n_zeroed_length_overflow() {
+        let mut builder = BufferBuilder::<u64>::new(1);
+        builder.append_n_zeroed(1);
+        builder.append_n_zeroed(usize::MAX / mem::size_of::<u64>());
+    }
+
+    #[test]
+    #[should_panic(expected = "buffer length overflow")]
+    fn advance_length_overflow() {
+        let mut builder = BufferBuilder::<u64>::new(1);
+        builder.advance(1);
+        builder.advance(usize::MAX / mem::size_of::<u64>());
+    }
 }

Reply via email to