This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch 56_maintenance
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/56_maintenance by this push:
new aeb648c1b2 [56_maintenance] Prevent buffer builder length overflow in
MutableBuffer::extend_zeros (#9820) (#9915)
aeb648c1b2 is described below
commit aeb648c1b24a7bb8add937b01b083344438e3a77
Author: Andrew Lamb <[email protected]>
AuthorDate: Wed May 6 10:54:06 2026 -0400
[56_maintenance] Prevent buffer builder length overflow in
MutableBuffer::extend_zeros (#9820) (#9915)
- Part of https://github.com/apache/arrow-rs/issues/9857
- Fixes https://github.com/apache/arrow-rs/issues/9897 in 56.x releases
This PR:
- Backports https://github.com/apache/arrow-rs/pull/9820 from @alamb to
the `56_maintenance` line
---
arrow-buffer/src/buffer/mutable.rs | 95 +++++++++++++++++++++++++++++++++++---
arrow-buffer/src/builder/mod.rs | 24 ++++++++++
2 files changed, 113 insertions(+), 6 deletions(-)
diff --git a/arrow-buffer/src/buffer/mutable.rs
b/arrow-buffer/src/buffer/mutable.rs
index 63fdbf598b..31bb520714 100644
--- a/arrow-buffer/src/buffer/mutable.rs
+++ b/arrow-buffer/src/buffer/mutable.rs
@@ -72,6 +72,10 @@ impl MutableBuffer {
/// Allocate a new [MutableBuffer] with initial capacity to be at least
`capacity`.
///
/// See [`MutableBuffer::with_capacity`].
+ ///
+ /// # Panics
+ ///
+ /// See [`MutableBuffer::with_capacity`].
#[inline]
pub fn new(capacity: usize) -> Self {
Self::with_capacity(capacity)
@@ -107,6 +111,7 @@ impl MutableBuffer {
/// Allocates a new [MutableBuffer] with `len` and capacity to be at least
`len` where
/// all bytes are guaranteed to be `0u8`.
+ ///
/// # Example
/// ```
/// # use arrow_buffer::buffer::{Buffer, MutableBuffer};
@@ -116,6 +121,10 @@ impl MutableBuffer {
/// let data = buffer.as_slice_mut();
/// assert_eq!(data[126], 0u8);
/// ```
+ ///
+ /// # Panics
+ ///
+ /// Panics if `len` is too large to construct a valid allocation [`Layout`]
pub fn from_len_zeroed(len: usize) -> Self {
let layout = Layout::from_size_align(len, ALIGNMENT).unwrap();
let data = match layout.size() {
@@ -159,6 +168,10 @@ impl MutableBuffer {
/// creates a new [MutableBuffer] with capacity and length capable of
holding `len` bits.
/// This is useful to create a buffer for packed bitmaps.
+ ///
+ /// # Panics
+ ///
+ /// See [`MutableBuffer::from_len_zeroed`].
pub fn new_null(len: usize) -> Self {
let num_bytes = bit_util::ceil(len, 8);
MutableBuffer::from_len_zeroed(num_bytes)
@@ -170,6 +183,10 @@ impl MutableBuffer {
/// This is useful when one wants to clear (or set) the bits and then
manipulate
/// the buffer directly (e.g., modifying the buffer by holding a mutable
reference
/// from `data_mut()`).
+ ///
+ /// # Panics
+ ///
+ /// Panics if `end` exceeds the buffer capacity.
pub fn with_bitset(mut self, end: usize, val: bool) -> Self {
assert!(end <= self.layout.size());
let v = if val { 255 } else { 0 };
@@ -185,6 +202,10 @@ impl MutableBuffer {
/// This is used to initialize the bits in a buffer, however, it has no
impact on the
/// `len` of the buffer and so can be used to initialize the memory region
from
/// `len` to `capacity`.
+ ///
+ /// # Panics
+ ///
+ /// Panics if the byte range `start..start + count` exceeds the buffer
capacity.
pub fn set_null_bits(&mut self, start: usize, count: usize) {
assert!(
start.saturating_add(count) <= self.layout.size(),
@@ -210,11 +231,19 @@ impl MutableBuffer {
/// let buffer: Buffer = buffer.into();
/// assert_eq!(buffer.len(), 253);
/// ```
+ ///
+ /// # Panics
+ ///
+ /// Panics if `self.len + additional` overflows `usize`, or if the
required capacity is too
+ /// large to round up to the next 64-byte boundary and construct a valid
allocation layout.
// For performance reasons, this must be inlined so that the `if` is
executed inside the caller, and not as an extra call that just
// exits.
#[inline(always)]
pub fn reserve(&mut self, additional: usize) {
- let required_cap = self.len + additional;
+ let required_cap = self
+ .len
+ .checked_add(additional)
+ .expect("buffer length overflow");
if required_cap > self.layout.size() {
let new_capacity =
bit_util::round_upto_multiple_of_64(required_cap);
let new_capacity = std::cmp::max(new_capacity, self.layout.size()
* 2);
@@ -276,6 +305,11 @@ impl MutableBuffer {
/// buffer.resize(253, 2); // allocates for the first time
/// assert_eq!(buffer.as_slice()[252], 2u8);
/// ```
+ ///
+ /// # Panics
+ ///
+ /// Panics if growing the buffer requires reserving a capacity that fails
for the same
+ /// reasons as [`MutableBuffer::reserve`].
// For performance reasons, this must be inlined so that the `if` is
executed inside the caller, and not as an extra call that just
// exits.
#[inline(always)]
@@ -311,6 +345,11 @@ impl MutableBuffer {
/// buffer.shrink_to_fit();
/// assert!(buffer.capacity() >= 64 && buffer.capacity() < 128);
/// ```
+ ///
+ /// # Panics
+ ///
+ /// Panics if the current length is too large to round up to the next
64-byte boundary and
+ /// construct a valid allocation layout.
pub fn shrink_to_fit(&mut self) {
let new_capacity = bit_util::round_upto_multiple_of_64(self.len);
if new_capacity < self.layout.size() {
@@ -384,8 +423,8 @@ impl MutableBuffer {
///
/// # Panics
///
- /// This function panics if the underlying buffer is not aligned
- /// correctly for type `T`.
+ /// This function panics if the underlying buffer is not aligned correctly
for type `T`, or
+ /// if its length is not a multiple of `size_of::<T>()`.
pub fn typed_data_mut<T: ArrowNativeType>(&mut self) -> &mut [T] {
// SAFETY
// ArrowNativeType is trivially transmutable, is sealed to prevent
potentially incorrect
@@ -399,8 +438,8 @@ impl MutableBuffer {
///
/// # Panics
///
- /// This function panics if the underlying buffer is not aligned
- /// correctly for type `T`.
+ /// This function panics if the underlying buffer is not aligned correctly
for type `T`, or
+ /// if its length is not a multiple of `size_of::<T>()`.
pub fn typed_data<T: ArrowNativeType>(&self) -> &[T] {
// SAFETY
// ArrowNativeType is trivially transmutable, is sealed to prevent
potentially incorrect
@@ -418,6 +457,11 @@ impl MutableBuffer {
/// buffer.extend_from_slice(&[2u32, 0]);
/// assert_eq!(buffer.len(), 8) // u32 has 4 bytes
/// ```
+ ///
+ /// # Panics
+ ///
+ /// Panics if extending the buffer requires reserving a capacity that
fails for the same
+ /// reasons as [`MutableBuffer::reserve`].
#[inline]
pub fn extend_from_slice<T: ArrowNativeType>(&mut self, items: &[T]) {
let additional = mem::size_of_val(items);
@@ -441,6 +485,11 @@ impl MutableBuffer {
/// buffer.push(256u32);
/// assert_eq!(buffer.len(), 4) // u32 has 4 bytes
/// ```
+ ///
+ /// # Panics
+ ///
+ /// Panics if extending the buffer requires reserving a capacity that
fails for the same
+ /// reasons as [`MutableBuffer::reserve`].
#[inline]
pub fn push<T: ToByteSlice>(&mut self, item: T) {
let additional = std::mem::size_of::<T>();
@@ -466,13 +515,26 @@ impl MutableBuffer {
}
/// Extends the buffer by `additional` bytes equal to `0u8`, incrementing
its capacity if needed.
+ ///
+ /// # Panics
+ ///
+ /// Panics if `self.len + additional` overflows `usize`, or if growing the
buffer requires
+ /// reserving a capacity that fails for the same reasons as
[`MutableBuffer::reserve`].
#[inline]
pub fn extend_zeros(&mut self, additional: usize) {
- self.resize(self.len + additional, 0);
+ let new_len = self
+ .len
+ .checked_add(additional)
+ .expect("buffer length overflow");
+ self.resize(new_len, 0);
}
/// # Safety
/// The caller must ensure that the buffer was properly initialized up to
`len`.
+ ///
+ /// # Panics
+ ///
+ /// Panics if `len` exceeds the buffer capacity.
#[inline]
pub unsafe fn set_len(&mut self, len: usize) {
assert!(len <= self.capacity());
@@ -618,6 +680,13 @@ impl MutableBuffer {
/// let buffer = unsafe { MutableBuffer::from_trusted_len_iter(iter) };
/// assert_eq!(buffer.len(), 4) // u32 has 4 bytes
/// ```
+ ///
+ /// # Panics
+ ///
+ /// Panics if the iterator does not report an upper bound via `size_hint`,
or if the
+ /// reported length does not match the number of items produced, or if
allocating the
+ /// required buffer fails for the same reasons as [`MutableBuffer::new`].
+ ///
/// # Safety
/// This method assumes that the iterator's size is correct and is
undefined behavior
/// to use it on an iterator that reports an incorrect length.
@@ -662,6 +731,12 @@ impl MutableBuffer {
/// let buffer = unsafe { MutableBuffer::from_trusted_len_iter_bool(iter)
};
/// assert_eq!(buffer.len(), 1) // 3 booleans have 1 byte
/// ```
+ ///
+ /// # Panics
+ ///
+ /// Panics if the iterator does not report an upper bound via `size_hint`,
or if it yields
+ /// fewer items than reported.
+ ///
/// # Safety
/// This method assumes that the iterator's size is correct and is
undefined behavior
/// to use it on an iterator that reports an incorrect length.
@@ -680,6 +755,14 @@ impl MutableBuffer {
/// Creates a [`MutableBuffer`] from an [`Iterator`] with a trusted
(upper) length or errors
/// if any of the items of the iterator is an error.
/// Prefer this to `collect` whenever possible, as it is faster ~60%
faster.
+ ///
+ /// # Panics
+ ///
+ /// Panics if the iterator does not report an upper bound via `size_hint`,
or if the
+ /// reported length does not match the number of items produced before an
error-free finish,
+ /// or if allocating the required buffer fails for the same reasons as
+ /// [`MutableBuffer::new`].
+ ///
/// # Safety
/// This method assumes that the iterator's size is correct and is
undefined behavior
/// to use it on an iterator that reports an incorrect length.
diff --git a/arrow-buffer/src/builder/mod.rs b/arrow-buffer/src/builder/mod.rs
index abe510bdab..d58e4b7a09 100644
--- a/arrow-buffer/src/builder/mod.rs
+++ b/arrow-buffer/src/builder/mod.rs
@@ -418,4 +418,28 @@ mod tests {
builder.extend([3, 4]);
assert_eq!(builder.len(), 4);
}
+
+ #[test]
+ #[should_panic(expected = "buffer length overflow")]
+ fn reserve_length_overflow() {
+ let mut builder = BufferBuilder::<u8>::new(1);
+ builder.append(0);
+ builder.reserve(usize::MAX);
+ }
+
+ #[test]
+ #[should_panic(expected = "buffer length overflow")]
+ fn append_n_zeroed_length_overflow() {
+ let mut builder = BufferBuilder::<u64>::new(1);
+ builder.append_n_zeroed(1);
+ builder.append_n_zeroed(usize::MAX / mem::size_of::<u64>());
+ }
+
+ #[test]
+ #[should_panic(expected = "buffer length overflow")]
+ fn advance_length_overflow() {
+ let mut builder = BufferBuilder::<u64>::new(1);
+ builder.advance(1);
+ builder.advance(usize::MAX / mem::size_of::<u64>());
+ }
}