This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch 57_maintenance
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/57_maintenance by this push:
new 2c35fceb3a [57_maintenance] Prevent buffer builder length overflow in
MutableBuffer::extend_zeros (#9820) (#9926)
2c35fceb3a is described below
commit 2c35fceb3a73a31154deaa2912bbf23c740b4f05
Author: Andrew Lamb <[email protected]>
AuthorDate: Wed May 6 11:03:15 2026 -0400
[57_maintenance] Prevent buffer builder length overflow in
MutableBuffer::extend_zeros (#9820) (#9926)
- Part of https://github.com/apache/arrow-rs/issues/9858
- Fixes https://github.com/apache/arrow-rs/issues/9897 in 57.x releases
This PR:
- Backports https://github.com/apache/arrow-rs/pull/9820 from @alamb to
the `57_maintenance` line
Note: `MutableBuffer::extend_bool_trusted_len` is not present on the
`57_maintenance` line, so this backport omits the hunk for that API.
---
arrow-buffer/src/buffer/mutable.rs | 101 ++++++++++++++++++++++++++++++++++---
arrow-buffer/src/builder/mod.rs | 24 +++++++++
2 files changed, 119 insertions(+), 6 deletions(-)
diff --git a/arrow-buffer/src/buffer/mutable.rs
b/arrow-buffer/src/buffer/mutable.rs
index 75f77242b9..926a874219 100644
--- a/arrow-buffer/src/buffer/mutable.rs
+++ b/arrow-buffer/src/buffer/mutable.rs
@@ -112,6 +112,10 @@ impl MutableBuffer {
/// Allocate a new [MutableBuffer] with initial capacity to be at least
`capacity`.
///
/// See [`MutableBuffer::with_capacity`].
+ ///
+ /// # Panics
+ ///
+ /// See [`MutableBuffer::with_capacity`].
#[inline]
pub fn new(capacity: usize) -> Self {
Self::with_capacity(capacity)
@@ -147,6 +151,7 @@ impl MutableBuffer {
/// Allocates a new [MutableBuffer] with `len` and capacity to be at least
`len` where
/// all bytes are guaranteed to be `0u8`.
+ ///
/// # Example
/// ```
/// # use arrow_buffer::buffer::{Buffer, MutableBuffer};
@@ -156,6 +161,10 @@ impl MutableBuffer {
/// let data = buffer.as_slice_mut();
/// assert_eq!(data[126], 0u8);
/// ```
+ ///
+ /// # Panics
+ ///
+ /// Panics if `len` is too large to construct a valid allocation [`Layout`]
pub fn from_len_zeroed(len: usize) -> Self {
let layout = Layout::from_size_align(len, ALIGNMENT).unwrap();
let data = match layout.size() {
@@ -199,6 +208,10 @@ impl MutableBuffer {
/// creates a new [MutableBuffer] with capacity and length capable of
holding `len` bits.
/// This is useful to create a buffer for packed bitmaps.
+ ///
+ /// # Panics
+ ///
+ /// See [`MutableBuffer::from_len_zeroed`].
pub fn new_null(len: usize) -> Self {
let num_bytes = bit_util::ceil(len, 8);
MutableBuffer::from_len_zeroed(num_bytes)
@@ -210,6 +223,10 @@ impl MutableBuffer {
/// This is useful when one wants to clear (or set) the bits and then
manipulate
/// the buffer directly (e.g., modifying the buffer by holding a mutable
reference
/// from `data_mut()`).
+ ///
+ /// # Panics
+ ///
+ /// Panics if `end` exceeds the buffer capacity.
pub fn with_bitset(mut self, end: usize, val: bool) -> Self {
assert!(end <= self.layout.size());
let v = if val { 255 } else { 0 };
@@ -225,6 +242,10 @@ impl MutableBuffer {
/// This is used to initialize the bits in a buffer, however, it has no
impact on the
/// `len` of the buffer and so can be used to initialize the memory region
from
/// `len` to `capacity`.
+ ///
+ /// # Panics
+ ///
+ /// Panics if the byte range `start..start + count` exceeds the buffer
capacity.
pub fn set_null_bits(&mut self, start: usize, count: usize) {
assert!(
start.saturating_add(count) <= self.layout.size(),
@@ -250,11 +271,19 @@ impl MutableBuffer {
/// let buffer: Buffer = buffer.into();
/// assert_eq!(buffer.len(), 253);
/// ```
+ ///
+ /// # Panics
+ ///
+ /// Panics if `self.len + additional` overflows `usize`, or if the
required capacity is too
+ /// large to round up to the next 64-byte boundary and construct a valid
allocation layout.
// For performance reasons, this must be inlined so that the `if` is
executed inside the caller, and not as an extra call that just
// exits.
#[inline(always)]
pub fn reserve(&mut self, additional: usize) {
- let required_cap = self.len + additional;
+ let required_cap = self
+ .len
+ .checked_add(additional)
+ .expect("buffer length overflow");
if required_cap > self.layout.size() {
let new_capacity =
bit_util::round_upto_multiple_of_64(required_cap);
let new_capacity = std::cmp::max(new_capacity, self.layout.size()
* 2);
@@ -274,6 +303,12 @@ impl MutableBuffer {
/// buffer.repeat_slice_n_times(bytes_to_repeat, 3);
/// assert_eq!(buffer.as_slice(), b"ababab");
/// ```
+ ///
+ /// # Panics
+ ///
+ /// Panics if the repeated slice byte length overflows `usize`, if the
resulting buffer
+ /// length overflows `usize`, or if reserving the required capacity fails
for the same
+ /// reasons as [`MutableBuffer::reserve`].
pub fn repeat_slice_n_times<T: ArrowNativeType>(
&mut self,
slice_to_repeat: &[T],
@@ -385,6 +420,11 @@ impl MutableBuffer {
/// buffer.resize(253, 2); // allocates for the first time
/// assert_eq!(buffer.as_slice()[252], 2u8);
/// ```
+ ///
+ /// # Panics
+ ///
+ /// Panics if growing the buffer requires reserving a capacity that fails
for the same
+ /// reasons as [`MutableBuffer::reserve`].
// For performance reasons, this must be inlined so that the `if` is
executed inside the caller, and not as an extra call that just
// exits.
#[inline(always)]
@@ -420,6 +460,11 @@ impl MutableBuffer {
/// buffer.shrink_to_fit();
/// assert!(buffer.capacity() >= 64 && buffer.capacity() < 128);
/// ```
+ ///
+ /// # Panics
+ ///
+ /// Panics if the current length is too large to round up to the next
64-byte boundary and
+ /// construct a valid allocation layout.
pub fn shrink_to_fit(&mut self) {
let new_capacity = bit_util::round_upto_multiple_of_64(self.len);
if new_capacity < self.layout.size() {
@@ -493,8 +538,8 @@ impl MutableBuffer {
///
/// # Panics
///
- /// This function panics if the underlying buffer is not aligned
- /// correctly for type `T`.
+ /// This function panics if the underlying buffer is not aligned correctly
for type `T`, or
+ /// if its length is not a multiple of `size_of::<T>()`.
pub fn typed_data_mut<T: ArrowNativeType>(&mut self) -> &mut [T] {
// SAFETY
// ArrowNativeType is trivially transmutable, is sealed to prevent
potentially incorrect
@@ -508,8 +553,8 @@ impl MutableBuffer {
///
/// # Panics
///
- /// This function panics if the underlying buffer is not aligned
- /// correctly for type `T`.
+ /// This function panics if the underlying buffer is not aligned correctly
for type `T`, or
+ /// if its length is not a multiple of `size_of::<T>()`.
pub fn typed_data<T: ArrowNativeType>(&self) -> &[T] {
// SAFETY
// ArrowNativeType is trivially transmutable, is sealed to prevent
potentially incorrect
@@ -527,6 +572,11 @@ impl MutableBuffer {
/// buffer.extend_from_slice(&[2u32, 0]);
/// assert_eq!(buffer.len(), 8) // u32 has 4 bytes
/// ```
+ ///
+ /// # Panics
+ ///
+ /// Panics if extending the buffer requires reserving a capacity that
fails for the same
+ /// reasons as [`MutableBuffer::reserve`].
#[inline]
pub fn extend_from_slice<T: ArrowNativeType>(&mut self, items: &[T]) {
let additional = mem::size_of_val(items);
@@ -550,6 +600,11 @@ impl MutableBuffer {
/// buffer.push(256u32);
/// assert_eq!(buffer.len(), 4) // u32 has 4 bytes
/// ```
+ ///
+ /// # Panics
+ ///
+ /// Panics if extending the buffer requires reserving a capacity that
fails for the same
+ /// reasons as [`MutableBuffer::reserve`].
#[inline]
pub fn push<T: ToByteSlice>(&mut self, item: T) {
let additional = std::mem::size_of::<T>();
@@ -575,13 +630,26 @@ impl MutableBuffer {
}
/// Extends the buffer by `additional` bytes equal to `0u8`, incrementing
its capacity if needed.
+ ///
+ /// # Panics
+ ///
+ /// Panics if `self.len + additional` overflows `usize`, or if growing the
buffer requires
+ /// reserving a capacity that fails for the same reasons as
[`MutableBuffer::reserve`].
#[inline]
pub fn extend_zeros(&mut self, additional: usize) {
- self.resize(self.len + additional, 0);
+ let new_len = self
+ .len
+ .checked_add(additional)
+ .expect("buffer length overflow");
+ self.resize(new_len, 0);
}
/// # Safety
/// The caller must ensure that the buffer was properly initialized up to
`len`.
+ ///
+ /// # Panics
+ ///
+ /// Panics if `len` exceeds the buffer capacity.
#[inline]
pub unsafe fn set_len(&mut self, len: usize) {
assert!(len <= self.capacity());
@@ -726,6 +794,13 @@ impl MutableBuffer {
/// let buffer = unsafe { MutableBuffer::from_trusted_len_iter(iter) };
/// assert_eq!(buffer.len(), 4) // u32 has 4 bytes
/// ```
+ ///
+ /// # Panics
+ ///
+ /// Panics if the iterator does not report an upper bound via `size_hint`,
or if the
+ /// reported length does not match the number of items produced, or if
allocating the
+ /// required buffer fails for the same reasons as [`MutableBuffer::new`].
+ ///
/// # Safety
/// This method assumes that the iterator's size is correct and is
undefined behavior
/// to use it on an iterator that reports an incorrect length.
@@ -770,6 +845,12 @@ impl MutableBuffer {
/// let buffer = unsafe { MutableBuffer::from_trusted_len_iter_bool(iter)
};
/// assert_eq!(buffer.len(), 1) // 3 booleans have 1 byte
/// ```
+ ///
+ /// # Panics
+ ///
+ /// Panics if the iterator does not report an upper bound via `size_hint`,
or if it yields
+ /// fewer items than reported.
+ ///
/// # Safety
/// This method assumes that the iterator's size is correct and is
undefined behavior
/// to use it on an iterator that reports an incorrect length.
@@ -788,6 +869,14 @@ impl MutableBuffer {
/// Creates a [`MutableBuffer`] from an [`Iterator`] with a trusted
(upper) length or errors
/// if any of the items of the iterator is an error.
/// Prefer this to `collect` whenever possible, as it is faster ~60%
faster.
+ ///
+ /// # Panics
+ ///
+ /// Panics if the iterator does not report an upper bound via `size_hint`,
or if the
+ /// reported length does not match the number of items produced before an
error-free finish,
+ /// or if allocating the required buffer fails for the same reasons as
+ /// [`MutableBuffer::new`].
+ ///
/// # Safety
/// This method assumes that the iterator's size is correct and is
undefined behavior
/// to use it on an iterator that reports an incorrect length.
diff --git a/arrow-buffer/src/builder/mod.rs b/arrow-buffer/src/builder/mod.rs
index e44397258f..0c58f2c5c7 100644
--- a/arrow-buffer/src/builder/mod.rs
+++ b/arrow-buffer/src/builder/mod.rs
@@ -426,4 +426,28 @@ mod tests {
let slice = builder.as_slice_mut();
assert_eq!(slice.len(), 222);
}
+
+ #[test]
+ #[should_panic(expected = "buffer length overflow")]
+ fn reserve_length_overflow() {
+ let mut builder = BufferBuilder::<u8>::new(1);
+ builder.append(0);
+ builder.reserve(usize::MAX);
+ }
+
+ #[test]
+ #[should_panic(expected = "buffer length overflow")]
+ fn append_n_zeroed_length_overflow() {
+ let mut builder = BufferBuilder::<u64>::new(1);
+ builder.append_n_zeroed(1);
+ builder.append_n_zeroed(usize::MAX / mem::size_of::<u64>());
+ }
+
+ #[test]
+ #[should_panic(expected = "buffer length overflow")]
+ fn advance_length_overflow() {
+ let mut builder = BufferBuilder::<u64>::new(1);
+ builder.advance(1);
+ builder.advance(usize::MAX / mem::size_of::<u64>());
+ }
}