This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new ac81eb2b16 Improve docs and add build() method to 
`{Null,Boolean,}BufferBuilder` (#9155)
ac81eb2b16 is described below

commit ac81eb2b16e8535056b3aef8d5b5c21c6ea9d5bd
Author: Andrew Lamb <[email protected]>
AuthorDate: Wed Jan 14 17:43:29 2026 -0500

    Improve docs and add build() method to `{Null,Boolean,}BufferBuilder` 
(#9155)
    
    # Which issue does this PR close?
    
    - Part of https://github.com/apache/arrow-rs/issues/9128
    - Follow on to https://github.com/apache/arrow-rs/pull/9120
    
    # Rationale for this change
    
    I am trying to encourage people to avoid using ArrayData when
    constructing arrays (as it is slower than just creating the arrays
    directly). Part of doing so is ensuring that the APIs to create the
    necessary pieces (NullBuffers in particular) are easy to use / well
    documented.
    
    As pointed out by @scovich on
    https://github.com/apache/arrow-rs/pull/9120#issuecomment-3739126785, it
    is not obvious:
    1. How `finish` works (it resets the builder)
    2. Why there is no `build` method (when there is a `From` impl)
    
    Thus, let's add `build` methods to `NullBufferBuilder` and document the
    difference between `finish` and `build`
    
    While I was working on this change, I noticed the same issue with
    `BufferBuilder` and `BooleanBufferBuilder` so I also made them
    consistent
    
    
    # What changes are included in this PR?
    
    1. Improve docs and add a `build()` method to `{Null,Boolean,}BufferBuilder`
    
    # Are these changes tested?
    
    Yes by CI and new doc examples
    
    # Are there any user-facing changes?
    
    <!--
    If there are user-facing changes then we may require documentation to be
    updated before approving the PR.
    
    If there are any breaking changes to public APIs, please call them out.
    -->
    
    ---------
    
    Co-authored-by: Ed Seidl <[email protected]>
---
 arrow-buffer/src/buffer/null.rs     |  5 +++--
 arrow-buffer/src/builder/boolean.rs | 35 +++++++++++++++++++++++++++++----
 arrow-buffer/src/builder/mod.rs     | 39 ++++++++++++++++++++++++++++++-------
 arrow-buffer/src/builder/null.rs    | 32 +++++++++++++++++++-----------
 4 files changed, 87 insertions(+), 24 deletions(-)

diff --git a/arrow-buffer/src/buffer/null.rs b/arrow-buffer/src/buffer/null.rs
index e5e3a610ea..64a21d99e8 100644
--- a/arrow-buffer/src/buffer/null.rs
+++ b/arrow-buffer/src/buffer/null.rs
@@ -19,13 +19,14 @@ use crate::bit_iterator::{BitIndexIterator, BitIterator, 
BitSliceIterator};
 use crate::buffer::BooleanBuffer;
 use crate::{Buffer, MutableBuffer};
 
-/// A [`BooleanBuffer`] used to encode validity for Arrow arrays
+/// A [`BooleanBuffer`] used to encode validity (null values) for Arrow arrays
 ///
 /// In the [Arrow specification], array validity is encoded in a packed 
bitmask with a
 /// `true` value indicating the corresponding slot is not null, and `false` 
indicating
 /// that it is null.
 ///
-/// `NullBuffer`s can be creating using [`NullBufferBuilder`]
+/// # See also
+/// * [`NullBufferBuilder`] for creating `NullBuffer`s
 ///
 /// [Arrow specification]: 
https://arrow.apache.org/docs/format/Columnar.html#validity-bitmaps
 /// [`NullBufferBuilder`]: crate::NullBufferBuilder
diff --git a/arrow-buffer/src/builder/boolean.rs 
b/arrow-buffer/src/builder/boolean.rs
index 7990be1e7c..956f4a3e22 100644
--- a/arrow-buffer/src/builder/boolean.rs
+++ b/arrow-buffer/src/builder/boolean.rs
@@ -21,11 +21,28 @@ use std::ops::Range;
 
 /// Builder for [`BooleanBuffer`]
 ///
+/// Builds a packed buffer of bits representing boolean values. Each bit in the
+/// buffer corresponds to one boolean value.
+///
 /// # See Also
 ///
-/// * [`NullBuffer`] for building [`BooleanBuffer`]s for representing nulls
+/// * [`NullBufferBuilder`] for building [`BooleanBuffer`]s for representing 
nulls
+/// * [`BufferBuilder`] for building [`Buffer`]s
+///
+/// # Example
+/// ```
+/// # use arrow_buffer::builder::BooleanBufferBuilder;
+/// let mut builder = BooleanBufferBuilder::new(10);
+/// builder.append(true);
+/// builder.append(false);
+/// builder.append_n(3, true); // append 3 trues
+/// let buffer = builder.build();
+/// assert_eq!(buffer.len(), 5); // 5 bits appended
+/// assert_eq!(buffer.values(), &[0b00011101_u8]); // packed bits
+/// ```
 ///
-/// [`NullBuffer`]: crate::NullBuffer
+/// [`BufferBuilder`]: crate::builder::BufferBuilder
+/// [`NullBufferBuilder`]: crate::builder::NullBufferBuilder
 #[derive(Debug)]
 pub struct BooleanBufferBuilder {
     buffer: MutableBuffer,
@@ -247,7 +264,9 @@ impl BooleanBufferBuilder {
         self.buffer.as_slice_mut()
     }
 
-    /// Creates a [`BooleanBuffer`]
+    /// Resets this builder and returns a [`BooleanBuffer`].
+    ///
+    /// Use [`Self::build`] when you don't need to reuse this builder.
     #[inline]
     pub fn finish(&mut self) -> BooleanBuffer {
         let buf = std::mem::replace(&mut self.buffer, MutableBuffer::new(0));
@@ -255,6 +274,14 @@ impl BooleanBufferBuilder {
         BooleanBuffer::new(buf.into(), 0, len)
     }
 
+    /// Consumes this builder and returns a [`BooleanBuffer`].
+    ///
+    /// Use [`Self::finish`] instead if you need to reuse the builder.
+    #[inline]
+    pub fn build(self) -> BooleanBuffer {
+        BooleanBuffer::new(self.buffer.into(), 0, self.len)
+    }
+
     /// Builds the [BooleanBuffer] without resetting the builder.
     pub fn finish_cloned(&self) -> BooleanBuffer {
         BooleanBuffer::new(Buffer::from_slice_ref(self.as_slice()), 0, 
self.len)
@@ -285,7 +312,7 @@ impl From<BooleanBufferBuilder> for Buffer {
 impl From<BooleanBufferBuilder> for BooleanBuffer {
     #[inline]
     fn from(builder: BooleanBufferBuilder) -> Self {
-        BooleanBuffer::new(builder.buffer.into(), 0, builder.len)
+        builder.build()
     }
 }
 
diff --git a/arrow-buffer/src/builder/mod.rs b/arrow-buffer/src/builder/mod.rs
index abe510bdab..1abb8018ce 100644
--- a/arrow-buffer/src/builder/mod.rs
+++ b/arrow-buffer/src/builder/mod.rs
@@ -28,23 +28,31 @@ pub use offset::*;
 use crate::{ArrowNativeType, Buffer, MutableBuffer};
 use std::marker::PhantomData;
 
-/// Builder for creating a [Buffer] object.
+/// Builder for creating Arrow [`Buffer`] objects
 ///
-/// A [Buffer] is the underlying data structure of Arrow's Arrays.
+/// A [`Buffer`] is the underlying data structure of Arrow's Arrays.
 ///
 /// For all supported types, there are type definitions for the
 /// generic version of `BufferBuilder<T>`, e.g. `BufferBuilder`.
 ///
+/// **Note it is typically faster to create buffers directly from `Vec`**.
+/// See example on [`Buffer`].
+///
+/// # See Also
+/// * [`BooleanBufferBuilder`]: for packing bits in [`BooleanBuffer`]s
+/// * [`NullBufferBuilder`]: for creating [`NullBuffer`]s of null values
+///
+/// [`BooleanBuffer`]: crate::BooleanBuffer
+/// [`NullBuffer`]: crate::NullBuffer
+///
 /// # Example:
 ///
 /// ```
 /// # use arrow_buffer::builder::BufferBuilder;
-///
 /// let mut builder = BufferBuilder::<u8>::new(100);
 /// builder.append_slice(&[42, 43, 44]);
 /// builder.append(45);
 /// let buffer = builder.finish();
-///
 /// assert_eq!(unsafe { buffer.typed_data::<u8>() }, &[42, 43, 44, 45]);
 /// ```
 #[derive(Debug)]
@@ -341,16 +349,15 @@ impl<T: ArrowNativeType> BufferBuilder<T> {
 
     /// Resets this builder and returns an immutable [Buffer].
     ///
+    /// Use [`Self::build`] when you don't need to reuse this builder.
+    ///
     /// # Example:
     ///
     /// ```
     /// # use arrow_buffer::builder::BufferBuilder;
-    ///
     /// let mut builder = BufferBuilder::<u8>::new(10);
     /// builder.append_slice(&[42, 44, 46]);
-    ///
     /// let buffer = builder.finish();
-    ///
     /// assert_eq!(unsafe { buffer.typed_data::<u8>() }, &[42, 44, 46]);
     /// ```
     #[inline]
@@ -359,6 +366,24 @@ impl<T: ArrowNativeType> BufferBuilder<T> {
         self.len = 0;
         buf.into()
     }
+
+    /// Consumes this builder and returns an immutable [Buffer].
+    ///
+    /// Use [`Self::finish`] instead if you need to reuse the builder.
+    ///
+    /// # Example:
+    ///
+    /// ```
+    /// # use arrow_buffer::builder::BufferBuilder;
+    /// let mut builder = BufferBuilder::<u8>::new(10);
+    /// builder.append_slice(&[42, 44, 46]);
+    /// let buffer = builder.build();
+    /// assert_eq!(unsafe { buffer.typed_data::<u8>() }, &[42, 44, 46]);
+    /// ```
+    #[inline]
+    pub fn build(self) -> Buffer {
+        self.buffer.into()
+    }
 }
 
 impl<T: ArrowNativeType> Default for BufferBuilder<T> {
diff --git a/arrow-buffer/src/builder/null.rs b/arrow-buffer/src/builder/null.rs
index e6f426615b..2ffd4dcd4c 100644
--- a/arrow-buffer/src/builder/null.rs
+++ b/arrow-buffer/src/builder/null.rs
@@ -17,19 +17,22 @@
 
 use crate::{BooleanBufferBuilder, MutableBuffer, NullBuffer};
 
-/// Builder for creating [`NullBuffer`]
+/// Builder for creating [`NullBuffer`]s (bitmaps indicating validity/nulls).
+///
+/// # See also
+/// * [`BooleanBufferBuilder`] for a lower-level bitmap builder.
+/// * [`Self::allocated_size`] for the current memory allocated by the builder.
 ///
 /// # Performance
 ///
-/// This builder only materializes the buffer when we append `false`.
-/// If you only append `true`s to the builder, what you get will be
-/// `None` when calling [`finish`](#method.finish).
+/// This builder only materializes the buffer when null values (`false`) are
+/// appended. If you only append non-null values (`true`) to the builder, no buffer 
is
+/// allocated and [`build`](#method.build) or [`finish`](#method.finish) return
+/// `None`.
 ///
 /// This optimization is **very** important for the performance as it avoids
 /// allocating memory for the null buffer when there are no nulls.
 ///
-/// See [`Self::allocated_size`] to get the current memory allocated by the 
builder.
-///
 /// # Example
 /// ```
 /// # use arrow_buffer::NullBufferBuilder;
@@ -193,11 +196,20 @@ impl NullBufferBuilder {
         }
     }
 
-    /// Builds the null buffer and resets the builder.
-    /// Returns `None` if the builder only contains `true`s.
+    /// Builds the [`NullBuffer`] and resets the builder.
+    ///
+    /// Returns `None` if the builder only contains `true`s. Use 
[`Self::build`]
+    /// when you don't need to reuse this builder.
     pub fn finish(&mut self) -> Option<NullBuffer> {
         self.len = 0;
-        Some(NullBuffer::new(self.bitmap_builder.take()?.finish()))
+        Some(NullBuffer::new(self.bitmap_builder.take()?.build()))
+    }
+
+    /// Consumes this builder and returns the [`NullBuffer`], if any.
+    ///
+    /// Use [`Self::finish`] instead if you need to reuse the builder.
+    pub fn build(self) -> Option<NullBuffer> {
+        self.bitmap_builder.map(NullBuffer::from)
     }
 
     /// Builds the [NullBuffer] without resetting the builder.
@@ -238,9 +250,7 @@ impl NullBufferBuilder {
             .map(|b| b.capacity() / 8)
             .unwrap_or(0)
     }
-}
 
-impl NullBufferBuilder {
     /// Return the number of bits in the buffer.
     pub fn len(&self) -> usize {
         self.bitmap_builder.as_ref().map_or(self.len, |b| b.len())

Reply via email to