This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new 653ca78452 [Variant] Rename ValueBuffer as ValueBuilder (#8187)
653ca78452 is described below

commit 653ca784525ca39929c0bd2c4572cf330cdf41d6
Author: Ryan Johnson <[email protected]>
AuthorDate: Wed Aug 20 11:53:12 2025 -0700

    [Variant] Rename ValueBuffer as ValueBuilder (#8187)
    
    # Which issue does this PR close?
    
    - Closes https://github.com/apache/arrow-rs/issues/8186
    
    # Rationale for this change
    
    The class has always built variant values (metadata being handled
    separately), so rename it `ValueBuilder` to be more self-documenting.
    
    # What changes are included in this PR?
    
    Class renamed, along with all methods, local variables and documentation
    that reference it.
    
    # Are these changes tested?
    
    It's a rename. If it compiles it's correct.
    
    # Are there any user-facing changes?
    
    No.
    
    Co-authored-by: Andrew Lamb <[email protected]>
---
 parquet-variant/src/builder.rs | 171 +++++++++++++++++++----------------------
 1 file changed, 80 insertions(+), 91 deletions(-)

diff --git a/parquet-variant/src/builder.rs b/parquet-variant/src/builder.rs
index fe3dd52853..c9da44a068 100644
--- a/parquet-variant/src/builder.rs
+++ b/parquet-variant/src/builder.rs
@@ -87,28 +87,28 @@ fn append_packed_u32(dest: &mut Vec<u8>, value: u32, 
value_size: usize) {
 ///
 /// You can reuse an existing `Vec<u8>` by using the `from` impl
 #[derive(Debug, Default)]
-struct ValueBuffer(Vec<u8>);
+struct ValueBuilder(Vec<u8>);
 
-impl ValueBuffer {
+impl ValueBuilder {
     /// Construct a ValueBuffer that will write to a new underlying `Vec`
     fn new() -> Self {
         Default::default()
     }
 }
 
-impl From<Vec<u8>> for ValueBuffer {
+impl From<Vec<u8>> for ValueBuilder {
     fn from(value: Vec<u8>) -> Self {
         Self(value)
     }
 }
 
-impl From<ValueBuffer> for Vec<u8> {
-    fn from(value_buffer: ValueBuffer) -> Self {
+impl From<ValueBuilder> for Vec<u8> {
+    fn from(value_buffer: ValueBuilder) -> Self {
         value_buffer.0
     }
 }
 
-impl ValueBuffer {
+impl ValueBuilder {
     fn append_u8(&mut self, term: u8) {
         self.0.push(term);
     }
@@ -312,7 +312,7 @@ impl ValueBuffer {
         metadata_builder: &'a mut MetadataBuilder,
     ) -> ObjectBuilder<'a> {
         let parent_state = ParentState::Variant {
-            buffer: self,
+            value_builder: self,
             metadata_builder,
         };
         let validate_unique_fields = false;
@@ -321,19 +321,19 @@ impl ValueBuffer {
 
     fn new_list<'a>(&'a mut self, metadata_builder: &'a mut MetadataBuilder) 
-> ListBuilder<'a> {
         let parent_state = ParentState::Variant {
-            buffer: self,
+            value_builder: self,
             metadata_builder,
         };
         let validate_unique_fields = false;
         ListBuilder::new(parent_state, validate_unique_fields)
     }
 
-    /// Appends a variant to the buffer.
+    /// Appends a variant to the builder.
     ///
     /// # Panics
     ///
     /// This method will panic if the variant contains duplicate field names 
in objects
-    /// when validation is enabled. For a fallible version, use 
[`ValueBuffer::try_append_variant`]
+    /// when validation is enabled. For a fallible version, use 
[`ValueBuilder::try_append_variant`]
     fn append_variant<'m, 'd>(
         &mut self,
         variant: Variant<'m, 'd>,
@@ -367,7 +367,7 @@ impl ValueBuffer {
         }
     }
 
-    /// Appends a variant to the buffer
+    /// Appends a variant to the builder
     fn try_append_variant<'m, 'd>(
         &mut self,
         variant: Variant<'m, 'd>,
@@ -404,7 +404,7 @@ impl ValueBuffer {
     }
 
     /// Writes out the header byte for a variant object or list, from the 
starting position
-    /// of the buffer, will return the position after this write
+    /// of the builder, will return the position after this write
     fn append_header_start_from_buf_pos(
         &mut self,
         start_pos: usize, // the start position where the header will be 
inserted
@@ -574,7 +574,7 @@ impl MetadataBuilder {
     }
 
     /// Return the inner buffer, without finalizing any in progress metadata.
-    pub(crate) fn take_buffer(self) -> Vec<u8> {
+    pub(crate) fn into_inner(self) -> Vec<u8> {
         self.metadata_buffer
     }
 }
@@ -609,23 +609,24 @@ impl<S: AsRef<str>> Extend<S> for MetadataBuilder {
 /// rendering the parent object completely unusable until the parent state 
goes out of scope. This
 /// ensures that at most one child builder can exist at a time.
 ///
-/// The redundancy in buffer and metadata_builder is because all the 
references come from the
-/// parent, and we cannot "split" a mutable reference across two objects 
(parent state and the child
-/// builder that uses it). So everything has to be here. Rust layout 
optimizations should treat the
-/// variants as a union, so that accessing a `buffer` or `metadata_builder` is 
branch-free.
+/// The redundancy in `value_builder` and `metadata_builder` is because all 
the references come from
+/// the parent, and we cannot "split" a mutable reference across two objects 
(parent state and the
+/// child builder that uses it). So everything has to be here. Rust layout 
optimizations should
+/// treat the variants as a union, so that accessing a `value_builder` or 
`metadata_builder` is
+/// branch-free.
 enum ParentState<'a> {
     Variant {
-        buffer: &'a mut ValueBuffer,
+        value_builder: &'a mut ValueBuilder,
         metadata_builder: &'a mut MetadataBuilder,
     },
     List {
-        buffer: &'a mut ValueBuffer,
+        value_builder: &'a mut ValueBuilder,
         metadata_builder: &'a mut MetadataBuilder,
         parent_value_offset_base: usize,
         offsets: &'a mut Vec<usize>,
     },
     Object {
-        buffer: &'a mut ValueBuffer,
+        value_builder: &'a mut ValueBuilder,
         metadata_builder: &'a mut MetadataBuilder,
         fields: &'a mut IndexMap<u32, usize>,
         field_name: &'a str,
@@ -634,30 +635,16 @@ enum ParentState<'a> {
 }
 
 impl ParentState<'_> {
-    fn buffer(&mut self) -> &mut ValueBuffer {
-        match self {
-            ParentState::Variant { buffer, .. } => buffer,
-            ParentState::List { buffer, .. } => buffer,
-            ParentState::Object { buffer, .. } => buffer,
-        }
+    fn value_builder(&mut self) -> &mut ValueBuilder {
+        self.value_and_metadata_builders().0
     }
 
     fn metadata_builder(&mut self) -> &mut MetadataBuilder {
-        match self {
-            ParentState::Variant {
-                metadata_builder, ..
-            } => metadata_builder,
-            ParentState::List {
-                metadata_builder, ..
-            } => metadata_builder,
-            ParentState::Object {
-                metadata_builder, ..
-            } => metadata_builder,
-        }
+        self.value_and_metadata_builders().1
     }
 
     // Performs any parent-specific aspects of finishing, after the child has 
appended all necessary
-    // bytes to the parent's value buffer. ListBuilder records the new value's 
starting offset;
+    // bytes to the parent's value builder. ListBuilder records the new 
value's starting offset;
     // ObjectBuilder associates the new value's starting offset with its field 
id; VariantBuilder
     // doesn't need anything special.
     fn finish(&mut self, starting_offset: usize) {
@@ -682,33 +669,33 @@ impl ParentState<'_> {
         }
     }
 
-    /// Return mutable references to the buffer and metadata builder that this
+    /// Return mutable references to the value and metadata builders that this
     /// parent state is using.
-    fn buffer_and_metadata_builder(&mut self) -> (&mut ValueBuffer, &mut 
MetadataBuilder) {
+    fn value_and_metadata_builders(&mut self) -> (&mut ValueBuilder, &mut 
MetadataBuilder) {
         match self {
             ParentState::Variant {
-                buffer,
+                value_builder,
                 metadata_builder,
             }
             | ParentState::List {
-                buffer,
+                value_builder,
                 metadata_builder,
                 ..
             }
             | ParentState::Object {
-                buffer,
+                value_builder,
                 metadata_builder,
                 ..
-            } => (buffer, metadata_builder),
+            } => (value_builder, metadata_builder),
         }
     }
 
     // Return the current offset of the underlying buffer. Used as a savepoint 
for rollback.
-    fn buffer_current_offset(&self) -> usize {
+    fn value_current_offset(&self) -> usize {
         match self {
-            ParentState::Variant { buffer, .. }
-            | ParentState::Object { buffer, .. }
-            | ParentState::List { buffer, .. } => buffer.offset(),
+            ParentState::Variant { value_builder, .. }
+            | ParentState::List { value_builder, .. }
+            | ParentState::Object { value_builder, .. } => 
value_builder.offset(),
         }
     }
 
@@ -719,10 +706,10 @@ impl ParentState<'_> {
             ParentState::Variant {
                 metadata_builder, ..
             }
-            | ParentState::Object {
+            | ParentState::List {
                 metadata_builder, ..
             }
-            | ParentState::List {
+            | ParentState::Object {
                 metadata_builder, ..
             } => metadata_builder.field_names.len(),
         }
@@ -1002,16 +989,16 @@ impl ParentState<'_> {
 /// ```
 #[derive(Default, Debug)]
 pub struct VariantBuilder {
-    buffer: ValueBuffer,
+    value_builder: ValueBuilder,
     metadata_builder: MetadataBuilder,
     validate_unique_fields: bool,
 }
 
 impl VariantBuilder {
-    /// Create a new VariantBuilder with new underlying buffer
+    /// Create a new VariantBuilder with new underlying buffers
     pub fn new() -> Self {
         Self {
-            buffer: ValueBuffer::new(),
+            value_builder: ValueBuilder::new(),
             metadata_builder: MetadataBuilder::default(),
             validate_unique_fields: false,
         }
@@ -1028,7 +1015,7 @@ impl VariantBuilder {
     /// the specified buffers.
     pub fn new_with_buffers(metadata_buffer: Vec<u8>, value_buffer: Vec<u8>) 
-> Self {
         Self {
-            buffer: ValueBuffer::from(value_buffer),
+            value_builder: ValueBuilder::from(value_buffer),
             metadata_builder: MetadataBuilder::from(metadata_buffer),
             validate_unique_fields: false,
         }
@@ -1095,7 +1082,7 @@ impl VariantBuilder {
     // Returns validate_unique_fields because we can no longer reference self 
once this method returns.
     fn parent_state(&mut self) -> (ParentState<'_>, bool) {
         let state = ParentState::Variant {
-            buffer: &mut self.buffer,
+            value_builder: &mut self.value_builder,
             metadata_builder: &mut self.metadata_builder,
         };
         (state, self.validate_unique_fields)
@@ -1133,7 +1120,7 @@ impl VariantBuilder {
     /// ```
     pub fn append_value<'m, 'd, T: Into<Variant<'m, 'd>>>(&mut self, value: T) 
{
         let variant = value.into();
-        self.buffer
+        self.value_builder
             .append_variant(variant, &mut self.metadata_builder);
     }
 
@@ -1143,7 +1130,7 @@ impl VariantBuilder {
         value: T,
     ) -> Result<(), ArrowError> {
         let variant = value.into();
-        self.buffer
+        self.value_builder
             .try_append_variant(variant, &mut self.metadata_builder)?;
 
         Ok(())
@@ -1151,7 +1138,10 @@ impl VariantBuilder {
 
     /// Finish the builder and return the metadata and value buffers.
     pub fn finish(self) -> (Vec<u8>, Vec<u8>) {
-        (self.metadata_builder.finish(), self.buffer.into_inner())
+        (
+            self.metadata_builder.finish(),
+            self.value_builder.into_inner(),
+        )
     }
 
     /// Return the inner metadata buffers and value buffer.
@@ -1161,8 +1151,8 @@ impl VariantBuilder {
     /// values (for rolling back changes).
     pub fn into_buffers(self) -> (Vec<u8>, Vec<u8>) {
         (
-            self.metadata_builder.take_buffer(),
-            self.buffer.into_inner(),
+            self.metadata_builder.into_inner(),
+            self.value_builder.into_inner(),
         )
     }
 }
@@ -1173,7 +1163,7 @@ impl VariantBuilder {
 pub struct ListBuilder<'a> {
     parent_state: ParentState<'a>,
     offsets: Vec<usize>,
-    /// The starting offset in the parent's buffer where this list starts
+    /// The starting offset in the parent's value builder where this list 
starts
     parent_value_offset_base: usize,
     /// The starting offset in the parent's metadata buffer where this list 
starts
     /// used to truncate the written fields in `drop` if the current list has 
not been finished
@@ -1186,7 +1176,7 @@ pub struct ListBuilder<'a> {
 
 impl<'a> ListBuilder<'a> {
     fn new(parent_state: ParentState<'a>, validate_unique_fields: bool) -> 
Self {
-        let parent_value_offset_base = parent_state.buffer_current_offset();
+        let parent_value_offset_base = parent_state.value_current_offset();
         let parent_metadata_offset_base = parent_state.metadata_num_fields();
         Self {
             parent_state,
@@ -1209,10 +1199,10 @@ impl<'a> ListBuilder<'a> {
 
     // Returns validate_unique_fields because we can no longer reference self 
once this method returns.
     fn parent_state(&mut self) -> (ParentState<'_>, bool) {
-        let (buffer, metadata_builder) = 
self.parent_state.buffer_and_metadata_builder();
+        let (value_builder, metadata_builder) = 
self.parent_state.value_and_metadata_builders();
 
         let state = ParentState::List {
-            buffer,
+            value_builder,
             metadata_builder,
             parent_value_offset_base: self.parent_value_offset_base,
             offsets: &mut self.offsets,
@@ -1251,12 +1241,12 @@ impl<'a> ListBuilder<'a> {
         &mut self,
         value: T,
     ) -> Result<(), ArrowError> {
-        let (buffer, metadata_builder) = 
self.parent_state.buffer_and_metadata_builder();
+        let (value_builder, metadata_builder) = 
self.parent_state.value_and_metadata_builders();
 
-        let offset = buffer.offset() - self.parent_value_offset_base;
+        let offset = value_builder.offset() - self.parent_value_offset_base;
         self.offsets.push(offset);
 
-        buffer.try_append_variant(value.into(), metadata_builder)?;
+        value_builder.try_append_variant(value.into(), metadata_builder)?;
 
         Ok(())
     }
@@ -1285,9 +1275,9 @@ impl<'a> ListBuilder<'a> {
 
     /// Finalizes this list and appends it to its parent, which otherwise 
remains unmodified.
     pub fn finish(mut self) {
-        let buffer = self.parent_state.buffer();
+        let value_builder = self.parent_state.value_builder();
 
-        let data_size = buffer
+        let data_size = value_builder
             .offset()
             .checked_sub(self.parent_value_offset_base)
             .expect("Data size overflowed usize");
@@ -1319,7 +1309,7 @@ impl<'a> ListBuilder<'a> {
 
         append_packed_u32(&mut bytes_to_splice, data_size as u32, offset_size 
as usize);
 
-        buffer
+        value_builder
             .inner_mut()
             .splice(starting_offset..starting_offset, bytes_to_splice);
 
@@ -1336,7 +1326,7 @@ impl Drop for ListBuilder<'_> {
     fn drop(&mut self) {
         if !self.has_been_finished {
             self.parent_state
-                .buffer()
+                .value_builder()
                 .inner_mut()
                 .truncate(self.parent_value_offset_base);
             self.parent_state
@@ -1353,7 +1343,7 @@ impl Drop for ListBuilder<'_> {
 pub struct ObjectBuilder<'a> {
     parent_state: ParentState<'a>,
     fields: IndexMap<u32, usize>, // (field_id, offset)
-    /// The starting offset in the parent's buffer where this object starts
+    /// The starting offset in the parent's value buffer where this object 
starts
     parent_value_offset_base: usize,
     /// The starting offset in the parent's metadata buffer where this object 
starts
     /// used to truncate the written fields in `drop` if the current object 
has not been finished
@@ -1368,7 +1358,7 @@ pub struct ObjectBuilder<'a> {
 
 impl<'a> ObjectBuilder<'a> {
     fn new(parent_state: ParentState<'a>, validate_unique_fields: bool) -> 
Self {
-        let offset_base = parent_state.buffer_current_offset();
+        let offset_base = parent_state.value_current_offset();
         let meta_offset_base = parent_state.metadata_num_fields();
         Self {
             parent_state,
@@ -1398,27 +1388,27 @@ impl<'a> ObjectBuilder<'a> {
     /// Add a field with key and value to the object
     ///
     /// # See Also
-    /// - [`ObjectBuilder::insert`] for a infallabel version
+    /// - [`ObjectBuilder::insert`] for an infallible version that panics
     /// - [`ObjectBuilder::try_with_field`] for a builder-style API.
     ///
-    /// # Note
-    /// When inserting duplicate keys, the new value overwrites the previous 
mapping,
-    /// but the old value remains in the buffer, resulting in a larger variant
+    /// # Note Attempting to insert a duplicate field name produces an error 
if unique field
+    /// validation is enabled. Otherwise, the new value overwrites the 
previous field mapping
+    /// without erasing the old value, resulting in a larger variant
     pub fn try_insert<'m, 'd, T: Into<Variant<'m, 'd>>>(
         &mut self,
         key: &str,
         value: T,
     ) -> Result<(), ArrowError> {
-        let (buffer, metadata_builder) = 
self.parent_state.buffer_and_metadata_builder();
+        let (value_builder, metadata_builder) = 
self.parent_state.value_and_metadata_builders();
 
         let field_id = metadata_builder.upsert_field_name(key);
-        let field_start = buffer.offset() - self.parent_value_offset_base;
+        let field_start = value_builder.offset() - 
self.parent_value_offset_base;
 
         if self.fields.insert(field_id, field_start).is_some() && 
self.validate_unique_fields {
             self.duplicate_fields.insert(field_id);
         }
 
-        buffer.try_append_variant(value.into(), metadata_builder)?;
+        value_builder.try_append_variant(value.into(), metadata_builder)?;
         Ok(())
     }
 
@@ -1455,10 +1445,10 @@ impl<'a> ObjectBuilder<'a> {
     fn parent_state<'b>(&'b mut self, key: &'b str) -> (ParentState<'b>, bool) 
{
         let validate_unique_fields = self.validate_unique_fields;
 
-        let (buffer, metadata_builder) = 
self.parent_state.buffer_and_metadata_builder();
+        let (value_builder, metadata_builder) = 
self.parent_state.value_and_metadata_builders();
 
         let state = ParentState::Object {
-            buffer,
+            value_builder,
             metadata_builder,
             fields: &mut self.fields,
             field_name: key,
@@ -1510,8 +1500,8 @@ impl<'a> ObjectBuilder<'a> {
         let max_id = self.fields.iter().map(|(i, _)| *i).max().unwrap_or(0);
         let id_size = int_size(max_id as usize);
 
-        let parent_buffer = self.parent_state.buffer();
-        let current_offset = parent_buffer.offset();
+        let value_builder = self.parent_state.value_builder();
+        let current_offset = value_builder.offset();
         // Current object starts from `object_start_offset`
         let data_size = current_offset - self.parent_value_offset_base;
         let offset_size = int_size(data_size);
@@ -1527,8 +1517,7 @@ impl<'a> ObjectBuilder<'a> {
         let starting_offset = self.parent_value_offset_base;
 
         // Shift existing data to make room for the header
-        let buffer = parent_buffer.inner_mut();
-        buffer.splice(
+        value_builder.inner_mut().splice(
             starting_offset..starting_offset,
             std::iter::repeat_n(0u8, header_size),
         );
@@ -1541,12 +1530,12 @@ impl<'a> ObjectBuilder<'a> {
 
         header_pos = self
             .parent_state
-            .buffer()
+            .value_builder()
             .append_header_start_from_buf_pos(header_pos, header, is_large, 
num_fields);
 
         header_pos = self
             .parent_state
-            .buffer()
+            .value_builder()
             .append_offset_array_start_from_buf_pos(
                 header_pos,
                 self.fields.keys().copied().map(|id| id as usize),
@@ -1555,7 +1544,7 @@ impl<'a> ObjectBuilder<'a> {
             );
 
         self.parent_state
-            .buffer()
+            .value_builder()
             .append_offset_array_start_from_buf_pos(
                 header_pos,
                 self.fields.values().copied(),
@@ -1577,10 +1566,10 @@ impl<'a> ObjectBuilder<'a> {
 /// is finalized.
 impl Drop for ObjectBuilder<'_> {
     fn drop(&mut self) {
-        // Truncate the buffer if the `finish` method has not been called.
+        // Truncate the buffers if the `finish` method has not been called.
         if !self.has_been_finished {
             self.parent_state
-                .buffer()
+                .value_builder()
                 .inner_mut()
                 .truncate(self.parent_value_offset_base);
 

Reply via email to