alamb commented on code in PR #8185:
URL: https://github.com/apache/arrow-rs/pull/8185#discussion_r2288977440


##########
parquet-variant/src/builder.rs:
##########
@@ -589,72 +582,190 @@ impl<S: AsRef<str>> Extend<S> for MetadataBuilder {
 /// parent, and we cannot "split" a mutable reference across two objects 
(parent state and the child
 /// builder that uses it). So everything has to be here. Rust layout 
optimizations should treat the
 /// variants as a union, so that accessing a `buffer` or `metadata_builder` is 
branch-free.
+#[derive(Debug)]
 enum ParentState<'a> {
     Variant {
         buffer: &'a mut ValueBuffer,
+        saved_buffer_offset: usize,
         metadata_builder: &'a mut MetadataBuilder,
+        saved_metadata_builder_dict_size: usize,
+        finished: bool,
     },
     List {
         buffer: &'a mut ValueBuffer,
+        saved_buffer_offset: usize,
         metadata_builder: &'a mut MetadataBuilder,
-        parent_value_offset_base: usize,
+        saved_metadata_builder_dict_size: usize,
         offsets: &'a mut Vec<usize>,
+        saved_offsets_size: usize,
+        finished: bool,
     },
     Object {
         buffer: &'a mut ValueBuffer,
+        saved_buffer_offset: usize,
         metadata_builder: &'a mut MetadataBuilder,
+        saved_metadata_builder_dict_size: usize,
         fields: &'a mut IndexMap<u32, usize>,
-        field_name: &'a str,
-        parent_value_offset_base: usize,
+        saved_fields_size: usize,
+        finished: bool,
     },
 }
 
-impl ParentState<'_> {
-    fn buffer(&mut self) -> &mut ValueBuffer {
-        match self {
-            ParentState::Variant { buffer, .. } => buffer,
-            ParentState::List { buffer, .. } => buffer,
-            ParentState::Object { buffer, .. } => buffer,
+impl<'a> ParentState<'a> {
+    fn variant(buffer: &'a mut ValueBuffer, metadata_builder: &'a mut 
MetadataBuilder) -> Self {
+        ParentState::Variant {
+            saved_buffer_offset: buffer.offset(),
+            saved_metadata_builder_dict_size: 
metadata_builder.num_field_names(),
+            buffer,
+            metadata_builder,
+            finished: false,
+        }
+    }
+
+    fn list(
+        buffer: &'a mut ValueBuffer,
+        metadata_builder: &'a mut MetadataBuilder,
+        offsets: &'a mut Vec<usize>,
+        saved_parent_buffer_offset: usize,
+    ) -> Self {
+        // The saved_parent_buffer_offset is the buffer size as of when the 
parent builder was
+        // constructed. The saved_buffer_offset is the buffer size as of now 
(when a child builder
+        // is created). The variant field_offset entry for this list element 
is their difference.
+        let saved_buffer_offset = buffer.offset();
+        let saved_offsets_size = offsets.len();
+        offsets.push(saved_buffer_offset - saved_parent_buffer_offset);
+
+        ParentState::List {
+            saved_metadata_builder_dict_size: 
metadata_builder.num_field_names(),
+            saved_buffer_offset,
+            saved_offsets_size,
+            metadata_builder,
+            buffer,
+            offsets,
+            finished: false,
         }
     }
 
+    fn object(

Review Comment:
   nit: could call this `try_object` to reflect it can return `Result`



##########
parquet-variant/src/builder.rs:
##########
@@ -589,72 +582,190 @@ impl<S: AsRef<str>> Extend<S> for MetadataBuilder {
 /// parent, and we cannot "split" a mutable reference across two objects 
(parent state and the child
 /// builder that uses it). So everything has to be here. Rust layout 
optimizations should treat the
 /// variants as a union, so that accessing a `buffer` or `metadata_builder` is 
branch-free.
+#[derive(Debug)]
 enum ParentState<'a> {
     Variant {
         buffer: &'a mut ValueBuffer,
+        saved_buffer_offset: usize,
         metadata_builder: &'a mut MetadataBuilder,
+        saved_metadata_builder_dict_size: usize,
+        finished: bool,
     },
     List {
         buffer: &'a mut ValueBuffer,
+        saved_buffer_offset: usize,
         metadata_builder: &'a mut MetadataBuilder,
-        parent_value_offset_base: usize,
+        saved_metadata_builder_dict_size: usize,
         offsets: &'a mut Vec<usize>,
+        saved_offsets_size: usize,
+        finished: bool,
     },
     Object {
         buffer: &'a mut ValueBuffer,
+        saved_buffer_offset: usize,
         metadata_builder: &'a mut MetadataBuilder,
+        saved_metadata_builder_dict_size: usize,
         fields: &'a mut IndexMap<u32, usize>,
-        field_name: &'a str,
-        parent_value_offset_base: usize,
+        saved_fields_size: usize,
+        finished: bool,
     },
 }
 
-impl ParentState<'_> {
-    fn buffer(&mut self) -> &mut ValueBuffer {
-        match self {
-            ParentState::Variant { buffer, .. } => buffer,
-            ParentState::List { buffer, .. } => buffer,
-            ParentState::Object { buffer, .. } => buffer,
+impl<'a> ParentState<'a> {
+    fn variant(buffer: &'a mut ValueBuffer, metadata_builder: &'a mut 
MetadataBuilder) -> Self {
+        ParentState::Variant {
+            saved_buffer_offset: buffer.offset(),
+            saved_metadata_builder_dict_size: 
metadata_builder.num_field_names(),
+            buffer,
+            metadata_builder,
+            finished: false,
+        }
+    }
+
+    fn list(
+        buffer: &'a mut ValueBuffer,
+        metadata_builder: &'a mut MetadataBuilder,
+        offsets: &'a mut Vec<usize>,
+        saved_parent_buffer_offset: usize,
+    ) -> Self {
+        // The saved_parent_buffer_offset is the buffer size as of when the 
parent builder was
+        // constructed. The saved_buffer_offset is the buffer size as of now 
(when a child builder
+        // is created). The variant field_offset entry for this list element 
is their difference.
+        let saved_buffer_offset = buffer.offset();
+        let saved_offsets_size = offsets.len();
+        offsets.push(saved_buffer_offset - saved_parent_buffer_offset);
+
+        ParentState::List {
+            saved_metadata_builder_dict_size: 
metadata_builder.num_field_names(),
+            saved_buffer_offset,
+            saved_offsets_size,
+            metadata_builder,
+            buffer,
+            offsets,
+            finished: false,
         }
     }
 
+    fn object(
+        buffer: &'a mut ValueBuffer,
+        metadata_builder: &'a mut MetadataBuilder,
+        fields: &'a mut IndexMap<u32, usize>,
+        saved_parent_buffer_offset: usize,
+        field_name: &str,
+        validate_unique_fields: bool,
+    ) -> Result<Self, ArrowError> {
+        // The saved_parent_buffer_offset is the buffer size as of when the 
parent builder was
+        // constructed. The saved_buffer_offset is the buffer size as of now 
(when a child builder
+        // is created). The variant field_offset entry for this field is their 
difference.
+        let saved_buffer_offset = buffer.offset();
+        let saved_fields_size = fields.len();
+        let saved_metadata_builder_dict_size = 
metadata_builder.num_field_names();
+        let field_id = metadata_builder.upsert_field_name(field_name);
+        let field_start = saved_buffer_offset - saved_parent_buffer_offset;
+        if fields.insert(field_id, field_start).is_some() && 
validate_unique_fields {
+            return Err(ArrowError::InvalidArgumentError(format!(
+                "Duplicate field name: {field_name}"
+            )));
+        }
+
+        Ok(ParentState::Object {
+            saved_metadata_builder_dict_size,
+            saved_buffer_offset,
+            saved_fields_size,
+            buffer,
+            metadata_builder,
+            fields,
+            finished: false,
+        })
+    }
+
+    fn buffer(&mut self) -> &mut ValueBuffer {
+        self.buffer_and_metadata_builder().0
+    }
+
     fn metadata_builder(&mut self) -> &mut MetadataBuilder {
+        self.buffer_and_metadata_builder().1
+    }
+
+    fn saved_buffer_offset(&mut self) -> usize {
         match self {
             ParentState::Variant {
-                metadata_builder, ..
-            } => metadata_builder,
-            ParentState::List {
-                metadata_builder, ..
-            } => metadata_builder,
-            ParentState::Object {
-                metadata_builder, ..
-            } => metadata_builder,
+                saved_buffer_offset,
+                ..
+            }
+            | ParentState::List {
+                saved_buffer_offset,
+                ..
+            }
+            | ParentState::Object {
+                saved_buffer_offset,
+                ..
+            } => *saved_buffer_offset,
         }
     }
 
-    // Performs any parent-specific aspects of finishing, after the child has 
appended all necessary
-    // bytes to the parent's value buffer. ListBuilder records the new value's 
starting offset;
-    // ObjectBuilder associates the new value's starting offset with its field 
id; VariantBuilder
-    // doesn't need anything special.
-    fn finish(&mut self, starting_offset: usize) {
+    fn is_finished(&mut self) -> &mut bool {
+        match self {
+            ParentState::Variant { finished, .. }
+            | ParentState::List { finished, .. }
+            | ParentState::Object { finished, .. } => finished,
+        }
+    }
+
+    // Mark the insertion as having succeeded.
+    fn finish(&mut self) {
+        *self.is_finished() = true
+    }
+
+    // Performs any parent-specific aspects of rolling back a builder if an 
insertion failed.
+    fn rollback(&mut self) {
+        if *self.is_finished() {
+            return;
+        }
+
+        // All builders need to revert the buffers
+        match self {
+            ParentState::Variant {
+                buffer,
+                saved_buffer_offset,
+                metadata_builder,
+                saved_metadata_builder_dict_size,
+                ..
+            }
+            | ParentState::List {
+                buffer,
+                saved_buffer_offset,
+                metadata_builder,
+                saved_metadata_builder_dict_size,
+                ..
+            }
+            | ParentState::Object {
+                buffer,
+                saved_buffer_offset,
+                metadata_builder,
+                saved_metadata_builder_dict_size,
+                ..
+            } => {
+                buffer.inner_mut().truncate(*saved_buffer_offset);

Review Comment:
   this is very nice



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to