scovich commented on code in PR #7987:
URL: https://github.com/apache/arrow-rs/pull/7987#discussion_r2232259020


##########
parquet-variant/src/builder.rs:
##########
@@ -1256,29 +1221,28 @@ impl<'a> ListBuilder<'a> {
 
         let starting_offset = self.parent_value_offset_base;
 
+        let header_size = 1 +      // header
+            if is_large { 4 } else { 1 } +  // is_large

Review Comment:
   ```suggestion
           let num_elements_size = if is_large { 4 } else { 1 };
           let header_size = 1 +   // header
               num_elements_size + // num_elements
   ```
   (and then can reuse `num_elements_size` below)



##########
parquet-variant/src/builder.rs:
##########
@@ -1121,16 +1099,27 @@ impl VariantBuilder {
 pub struct ListBuilder<'a> {
     parent_state: ParentState<'a>,
     offsets: Vec<usize>,
-    buffer: ValueBuffer,
+    /// The starting offset in the parent's buffer where this list starts
+    parent_value_offset_base: usize,
+    /// The starting offset in the parent's metadata buffer where this list starts
+    /// used to truncate the written fields in `drop` if the current list has not been finished
+    parent_metadata_offset_base: usize,
+    /// Whether the list has been finished, the written content of the current list
+    /// will be truncated in `drop` if `has_been_finished` is false
+    has_been_finished: bool,
     validate_unique_fields: bool,
 }
 
 impl<'a> ListBuilder<'a> {
     fn new(parent_state: ParentState<'a>, validate_unique_fields: bool) -> Self {
+        let offset_base = parent_state.buffer_current_offset();
+        let meta_offset_base = parent_state.metadata_current_offset();
         Self {
             parent_state,
             offsets: vec![],
-            buffer: ValueBuffer::default(),
+            parent_value_offset_base: offset_base,
+            has_been_finished: false,
+            parent_metadata_offset_base: meta_offset_base,

Review Comment:
   If we're using the explicit `field: value` form anyway, why not just fold the logic in directly?
   ```suggestion
               parent_value_offset_base: parent_state.buffer_current_offset(),
               has_been_finished: false,
               parent_metadata_offset_base: parent_state.metadata_current_offset(),
   ```
   Alternatively, the `let` above could give the correct name from the start, 
so it can just be passed directly:
   ```suggestion
               parent_value_offset_base,
               has_been_finished: false,
               parent_metadata_offset_base,
   ```



##########
parquet-variant/src/builder.rs:
##########
@@ -1256,29 +1221,28 @@ impl<'a> ListBuilder<'a> {
 
         let starting_offset = self.parent_value_offset_base;
 
+        let header_size = 1 +      // header
+            if is_large { 4 } else { 1 } +  // is_large
+            (self.offsets.len() + 1) * offset_size as usize; // offsets and data size
+
+        // Calculated header size becomes a hint; being wrong only risks extra allocations.
+        // Make sure to reserve enough capacity to handle the extra bytes we'll truncate.
+        let mut bytes_to_splice = Vec::with_capacity(header_size + 3);
         // Write header
         let header = array_header(is_large, offset_size);
+        bytes_to_splice.push(header);
 
-        let num_elements_bytes =
-            num_elements
-                .to_le_bytes()
-                .into_iter()
-                .take(if is_large { 4 } else { 1 });
-        let offsets = PackedU32Iterator::new(
-            offset_size as usize,
-            self.offsets
-                .clone()
-                .into_iter()
-                .map(|offset| (offset as u32).to_le_bytes()),
+        append_packed_u32(
+            &mut bytes_to_splice,
+            num_elements as u32,
+            if is_large { 4 } else { 1 },

Review Comment:
   ```suggestion
               num_elements_size,
   ```



##########
parquet-variant/src/builder.rs:
##########
@@ -1256,29 +1221,28 @@ impl<'a> ListBuilder<'a> {
 
         let starting_offset = self.parent_value_offset_base;
 
+        let header_size = 1 +      // header
+            if is_large { 4 } else { 1 } +  // is_large
+            (self.offsets.len() + 1) * offset_size as usize; // offsets and data size

Review Comment:
   ```suggestion
               (num_elements + 1) * offset_size as usize; // offsets and data size
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to