klion26 commented on code in PR #7987:
URL: https://github.com/apache/arrow-rs/pull/7987#discussion_r2233945914


##########
parquet-variant/src/builder.rs:
##########
@@ -1216,24 +1211,45 @@ impl<'a> ListBuilder<'a> {
 
     /// Finalizes this list and appends it to its parent, which otherwise 
remains unmodified.
     pub fn finish(mut self) {
-        let data_size = self.buffer.offset();
+        let buffer = self.parent_state.buffer();
+
+        let data_size = buffer.offset() - self.parent_value_offset_base;

Review Comment:
   fixed



##########
parquet-variant/src/builder.rs:
##########
@@ -1256,29 +1221,28 @@ impl<'a> ListBuilder<'a> {
 
         let starting_offset = self.parent_value_offset_base;
 
+        let header_size = 1 +      // header
+            if is_large { 4 } else { 1 } +  // is_large

Review Comment:
   fixed



##########
parquet-variant/src/builder.rs:
##########
@@ -1216,24 +1211,45 @@ impl<'a> ListBuilder<'a> {
 
     /// Finalizes this list and appends it to its parent, which otherwise 
remains unmodified.
     pub fn finish(mut self) {
-        let data_size = self.buffer.offset();
+        let buffer = self.parent_state.buffer();
+
+        let data_size = buffer.offset() - self.parent_value_offset_base;
+
         let num_elements = self.offsets.len();
         let is_large = num_elements > u8::MAX as usize;
         let offset_size = int_size(data_size);
 
-        // Get parent's buffer
-        let parent_buffer = self.parent_state.buffer();
-        let starting_offset = parent_buffer.offset();
+        let starting_offset = self.parent_value_offset_base;
+
+        let header_size = 1 +      // header
+            if is_large { 4 } else { 1 } +  // is_large

Review Comment:
   fixed



##########
parquet-variant/src/builder.rs:
##########
@@ -1256,29 +1221,28 @@ impl<'a> ListBuilder<'a> {
 
         let starting_offset = self.parent_value_offset_base;
 
+        let header_size = 1 +      // header
+            if is_large { 4 } else { 1 } +  // is_large
+            (self.offsets.len() + 1) * offset_size as usize; // offsets and 
data size
+
+        // Calculated header size becomes a hint; being wrong only risks extra 
allocations.
+        // Make sure to reserve enough capacity to handle the extra bytes 
we'll truncate.
+        let mut bytes_to_splice = Vec::with_capacity(header_size + 3);
         // Write header
         let header = array_header(is_large, offset_size);
+        bytes_to_splice.push(header);
 
-        let num_elements_bytes =
-            num_elements
-                .to_le_bytes()
-                .into_iter()
-                .take(if is_large { 4 } else { 1 });
-        let offsets = PackedU32Iterator::new(
-            offset_size as usize,
-            self.offsets
-                .clone()
-                .into_iter()
-                .map(|offset| (offset as u32).to_le_bytes()),
+        append_packed_u32(
+            &mut bytes_to_splice,
+            num_elements as u32,
+            if is_large { 4 } else { 1 },

Review Comment:
   fixed



##########
parquet-variant/src/builder.rs:
##########
@@ -1121,16 +1099,27 @@ impl VariantBuilder {
 pub struct ListBuilder<'a> {
     parent_state: ParentState<'a>,
     offsets: Vec<usize>,
-    buffer: ValueBuffer,
+    /// The starting offset in the parent's buffer where this list starts
+    parent_value_offset_base: usize,
+    /// The starting offset in the parent's metadata buffer where this list 
starts
+    /// used to truncate the written fields in `drop` if the current list has 
not been finished
+    parent_metadata_offset_base: usize,
+    /// Whether the list has been finished, the written content of the current 
list
+    /// will be truncated in `drop` if `has_been_finished` is false
+    has_been_finished: bool,
     validate_unique_fields: bool,
 }
 
 impl<'a> ListBuilder<'a> {
     fn new(parent_state: ParentState<'a>, validate_unique_fields: bool) -> 
Self {
+        let offset_base = parent_state.buffer_current_offset();
+        let meta_offset_base = parent_state.metadata_current_offset();
         Self {
             parent_state,
             offsets: vec![],
-            buffer: ValueBuffer::default(),
+            parent_value_offset_base: offset_base,
+            has_been_finished: false,
+            parent_metadata_offset_base: meta_offset_base,

Review Comment:
   I have renamed the local variables. The current implementation is there to 
keep the compiler happy: `parent_state` is moved into the struct first (it is 
the first field), so the offsets have to be read from it beforehand.



##########
parquet-variant/src/builder.rs:
##########
@@ -1256,29 +1221,28 @@ impl<'a> ListBuilder<'a> {
 
         let starting_offset = self.parent_value_offset_base;
 
+        let header_size = 1 +      // header
+            if is_large { 4 } else { 1 } +  // is_large
+            (self.offsets.len() + 1) * offset_size as usize; // offsets and 
data size

Review Comment:
   Fixed



##########
parquet-variant/src/builder.rs:
##########
@@ -1216,24 +1211,45 @@ impl<'a> ListBuilder<'a> {
 
     /// Finalizes this list and appends it to its parent, which otherwise 
remains unmodified.
     pub fn finish(mut self) {
-        let data_size = self.buffer.offset();
+        let buffer = self.parent_state.buffer();
+
+        let data_size = buffer.offset() - self.parent_value_offset_base;
+
         let num_elements = self.offsets.len();
         let is_large = num_elements > u8::MAX as usize;
         let offset_size = int_size(data_size);
 
-        // Get parent's buffer
-        let parent_buffer = self.parent_state.buffer();
-        let starting_offset = parent_buffer.offset();
+        let starting_offset = self.parent_value_offset_base;
+
+        let header_size = 1 +      // header

Review Comment:
   fixed



##########
parquet-variant/src/builder.rs:
##########
@@ -70,6 +70,13 @@ fn write_offset_at_pos(buf: &mut [u8], start_pos: usize, 
value: usize, nbytes: u
     buf[start_pos..start_pos + nbytes as 
usize].copy_from_slice(&bytes[..nbytes as usize]);
 }
 
+/// Append `value_bytes` of given `value` into `dest`.

Review Comment:
   fixed



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to