klion26 commented on code in PR #7987: URL: https://github.com/apache/arrow-rs/pull/7987#discussion_r2233945914
########## parquet-variant/src/builder.rs: ########## @@ -1216,24 +1211,45 @@ impl<'a> ListBuilder<'a> { /// Finalizes this list and appends it to its parent, which otherwise remains unmodified. pub fn finish(mut self) { - let data_size = self.buffer.offset(); + let buffer = self.parent_state.buffer(); + + let data_size = buffer.offset() - self.parent_value_offset_base; Review Comment: fixed ########## parquet-variant/src/builder.rs: ########## @@ -1256,29 +1221,28 @@ impl<'a> ListBuilder<'a> { let starting_offset = self.parent_value_offset_base; + let header_size = 1 + // header + if is_large { 4 } else { 1 } + // is_large Review Comment: fixed ########## parquet-variant/src/builder.rs: ########## @@ -1216,24 +1211,45 @@ impl<'a> ListBuilder<'a> { /// Finalizes this list and appends it to its parent, which otherwise remains unmodified. pub fn finish(mut self) { - let data_size = self.buffer.offset(); + let buffer = self.parent_state.buffer(); + + let data_size = buffer.offset() - self.parent_value_offset_base; + let num_elements = self.offsets.len(); let is_large = num_elements > u8::MAX as usize; let offset_size = int_size(data_size); - // Get parent's buffer - let parent_buffer = self.parent_state.buffer(); - let starting_offset = parent_buffer.offset(); + let starting_offset = self.parent_value_offset_base; + + let header_size = 1 + // header + if is_large { 4 } else { 1 } + // is_large Review Comment: fixed ########## parquet-variant/src/builder.rs: ########## @@ -1256,29 +1221,28 @@ impl<'a> ListBuilder<'a> { let starting_offset = self.parent_value_offset_base; + let header_size = 1 + // header + if is_large { 4 } else { 1 } + // is_large + (self.offsets.len() + 1) * offset_size as usize; // offsets and data size + + // Calculated header size becomes a hint; being wrong only risks extra allocations. + // Make sure to reserve enough capacity to handle the extra bytes we'll truncate. 
+ let mut bytes_to_splice = Vec::with_capacity(header_size + 3); // Write header let header = array_header(is_large, offset_size); + bytes_to_splice.push(header); - let num_elements_bytes = - num_elements - .to_le_bytes() - .into_iter() - .take(if is_large { 4 } else { 1 }); - let offsets = PackedU32Iterator::new( - offset_size as usize, - self.offsets - .clone() - .into_iter() - .map(|offset| (offset as u32).to_le_bytes()), + append_packed_u32( + &mut bytes_to_splice, + num_elements as u32, + if is_large { 4 } else { 1 }, Review Comment: fixed ########## parquet-variant/src/builder.rs: ########## @@ -1121,16 +1099,27 @@ impl VariantBuilder { pub struct ListBuilder<'a> { parent_state: ParentState<'a>, offsets: Vec<usize>, - buffer: ValueBuffer, + /// The starting offset in the parent's buffer where this list starts + parent_value_offset_base: usize, + /// The starting offset in the parent's metadata buffer where this list starts + /// used to truncate the written fields in `drop` if the current list has not been finished + parent_metadata_offset_base: usize, + /// Whether the list has been finished, the written content of the current list + /// will be truncated in `drop` if `has_been_finished` is false + has_been_finished: bool, validate_unique_fields: bool, } impl<'a> ListBuilder<'a> { fn new(parent_state: ParentState<'a>, validate_unique_fields: bool) -> Self { + let offset_base = parent_state.buffer_current_offset(); + let meta_offset_base = parent_state.metadata_current_offset(); Self { parent_state, offsets: vec![], - buffer: ValueBuffer::default(), + parent_value_offset_base: offset_base, + has_been_finished: false, + parent_metadata_offset_base: meta_offset_base, Review Comment: I have renamed the local variables. The current implementation computes them up front to keep the compiler happy, because `parent_state` (the first parameter) has already been moved by the time these fields are initialized. 
########## parquet-variant/src/builder.rs: ########## @@ -1256,29 +1221,28 @@ impl<'a> ListBuilder<'a> { let starting_offset = self.parent_value_offset_base; + let header_size = 1 + // header + if is_large { 4 } else { 1 } + // is_large + (self.offsets.len() + 1) * offset_size as usize; // offsets and data size Review Comment: Fixed ########## parquet-variant/src/builder.rs: ########## @@ -1216,24 +1211,45 @@ impl<'a> ListBuilder<'a> { /// Finalizes this list and appends it to its parent, which otherwise remains unmodified. pub fn finish(mut self) { - let data_size = self.buffer.offset(); + let buffer = self.parent_state.buffer(); + + let data_size = buffer.offset() - self.parent_value_offset_base; + let num_elements = self.offsets.len(); let is_large = num_elements > u8::MAX as usize; let offset_size = int_size(data_size); - // Get parent's buffer - let parent_buffer = self.parent_state.buffer(); - let starting_offset = parent_buffer.offset(); + let starting_offset = self.parent_value_offset_base; + + let header_size = 1 + // header Review Comment: fixed ########## parquet-variant/src/builder.rs: ########## @@ -70,6 +70,13 @@ fn write_offset_at_pos(buf: &mut [u8], start_pos: usize, value: usize, nbytes: u buf[start_pos..start_pos + nbytes as usize].copy_from_slice(&bytes[..nbytes as usize]); } +/// Append `value_bytes` of given `value` into `dest`. Review Comment: fixed -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org