scovich commented on code in PR #8185: URL: https://github.com/apache/arrow-rs/pull/8185#discussion_r2289016779
########## parquet-variant/src/builder.rs: ########## @@ -589,72 +582,190 @@ impl<S: AsRef<str>> Extend<S> for MetadataBuilder { /// parent, and we cannot "split" a mutable reference across two objects (parent state and the child /// builder that uses it). So everything has to be here. Rust layout optimizations should treat the /// variants as a union, so that accessing a `buffer` or `metadata_builder` is branch-free. +#[derive(Debug)] enum ParentState<'a> { Variant { buffer: &'a mut ValueBuffer, + saved_buffer_offset: usize, metadata_builder: &'a mut MetadataBuilder, + saved_metadata_builder_dict_size: usize, + finished: bool, }, List { buffer: &'a mut ValueBuffer, + saved_buffer_offset: usize, metadata_builder: &'a mut MetadataBuilder, - parent_value_offset_base: usize, + saved_metadata_builder_dict_size: usize, offsets: &'a mut Vec<usize>, + saved_offsets_size: usize, + finished: bool, }, Object { buffer: &'a mut ValueBuffer, + saved_buffer_offset: usize, metadata_builder: &'a mut MetadataBuilder, + saved_metadata_builder_dict_size: usize, fields: &'a mut IndexMap<u32, usize>, - field_name: &'a str, - parent_value_offset_base: usize, + saved_fields_size: usize, + finished: bool, }, } -impl ParentState<'_> { - fn buffer(&mut self) -> &mut ValueBuffer { - match self { - ParentState::Variant { buffer, .. } => buffer, - ParentState::List { buffer, .. } => buffer, - ParentState::Object { buffer, .. } => buffer, +impl<'a> ParentState<'a> { + fn variant(buffer: &'a mut ValueBuffer, metadata_builder: &'a mut MetadataBuilder) -> Self { + ParentState::Variant { + saved_buffer_offset: buffer.offset(), + saved_metadata_builder_dict_size: metadata_builder.num_field_names(), + buffer, + metadata_builder, + finished: false, + } + } + + fn list( + buffer: &'a mut ValueBuffer, + metadata_builder: &'a mut MetadataBuilder, + offsets: &'a mut Vec<usize>, + saved_parent_buffer_offset: usize, + ) -> Self { + // The saved_parent_buffer_offset is the buffer size as of when the parent builder was + // constructed. The saved_buffer_offset is the buffer size as of now (when a child builder + // is created). The variant field_offset entry for this list element is their difference. + let saved_buffer_offset = buffer.offset(); + let saved_offsets_size = offsets.len(); + offsets.push(saved_buffer_offset - saved_parent_buffer_offset); + + ParentState::List { + saved_metadata_builder_dict_size: metadata_builder.num_field_names(), + saved_buffer_offset, + saved_offsets_size, + metadata_builder, + buffer, + offsets, + finished: false, } } + fn object( Review Comment: Good idea! -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org