scovich commented on code in PR #7911: URL: https://github.com/apache/arrow-rs/pull/7911#discussion_r2223866277
########## parquet-variant-compute/src/variant_array_builder.rs: ########## @@ -168,7 +166,169 @@ impl VariantArrayBuilder { self.value_buffer.extend_from_slice(value); } - // TODO: Return a Variant builder that will write to the underlying buffers (TODO) + /// Return a `VariantArrayVariantBuilder` that writes directly to the + /// buffers of this builder. + /// + /// You must call [`VariantArrayVariantBuilder::finish`] to complete the builder + /// + /// # Example + /// ``` + /// # use parquet_variant::{Variant, VariantBuilder, VariantBuilderExt}; + /// # use parquet_variant_compute::{VariantArray, VariantArrayBuilder}; + /// let mut array_builder = VariantArrayBuilder::new(10); + /// + /// // First row has a string + /// let mut variant_builder = array_builder.variant_builder(); + /// variant_builder.append_value("Hello, World!"); + /// // must call finish to write the variant to the buffers + /// variant_builder.finish(); + /// + /// // Second row is an object + /// let mut variant_builder = array_builder.variant_builder(); + /// let mut obj = variant_builder.new_object(); + /// obj.insert("my_field", 42i64); + /// obj.finish().unwrap(); + /// variant_builder.finish(); + /// + /// // finalize the array + /// let variant_array: VariantArray = array_builder.build(); + /// + /// // verify what we wrote is still there + /// assert_eq!(variant_array.value(0), Variant::from("Hello, World!")); + /// assert!(variant_array.value(1).as_object().is_some()); + /// ``` + pub fn variant_builder(&mut self) -> VariantArrayVariantBuilder { + // append directly into the metadata and value buffers + let metadata_buffer = std::mem::take(&mut self.metadata_buffer); + let value_buffer = std::mem::take(&mut self.value_buffer); + VariantArrayVariantBuilder::new(self, metadata_buffer, value_buffer) + } +} + +/// A `VariantBuilder` that writes directly to the buffers of a `VariantArrayBuilder`. +/// +/// Note this struct implements [`VariantBuilderExt`], so it can be used +/// as a drop-in replacement for [`VariantBuilder`] in most cases. +/// +/// If [`Self::finish`] is not called, any changes will be rolled back +/// +/// See [`VariantArrayBuilder::variant_builder`] for an example +pub struct VariantArrayVariantBuilder<'a> { + /// was finish called? + finished: bool, + /// starting offset in the variant_builder's `metadata` buffer + metadata_offset: usize, + /// starting offset in the variant_builder's `value` buffer + value_offset: usize, + /// Parent array builder that this variant builder writes to. Buffers + /// have been moved into the variant builder, and must be returned on + /// drop + array_builder: &'a mut VariantArrayBuilder, + /// Builder for the in progress variant value, temporarily owns the buffers + /// from `array_builder` + variant_builder: VariantBuilder, +} + +impl<'a> VariantBuilderExt for VariantArrayVariantBuilder<'a> { + fn append_value<'m, 'v>(&mut self, value: impl Into<Variant<'m, 'v>>) { + self.variant_builder.append_value(value); + } + + fn new_list(&mut self) -> ListBuilder { + self.variant_builder.new_list() + } + + fn new_object(&mut self) -> ObjectBuilder { + self.variant_builder.new_object() + } +} + +impl<'a> VariantArrayVariantBuilder<'a> { + /// Constructs a new VariantArrayVariantBuilder + /// + /// Note this is not public as this is a structure that is logically + /// part of the [`VariantArrayBuilder`] and relies on its internal structure + fn new( + array_builder: &'a mut VariantArrayBuilder, + metadata_buffer: Vec<u8>, + value_buffer: Vec<u8>, + ) -> Self { + let metadata_offset = metadata_buffer.len(); + let value_offset = value_buffer.len(); + VariantArrayVariantBuilder { + finished: false, + metadata_offset, + value_offset, + variant_builder: VariantBuilder::new_with_buffers(metadata_buffer, value_buffer), + array_builder, + } + } + + /// Return a reference to the underlying `VariantBuilder` + pub fn inner(&self) -> &VariantBuilder { + &self.variant_builder + } + + /// Return a mutable reference to the underlying `VariantBuilder` + pub fn inner_mut(&mut self) -> &mut VariantBuilder { + &mut self.variant_builder + } + + /// Called to finish the in progress variant and write it to the underlying + /// buffers + /// + /// Note if you do not call finish, on drop any changes made to the + /// underlying buffers will be rolled back. + pub fn finish(mut self) { + self.finished = true; + // Note: buffers are returned and replaced in the drop impl + } +} + +impl<'a> Drop for VariantArrayVariantBuilder<'a> { + /// If the builder was not finished, roll back any changes made to the + /// underlying buffers (by truncating them) + fn drop(&mut self) { Review Comment: That other PR is nice improvement, but the `splice` call still shifts bytes. In order to not shift bytes at all, we'd have to pre-allocate exactly the right number of header bytes _before_ recursing into the field values. And then the `splice` call would just replace the zero-filled header region with the actual header bytes, after they're known (shifting bytes only if the original guess was incorrect). ########## parquet-variant-compute/src/variant_array_builder.rs: ########## @@ -168,7 +174,171 @@ impl VariantArrayBuilder { self.value_buffer.extend_from_slice(value); } - // TODO: Return a Variant builder that will write to the underlying buffers (TODO) + /// Return a `VariantArrayVariantBuilder` that writes directly to the + /// buffers of this builder. + /// + /// You must call [`VariantArrayVariantBuilder::finish`] to complete the builder + /// + /// # Example + /// ``` + /// # use parquet_variant::{Variant, VariantBuilder, VariantBuilderExt}; + /// # use parquet_variant_compute::{VariantArray, VariantArrayBuilder}; + /// let mut array_builder = VariantArrayBuilder::new(10); + /// + /// // First row has a string + /// let mut variant_builder = array_builder.variant_builder(); + /// variant_builder.append_value("Hello, World!"); + /// // must call finish to write the variant to the buffers + /// variant_builder.finish(); + /// + /// // Second row is an object + /// let mut variant_builder = array_builder.variant_builder(); + /// variant_builder + /// .new_object() + /// .with_field("my_field", 42i64) + /// .finish() + /// .unwrap(); + /// variant_builder.finish(); + /// + /// // finalize the array + /// let variant_array: VariantArray = array_builder.build(); + /// + /// // verify what we wrote is still there + /// assert_eq!(variant_array.value(0), Variant::from("Hello, World!")); + /// assert!(variant_array.value(1).as_object().is_some()); + /// ``` + pub fn variant_builder(&mut self) -> VariantArrayVariantBuilder { + // append directly into the metadata and value buffers + let metadata_buffer = std::mem::take(&mut self.metadata_buffer); + let value_buffer = std::mem::take(&mut self.value_buffer); + VariantArrayVariantBuilder::new(self, metadata_buffer, value_buffer) + } +} + +/// A `VariantBuilder` that writes directly to the buffers of a `VariantArrayBuilder`. +/// +/// Note this struct implements [`VariantBuilderExt`], so it can be used +/// as a drop-in replacement for [`VariantBuilder`] in most cases. +/// +/// If [`Self::finish`] is not called, any changes will be rolled back +/// +/// See [`VariantArrayBuilder::variant_builder`] for an example +pub struct VariantArrayVariantBuilder<'a> { + /// was finish called? + finished: bool, + /// starting offset in the variant_builder's `metadata` buffer + metadata_offset: usize, + /// starting offset in the variant_builder's `value` buffer + value_offset: usize, + /// Parent array builder that this variant builder writes to. Buffers + /// have been moved into the variant builder, and must be returned on + /// drop + array_builder: &'a mut VariantArrayBuilder, + /// Builder for the in progress variant value, temporarily owns the buffers + /// from `array_builder` + variant_builder: VariantBuilder, +} + +impl<'a> VariantBuilderExt for VariantArrayVariantBuilder<'a> { + fn append_value<'m, 'v>(&mut self, value: impl Into<Variant<'m, 'v>>) { + self.variant_builder.append_value(value); + } + + fn new_list(&mut self) -> ListBuilder { + self.variant_builder.new_list() + } + + fn new_object(&mut self) -> ObjectBuilder { + self.variant_builder.new_object() + } +} + +impl<'a> VariantArrayVariantBuilder<'a> { + /// Constructs a new VariantArrayVariantBuilder + /// + /// Note this is not public as this is a structure that is logically + /// part of the [`VariantArrayBuilder`] and relies on its internal structure + fn new( + array_builder: &'a mut VariantArrayBuilder, + metadata_buffer: Vec<u8>, + value_buffer: Vec<u8>, + ) -> Self { + let metadata_offset = metadata_buffer.len(); + let value_offset = value_buffer.len(); + VariantArrayVariantBuilder { + finished: false, + metadata_offset, + value_offset, + variant_builder: VariantBuilder::new_with_buffers(metadata_buffer, value_buffer), + array_builder, + } + } + + /// Return a reference to the underlying `VariantBuilder` + pub fn inner(&self) -> &VariantBuilder { + &self.variant_builder + } + + /// Return a mutable reference to the underlying `VariantBuilder` + pub fn inner_mut(&mut self) -> &mut VariantBuilder { + &mut self.variant_builder + } + + /// Called to finish the in progress variant and write it to the underlying + /// buffers + /// + /// Note if you do not call finish, on drop any changes made to the + /// underlying buffers will be rolled back. + pub fn finish(mut self) { + self.finished = true; + // Note: buffers are returned and replaced in the drop impl + } +} + +impl<'a> Drop for VariantArrayVariantBuilder<'a> { + /// If the builder was not finished, roll back any changes made to the + /// underlying buffers (by truncating them) + fn drop(&mut self) { + let metadata_offset = self.metadata_offset; + let value_offset = self.value_offset; + + // get the buffers back from the variant builder + let (mut metadata_buffer, mut value_buffer) = + std::mem::take(&mut self.variant_builder).finish(); + + // Sanity Check: if the buffers got smaller, something went wrong (previous data was lost) + let metadata_len = metadata_buffer + .len() + .checked_sub(metadata_offset) + .expect("metadata length decreased unexpectedly"); + let value_len = value_buffer + .len() + .checked_sub(value_offset) + .expect("value length decreased unexpectedly"); Review Comment: These assertions should be impossible to trigger, no? And if anyway the result is a panic, is the unchecked integer underflow panic somehow worse than a failed expect? (if we think these could actually trigger in practice, that seems like a reason to move the checks to a fallible `finish` instead of infallible `drop`?) ########## parquet-variant-compute/src/variant_array_builder.rs: ########## @@ -55,9 +55,14 @@ use std::sync::Arc; /// }; /// builder.append_variant_buffers(&metadata, &value); Review Comment: It does seem preferable to have only one good way of doing things, rather than leaving a less efficient other way? ########## parquet-variant-compute/src/variant_array_builder.rs: ########## @@ -168,7 +174,171 @@ impl VariantArrayBuilder { self.value_buffer.extend_from_slice(value); } - // TODO: Return a Variant builder that will write to the underlying buffers (TODO) + /// Return a `VariantArrayVariantBuilder` that writes directly to the + /// buffers of this builder. + /// + /// You must call [`VariantArrayVariantBuilder::finish`] to complete the builder + /// + /// # Example + /// ``` + /// # use parquet_variant::{Variant, VariantBuilder, VariantBuilderExt}; + /// # use parquet_variant_compute::{VariantArray, VariantArrayBuilder}; + /// let mut array_builder = VariantArrayBuilder::new(10); + /// + /// // First row has a string + /// let mut variant_builder = array_builder.variant_builder(); + /// variant_builder.append_value("Hello, World!"); + /// // must call finish to write the variant to the buffers + /// variant_builder.finish(); + /// + /// // Second row is an object + /// let mut variant_builder = array_builder.variant_builder(); + /// variant_builder + /// .new_object() + /// .with_field("my_field", 42i64) + /// .finish() + /// .unwrap(); + /// variant_builder.finish(); + /// + /// // finalize the array + /// let variant_array: VariantArray = array_builder.build(); + /// + /// // verify what we wrote is still there + /// assert_eq!(variant_array.value(0), Variant::from("Hello, World!")); + /// assert!(variant_array.value(1).as_object().is_some()); + /// ``` + pub fn variant_builder(&mut self) -> VariantArrayVariantBuilder { + // append directly into the metadata and value buffers + let metadata_buffer = std::mem::take(&mut self.metadata_buffer); + let value_buffer = std::mem::take(&mut self.value_buffer); + VariantArrayVariantBuilder::new(self, metadata_buffer, value_buffer) + } +} + +/// A `VariantBuilder` that writes directly to the buffers of a `VariantArrayBuilder`. +/// +/// Note this struct implements [`VariantBuilderExt`], so it can be used +/// as a drop-in replacement for [`VariantBuilder`] in most cases. +/// +/// If [`Self::finish`] is not called, any changes will be rolled back +/// +/// See [`VariantArrayBuilder::variant_builder`] for an example +pub struct VariantArrayVariantBuilder<'a> { + /// was finish called? + finished: bool, + /// starting offset in the variant_builder's `metadata` buffer + metadata_offset: usize, + /// starting offset in the variant_builder's `value` buffer + value_offset: usize, + /// Parent array builder that this variant builder writes to. Buffers + /// have been moved into the variant builder, and must be returned on + /// drop + array_builder: &'a mut VariantArrayBuilder, + /// Builder for the in progress variant value, temporarily owns the buffers + /// from `array_builder` + variant_builder: VariantBuilder, +} + +impl<'a> VariantBuilderExt for VariantArrayVariantBuilder<'a> { + fn append_value<'m, 'v>(&mut self, value: impl Into<Variant<'m, 'v>>) { + self.variant_builder.append_value(value); + } + + fn new_list(&mut self) -> ListBuilder { + self.variant_builder.new_list() + } + + fn new_object(&mut self) -> ObjectBuilder { + self.variant_builder.new_object() + } +} + +impl<'a> VariantArrayVariantBuilder<'a> { + /// Constructs a new VariantArrayVariantBuilder + /// + /// Note this is not public as this is a structure that is logically + /// part of the [`VariantArrayBuilder`] and relies on its internal structure + fn new( + array_builder: &'a mut VariantArrayBuilder, + metadata_buffer: Vec<u8>, + value_buffer: Vec<u8>, + ) -> Self { + let metadata_offset = metadata_buffer.len(); + let value_offset = value_buffer.len(); + VariantArrayVariantBuilder { + finished: false, + metadata_offset, + value_offset, + variant_builder: VariantBuilder::new_with_buffers(metadata_buffer, value_buffer), + array_builder, + } + } + + /// Return a reference to the underlying `VariantBuilder` + pub fn inner(&self) -> &VariantBuilder { + &self.variant_builder + } + + /// Return a mutable reference to the underlying `VariantBuilder` + pub fn inner_mut(&mut self) -> &mut VariantBuilder { + &mut self.variant_builder + } + + /// Called to finish the in progress variant and write it to the underlying + /// buffers + /// + /// Note if you do not call finish, on drop any changes made to the + /// underlying buffers will be rolled back. + pub fn finish(mut self) { + self.finished = true; + // Note: buffers are returned and replaced in the drop impl + } +} + +impl<'a> Drop for VariantArrayVariantBuilder<'a> { + /// If the builder was not finished, roll back any changes made to the + /// underlying buffers (by truncating them) + fn drop(&mut self) { + let metadata_offset = self.metadata_offset; + let value_offset = self.value_offset; + + // get the buffers back from the variant builder + let (mut metadata_buffer, mut value_buffer) = + std::mem::take(&mut self.variant_builder).finish(); + + // Sanity Check: if the buffers got smaller, something went wrong (previous data was lost) + let metadata_len = metadata_buffer + .len() + .checked_sub(metadata_offset) + .expect("metadata length decreased unexpectedly"); + let value_len = value_buffer + .len() + .checked_sub(value_offset) + .expect("value length decreased unexpectedly"); Review Comment: (meanwhile, we should probably move those inside the `if finished` block, since the `else` doesn't use them and a panic during unwind is an immediate double-fault abort?) ########## parquet-variant-compute/src/variant_array_builder.rs: ########## @@ -168,7 +174,171 @@ impl VariantArrayBuilder { self.value_buffer.extend_from_slice(value); } - // TODO: Return a Variant builder that will write to the underlying buffers (TODO) + /// Return a `VariantArrayVariantBuilder` that writes directly to the + /// buffers of this builder. + /// + /// You must call [`VariantArrayVariantBuilder::finish`] to complete the builder + /// + /// # Example + /// ``` + /// # use parquet_variant::{Variant, VariantBuilder, VariantBuilderExt}; + /// # use parquet_variant_compute::{VariantArray, VariantArrayBuilder}; + /// let mut array_builder = VariantArrayBuilder::new(10); + /// + /// // First row has a string + /// let mut variant_builder = array_builder.variant_builder(); + /// variant_builder.append_value("Hello, World!"); + /// // must call finish to write the variant to the buffers + /// variant_builder.finish(); + /// + /// // Second row is an object + /// let mut variant_builder = array_builder.variant_builder(); + /// variant_builder + /// .new_object() + /// .with_field("my_field", 42i64) + /// .finish() + /// .unwrap(); + /// variant_builder.finish(); + /// + /// // finalize the array + /// let variant_array: VariantArray = array_builder.build(); + /// + /// // verify what we wrote is still there + /// assert_eq!(variant_array.value(0), Variant::from("Hello, World!")); + /// assert!(variant_array.value(1).as_object().is_some()); + /// ``` + pub fn variant_builder(&mut self) -> VariantArrayVariantBuilder { + // append directly into the metadata and value buffers + let metadata_buffer = std::mem::take(&mut self.metadata_buffer); + let value_buffer = std::mem::take(&mut self.value_buffer); + VariantArrayVariantBuilder::new(self, metadata_buffer, value_buffer) + } +} + +/// A `VariantBuilder` that writes directly to the buffers of a `VariantArrayBuilder`. +/// +/// Note this struct implements [`VariantBuilderExt`], so it can be used +/// as a drop-in replacement for [`VariantBuilder`] in most cases. +/// +/// If [`Self::finish`] is not called, any changes will be rolled back +/// +/// See [`VariantArrayBuilder::variant_builder`] for an example +pub struct VariantArrayVariantBuilder<'a> { + /// was finish called? + finished: bool, + /// starting offset in the variant_builder's `metadata` buffer + metadata_offset: usize, + /// starting offset in the variant_builder's `value` buffer + value_offset: usize, + /// Parent array builder that this variant builder writes to. Buffers + /// have been moved into the variant builder, and must be returned on + /// drop + array_builder: &'a mut VariantArrayBuilder, + /// Builder for the in progress variant value, temporarily owns the buffers + /// from `array_builder` + variant_builder: VariantBuilder, +} + +impl<'a> VariantBuilderExt for VariantArrayVariantBuilder<'a> { + fn append_value<'m, 'v>(&mut self, value: impl Into<Variant<'m, 'v>>) { + self.variant_builder.append_value(value); + } + + fn new_list(&mut self) -> ListBuilder { + self.variant_builder.new_list() + } + + fn new_object(&mut self) -> ObjectBuilder { + self.variant_builder.new_object() + } +} + +impl<'a> VariantArrayVariantBuilder<'a> { + /// Constructs a new VariantArrayVariantBuilder + /// + /// Note this is not public as this is a structure that is logically + /// part of the [`VariantArrayBuilder`] and relies on its internal structure + fn new( + array_builder: &'a mut VariantArrayBuilder, + metadata_buffer: Vec<u8>, + value_buffer: Vec<u8>, + ) -> Self { + let metadata_offset = metadata_buffer.len(); + let value_offset = value_buffer.len(); + VariantArrayVariantBuilder { + finished: false, + metadata_offset, + value_offset, + variant_builder: VariantBuilder::new_with_buffers(metadata_buffer, value_buffer), + array_builder, + } + } + + /// Return a reference to the underlying `VariantBuilder` + pub fn inner(&self) -> &VariantBuilder { + &self.variant_builder + } + + /// Return a mutable reference to the underlying `VariantBuilder` + pub fn inner_mut(&mut self) -> &mut VariantBuilder { + &mut self.variant_builder + } + + /// Called to finish the in progress variant and write it to the underlying + /// buffers + /// + /// Note if you do not call finish, on drop any changes made to the + /// underlying buffers will be rolled back. + pub fn finish(mut self) { + self.finished = true; + // Note: buffers are returned and replaced in the drop impl + } +} + +impl<'a> Drop for VariantArrayVariantBuilder<'a> { + /// If the builder was not finished, roll back any changes made to the + /// underlying buffers (by truncating them) + fn drop(&mut self) { + let metadata_offset = self.metadata_offset; + let value_offset = self.value_offset; + + // get the buffers back from the variant builder + let (mut metadata_buffer, mut value_buffer) = + std::mem::take(&mut self.variant_builder).finish(); Review Comment: One potential reason to move the finish logic out of `drop` -- we rely on a `finish` call at L307 above; imagine if that `finish` call didn't actually trigger any changes until the unwinding stack frame dropped the object? Most likely we'd be working with unexpected buffers for the rest of this method? Tho I guess if there were any observable side effects like that, the corresponding `&mut` reference should still be live until drop, and the compiler would block any badness? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org