scovich commented on code in PR #8360: URL: https://github.com/apache/arrow-rs/pull/8360#discussion_r2357541537
########## parquet-variant-compute/src/variant_array_builder.rs: ########## @@ -205,6 +208,156 @@ impl VariantBuilderExt for VariantArrayBuilder { } } +/// A builder for creating only the value column of a [`VariantArray`] +/// +/// This builder is used when you have existing metadata and only need to build +/// the value column. It's useful for scenarios like variant unshredding, data +/// transformation, or filtering where you want to reuse existing metadata. +/// +/// The builder produces a [`BinaryViewArray`] that can be combined with existing +/// metadata to create a complete [`VariantArray`]. +/// +/// # Example: +/// ``` +/// # use arrow::array::Array; +/// # use parquet_variant::{Variant}; +/// # use parquet_variant_compute::VariantValueArrayBuilder; +/// // Create a variant value builder for 10 rows +/// let mut builder = VariantValueArrayBuilder::new(10); +/// +/// // Append some values with their corresponding metadata. In practice, some of the variant +/// // values would be objects with internal references to pre-existing metadata, which the +/// // builder takes advantage of to avoid creating new metadata. +/// builder.append_value(Variant::from(42)); +/// builder.append_null(); +/// builder.append_value(Variant::from("hello")); +/// +/// // Build the final value array +/// let value_array = builder.build().unwrap(); +/// assert_eq!(value_array.len(), 3); +/// ``` +#[derive(Debug)] +pub struct VariantValueArrayBuilder { + value_builder: ValueBuilder, + value_offsets: Vec<usize>, + nulls: NullBufferBuilder, +} + +impl VariantValueArrayBuilder { + /// Create a new `VariantValueArrayBuilder` with the specified row capacity + pub fn new(row_capacity: usize) -> Self { + Self { + value_builder: ValueBuilder::new(), + value_offsets: Vec::with_capacity(row_capacity), + nulls: NullBufferBuilder::new(row_capacity), + } + } + + /// Build the final value array + /// + /// Returns a [`BinaryViewArray`] containing the serialized variant values. + /// This can be combined with existing metadata to create a complete [`VariantArray`]. + pub fn build(mut self) -> Result<BinaryViewArray, ArrowError> { + let value_buffer = self.value_builder.into_inner(); + let mut array = binary_view_array_from_buffers(value_buffer, self.value_offsets); + if let Some(nulls) = self.nulls.finish() { + let (views, buffers, _) = array.into_parts(); + array = BinaryViewArray::try_new(views, buffers, Some(nulls))?; + } + Ok(array) + } + + /// Append a null row to the builder + /// + /// WARNING: It is only valid to call this method when building the `value` field of a shredded + /// variant column (which is nullable). The `value` field of a binary (unshredded) variant + /// column is non-nullable, and callers should instead invoke [`Self::append_value`] with + /// `Variant::Null`, passing the appropriate metadata value. + pub fn append_null(&mut self) { + self.value_offsets.push(self.value_builder.offset()); + self.nulls.append_null(); + } + + /// Append a variant value with its corresponding metadata + /// + /// # Arguments + /// * `value` - The variant value to append + /// * `metadata` - The metadata dictionary for this variant (used for field name resolution) + /// + /// # Returns + /// * `Ok(())` if the value was successfully appended + /// * `Err(ArrowError)` if the variant contains field names not found in the metadata + /// + /// # Example + /// ``` + /// # use parquet_variant::Variant; + /// # use parquet_variant_compute::VariantValueArrayBuilder; + /// let mut builder = VariantValueArrayBuilder::new(10); + /// builder.append_value(Variant::from(42)); + /// ``` + pub fn append_value(&mut self, value: Variant<'_, '_>) { + let metadata = value.metadata().cloned().unwrap_or(EMPTY_VARIANT_METADATA); Review Comment: That's a great question! Until now, that method was only used in tests, so its semantics weren't clear. I like the idea a lot tho -- it would eliminate the `unwrap_or` (which my next PR also has to use). -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org