liamzwbao commented on code in PR #8831:
URL: https://github.com/apache/arrow-rs/pull/8831#discussion_r2615323834


##########
parquet-variant-compute/src/shred_variant.rs:
##########
@@ -236,6 +253,285 @@ impl<'a> VariantToShreddedPrimitiveVariantRowBuilder<'a> {
     }
 }
 
+pub(crate) struct VariantToShreddedArrayVariantRowBuilder<'a> {
+    value_builder: VariantValueArrayBuilder,
+    typed_value_builder: ArrayVariantToArrowRowBuilder<'a>,
+}
+
+impl<'a> VariantToShreddedArrayVariantRowBuilder<'a> {
+    fn try_new(
+        data_type: &'a DataType,
+        cast_options: &'a CastOptions,
+        capacity: usize,
+    ) -> Result<Self> {
+        Ok(Self {
+            value_builder: VariantValueArrayBuilder::new(capacity),
+            typed_value_builder: ArrayVariantToArrowRowBuilder::try_new(
+                data_type,
+                cast_options,
+                capacity,
+            )?,
+        })
+    }
+
+    fn append_null(&mut self) -> Result<()> {
+        self.value_builder.append_value(Variant::Null);
+        self.typed_value_builder.append_null();
+        Ok(())
+    }
+
+    fn append_value(&mut self, variant: Variant<'_, '_>) -> Result<bool> {
+        // If the variant is not an array, typed_value must be null.
+        // If the variant is an array, value must be null.
+        match variant {
+            Variant::List(list) => {
+                self.value_builder.append_null();
+                self.typed_value_builder.append_value(list)?;
+                Ok(true)
+            }
+            other => {
+                self.value_builder.append_value(other);
+                self.typed_value_builder.append_null();
+                Ok(false)
+            }
+        }
+    }
+
+    fn finish(self) -> Result<(BinaryViewArray, ArrayRef, Option<NullBuffer>)> 
{
+        Ok((
+            self.value_builder.build()?,
+            self.typed_value_builder.finish()?,
+            // All elements of an array must be present (not missing) because
+            // the array Variant encoding does not allow missing elements
+            None,
+        ))
+    }
+}
+
+enum ArrayVariantToArrowRowBuilder<'a> {
+    List(VariantToListArrowRowBuilder<'a, i32>),
+    LargeList(VariantToListArrowRowBuilder<'a, i64>),
+    ListView(VariantToListViewArrowRowBuilder<'a, i32>),
+    LargeListView(VariantToListViewArrowRowBuilder<'a, i64>),
+}
+
+impl<'a> ArrayVariantToArrowRowBuilder<'a> {
+    fn try_new(
+        data_type: &'a DataType,
+        cast_options: &'a CastOptions,
+        capacity: usize,
+    ) -> Result<Self> {
+        use ArrayVariantToArrowRowBuilder::*;
+
+        let builder = match data_type {
+            DataType::List(field) => 
List(VariantToListArrowRowBuilder::try_new(
+                field.clone(),
+                field.data_type(),
+                cast_options,
+                capacity,
+            )?),
+            DataType::LargeList(field) => 
LargeList(VariantToListArrowRowBuilder::try_new(
+                field.clone(),
+                field.data_type(),
+                cast_options,
+                capacity,
+            )?),
+            DataType::ListView(field) => 
ListView(VariantToListViewArrowRowBuilder::try_new(
+                field.clone(),
+                field.data_type(),
+                cast_options,
+                capacity,
+            )?),
+            DataType::LargeListView(field) => {
+                LargeListView(VariantToListViewArrowRowBuilder::try_new(
+                    field.clone(),
+                    field.data_type(),
+                    cast_options,
+                    capacity,
+                )?)
+            }
+            other => {
+                return Err(ArrowError::InvalidArgumentError(format!(
+                    "Casting to {other:?} is not applicable for array Variant 
types"
+                )));
+            }
+        };
+        Ok(builder)
+    }
+
+    fn append_null(&mut self) {
+        match self {
+            Self::List(builder) => builder.append_null(),
+            Self::LargeList(builder) => builder.append_null(),
+            Self::ListView(builder) => builder.append_null(),
+            Self::LargeListView(builder) => builder.append_null(),
+        }
+    }
+
+    fn append_value(&mut self, list: VariantList<'_, '_>) -> Result<()> {
+        match self {
+            Self::List(builder) => builder.append_value(list),
+            Self::LargeList(builder) => builder.append_value(list),
+            Self::ListView(builder) => builder.append_value(list),
+            Self::LargeListView(builder) => builder.append_value(list),
+        }
+    }
+
+    fn finish(self) -> Result<ArrayRef> {
+        match self {
+            Self::List(builder) => builder.finish(),
+            Self::LargeList(builder) => builder.finish(),
+            Self::ListView(builder) => builder.finish(),
+            Self::LargeListView(builder) => builder.finish(),
+        }
+    }
+}
+
+struct VariantToListArrowRowBuilder<'a, O: OffsetSizeTrait + 
ArrowNativeTypeOp> {
+    field: FieldRef,
+    offsets: Vec<O>,
+    element_builder: Box<VariantToShreddedVariantRowBuilder<'a>>,
+    nulls: NullBufferBuilder,
+    current_offset: O,
+}
+
+impl<'a, O: OffsetSizeTrait + ArrowNativeTypeOp> 
VariantToListArrowRowBuilder<'a, O> {
+    fn try_new(
+        field: FieldRef,
+        element_data_type: &'a DataType,
+        cast_options: &'a CastOptions,
+        capacity: usize,
+    ) -> Result<Self> {
+        let mut offsets = 
Vec::with_capacity(capacity.checked_add(1).ok_or_else(|| {
+            ArrowError::ComputeError(
+                "Capacity exceeded usize::MAX when reserving list 
offsets".to_string(),
+            )
+        })?);
+        offsets.push(O::ZERO);
+        let element_builder = 
make_variant_to_shredded_variant_arrow_row_builder(
+            element_data_type,
+            cast_options,
+            capacity,
+            false,
+        )?;
+        Ok(Self {
+            field,
+            offsets,
+            element_builder: Box::new(element_builder),
+            nulls: NullBufferBuilder::new(capacity),
+            current_offset: O::ZERO,
+        })
+    }
+
+    fn append_null(&mut self) {
+        self.offsets.push(self.current_offset);
+        self.nulls.append_null();
+    }
+
+    fn append_value(&mut self, list: VariantList<'_, '_>) -> Result<()> {
+        for element in list.iter() {
+            self.element_builder.append_value(element)?;
+            self.current_offset = self.current_offset.add_checked(O::ONE)?;
+        }
+        self.offsets.push(self.current_offset);
+        self.nulls.append_non_null();
+        Ok(())
+    }
+
+    fn finish(mut self) -> Result<ArrayRef> {
+        let (value, typed_value, nulls) = self.element_builder.finish()?;
+        let element_array =
+            ShreddedVariantFieldArray::from_parts(Some(value), 
Some(typed_value), nulls);
+        let field = Arc::new(
+            self.field
+                .as_ref()
+                .clone()
+                .with_data_type(element_array.data_type().clone()),
+        );
+        let offsets = OffsetBuffer::<O>::new(ScalarBuffer::from(self.offsets));
+        let list_array = GenericListArray::<O>::new(
+            field,
+            offsets,
+            ArrayRef::from(element_array),
+            self.nulls.finish(),
+        );
+        Ok(Arc::new(list_array))
+    }
+}
+
+struct VariantToListViewArrowRowBuilder<'a, O: OffsetSizeTrait + 
ArrowNativeTypeOp> {
+    field: FieldRef,
+    offsets: Vec<O>,
+    sizes: Vec<O>,
+    element_builder: Box<VariantToShreddedVariantRowBuilder<'a>>,
+    nulls: NullBufferBuilder,
+    current_offset: O,
+}

Review Comment:
   Makes sense, we can merge these 2 builders with the new const



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to