alamb commented on code in PR #8366:
URL: https://github.com/apache/arrow-rs/pull/8366#discussion_r2364374103


##########
parquet-variant-compute/src/variant_to_arrow.rs:
##########
@@ -102,77 +104,142 @@ impl<'a> VariantToArrowRowBuilder<'a> {
             Float16(b) => b.finish(),
             Float32(b) => b.finish(),
             Float64(b) => b.finish(),
+        }
+    }
+}
+
+impl<'a> VariantToArrowRowBuilder<'a> {
+    pub fn append_null(&mut self) -> Result<()> {
+        use VariantToArrowRowBuilder::*;
+        match self {
+            Primitive(b) => b.append_null(),
+            BinaryVariant(b) => b.append_null(),
+            WithPath(path_builder) => path_builder.append_null(),
+        }
+    }
+
+    pub fn append_value(&mut self, value: Variant<'_, '_>) -> Result<bool> {
+        use VariantToArrowRowBuilder::*;
+        match self {
+            Primitive(b) => b.append_value(&value),
+            BinaryVariant(b) => b.append_value(value),
+            WithPath(path_builder) => path_builder.append_value(value),
+        }
+    }
+
+    pub fn finish(self) -> Result<ArrayRef> {
+        use VariantToArrowRowBuilder::*;
+        match self {
+            Primitive(b) => b.finish(),
             BinaryVariant(b) => b.finish(),
             WithPath(path_builder) => path_builder.finish(),
         }
     }
 }
 
-pub(crate) fn make_variant_to_arrow_row_builder<'a>(
-    metadata: &BinaryViewArray,
-    path: VariantPath<'a>,
-    data_type: Option<&'a DataType>,
+/// Creates a primitive row builder, returning Err if the requested data type 
is not primitive.
+pub(crate) fn make_primitive_variant_to_arrow_row_builder<'a>(
+    data_type: &'a DataType,
     cast_options: &'a CastOptions,
     capacity: usize,
-) -> Result<VariantToArrowRowBuilder<'a>> {
-    use VariantToArrowRowBuilder::*;
+) -> Result<PrimitiveVariantToArrowRowBuilder<'a>> {
+    use PrimitiveVariantToArrowRowBuilder::*;
 
-    let mut builder = match data_type {
-        // If no data type was requested, build an unshredded VariantArray.
-        None => BinaryVariant(VariantToBinaryVariantArrowRowBuilder::new(
-            metadata.clone(),
-            capacity,
-        )),
-        Some(DataType::Int8) => Int8(VariantToPrimitiveArrowRowBuilder::new(
+    let builder = match data_type {
+        DataType::Int8 => Int8(VariantToPrimitiveArrowRowBuilder::new(
             cast_options,
             capacity,
         )),
-        Some(DataType::Int16) => Int16(VariantToPrimitiveArrowRowBuilder::new(
+        DataType::Int16 => Int16(VariantToPrimitiveArrowRowBuilder::new(
             cast_options,
             capacity,
         )),
-        Some(DataType::Int32) => Int32(VariantToPrimitiveArrowRowBuilder::new(
+        DataType::Int32 => Int32(VariantToPrimitiveArrowRowBuilder::new(
             cast_options,
             capacity,
         )),
-        Some(DataType::Int64) => Int64(VariantToPrimitiveArrowRowBuilder::new(
+        DataType::Int64 => Int64(VariantToPrimitiveArrowRowBuilder::new(
             cast_options,
             capacity,
         )),
-        Some(DataType::Float16) => 
Float16(VariantToPrimitiveArrowRowBuilder::new(
+        DataType::UInt8 => UInt8(VariantToPrimitiveArrowRowBuilder::new(
             cast_options,
             capacity,
         )),
-        Some(DataType::Float32) => 
Float32(VariantToPrimitiveArrowRowBuilder::new(
+        DataType::UInt16 => UInt16(VariantToPrimitiveArrowRowBuilder::new(
             cast_options,
             capacity,
         )),
-        Some(DataType::Float64) => 
Float64(VariantToPrimitiveArrowRowBuilder::new(
+        DataType::UInt32 => UInt32(VariantToPrimitiveArrowRowBuilder::new(
             cast_options,
             capacity,
         )),
-        Some(DataType::UInt8) => UInt8(VariantToPrimitiveArrowRowBuilder::new(
+        DataType::UInt64 => UInt64(VariantToPrimitiveArrowRowBuilder::new(
             cast_options,
             capacity,
         )),
-        Some(DataType::UInt16) => 
UInt16(VariantToPrimitiveArrowRowBuilder::new(
+        DataType::Float16 => Float16(VariantToPrimitiveArrowRowBuilder::new(
             cast_options,
             capacity,
         )),
-        Some(DataType::UInt32) => 
UInt32(VariantToPrimitiveArrowRowBuilder::new(
+        DataType::Float32 => Float32(VariantToPrimitiveArrowRowBuilder::new(
             cast_options,
             capacity,
         )),
-        Some(DataType::UInt64) => 
UInt64(VariantToPrimitiveArrowRowBuilder::new(
+        DataType::Float64 => Float64(VariantToPrimitiveArrowRowBuilder::new(
             cast_options,
             capacity,
         )),
-        _ => {
+        _ if data_type.is_primitive() => {
             return Err(ArrowError::NotYetImplemented(format!(
-                "variant_get with path={:?} and data_type={:?} not yet 
implemented",
-                path, data_type
+                "Primitive data_type {data_type:?} not yet implemented"
             )));
         }
+        _ => {
+            return Err(ArrowError::InvalidArgumentError(format!(

Review Comment:
   I tried to write a benchmark for shredding and I hit the fact that I can't 
shred Utf8 columns (which is fine, we'll do it as a follow on PR) but I will 
file a ticket to track



##########
parquet-variant-compute/src/variant_array.rs:
##########
@@ -456,16 +473,13 @@ pub enum ShreddingState {
 }
 
 impl ShreddingState {
-    /// try to create a new `ShreddingState` from the given fields
-    pub fn try_new(
-        value: Option<BinaryViewArray>,
-        typed_value: Option<ArrayRef>,
-    ) -> Result<Self, ArrowError> {
+    /// Create a new `ShreddingState` from the given fields
+    pub fn new(value: Option<BinaryViewArray>, typed_value: Option<ArrayRef>) 
-> Self {

Review Comment:
   Amusingly, I did this exact change in 
https://github.com/apache/arrow-rs/pull/8392 too. 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to