alamb commented on code in PR #8021:
URL: https://github.com/apache/arrow-rs/pull/8021#discussion_r2249343322


##########
parquet-variant-compute/src/variant_array.rs:
##########
@@ -44,27 +45,90 @@ use std::sync::Arc;
 /// [document]: 
https://docs.google.com/document/d/1pw0AWoMQY3SjD7R4LgbPvMjG_xSCtXp3rZHkVp9jpZ4/edit?usp=sharing
 #[derive(Debug)]
 pub struct VariantArray {
-    /// StructArray of up to three fields:
-    ///
-    /// 1. A required field named `metadata` which is binary, large_binary, or
-    ///    binary_view
-    ///
-    /// 2. An optional field named `value` that is binary, large_binary, or
-    ///    binary_view
-    ///
-    /// 3. An optional field named `typed_value` which can be any primitive 
type
-    ///    or be a list, large_list, list_view or struct
-    ///
-    /// NOTE: It is also permissible for the metadata field to be
-    /// Dictionary-Encoded, preferably (but not required) with an index type of
-    /// int8.
+    /// Reference to the underlying StructArray
     inner: StructArray,
 
-    /// Reference to the metadata column of inner
-    metadata_ref: ArrayRef,
+    /// how is this variant array shredded?
+    shredding_state: ShreddingState,
+}
+
+/// Variant arrays can be shredded in one of three states, encoded here

Review Comment:
   With this enum it is easier to know what the state of any particular array is



##########
parquet-variant-compute/src/variant_get/output/mod.rs:
##########
@@ -0,0 +1,87 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+mod primitive;
+mod variant;
+
+use crate::variant_get::output::primitive::PrimitiveOutputBuilder;
+use crate::variant_get::output::variant::VariantOutputBuilder;
+use crate::variant_get::GetOptions;
+use crate::VariantArray;
+use arrow::array::{ArrayRef, BinaryViewArray};
+use arrow::datatypes::Int32Type;
+use arrow::error::Result;
+use arrow_schema::{ArrowError, DataType};
+
+/// This trait represents something that gets the output of the variant_get 
kernel.
+///
+/// For example, there are specializations for writing the output as a 
VariantArray,
+/// or as a specific type (e.g. Int32Array).
+///
+/// See [`instantiate_output_builder`] to create an instance of this trait.
+pub(crate) trait OutputBuilder {
+    /// create output for a shredded variant array
+    fn partially_shredded(
+        &self,
+        variant_array: &VariantArray,
+        metadata: &BinaryViewArray,
+        value_field: &BinaryViewArray,
+        typed_value: &ArrayRef,
+    ) -> Result<ArrayRef>;
+
+    /// output for a perfectly shredded variant array
+    fn fully_shredded(
+        &self,
+        variant_array: &VariantArray,
+        metadata: &BinaryViewArray,
+        typed_value: &ArrayRef,
+    ) -> Result<ArrayRef>;
+
+    /// write out an unshredded variant array
+    fn unshredded(
+        &self,
+        variant_array: &VariantArray,
+        metadata: &BinaryViewArray,
+        value_field: &BinaryViewArray,
+    ) -> Result<ArrayRef>;
+}
+
+pub(crate) fn instantiate_output_builder<'a>(

Review Comment:
   The idea here is that this creates an instance of `OutputBuilder` based on 
the requested output type.



##########
parquet-variant-compute/src/variant_get.rs:
##########
@@ -1,180 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one

Review Comment:
   I actually left most of this file the same but renamed it to 
`variant_get/mod.rs`, despite how GitHub is rendering it



##########
parquet-variant-compute/src/variant_get/mod.rs:
##########
@@ -0,0 +1,431 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+use arrow::{
+    array::{Array, ArrayRef},
+    compute::CastOptions,
+    error::Result,
+};
+use arrow_schema::{ArrowError, FieldRef};
+use parquet_variant::VariantPath;
+
+use crate::variant_array::ShreddingState;
+use crate::variant_get::output::instantiate_output_builder;
+use crate::VariantArray;
+
+mod output;
+
+/// Returns an array with the specified path extracted from the variant values.
+///
+/// The return array type depends on the `as_type` field of the options 
parameter
+/// 1. `as_type: None`: a VariantArray is returned. The values in this new 
VariantArray will point
+///    to the specified path.
+/// 2. `as_type: Some(<specific field>)`: an array of the specified type is 
returned.
+pub fn variant_get(input: &ArrayRef, options: GetOptions) -> Result<ArrayRef> {
+    let variant_array: &VariantArray = 
input.as_any().downcast_ref().ok_or_else(|| {
+        ArrowError::InvalidArgumentError(
+            "expected a VariantArray as the input for variant_get".to_owned(),
+        )
+    })?;
+
+    // Create the output writer based on the specified output options

Review Comment:
   The core design for get is here: there are different potential builders 
depending on the type of the output array, which gives us a place to put 
special code for each output array type. This is the `OutputBuilder` trait -- 
perhaps someone can come up with a better name.



##########
parquet-variant-compute/src/variant_get/mod.rs:
##########
@@ -0,0 +1,431 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+use arrow::{
+    array::{Array, ArrayRef},
+    compute::CastOptions,
+    error::Result,
+};
+use arrow_schema::{ArrowError, FieldRef};
+use parquet_variant::VariantPath;
+
+use crate::variant_array::ShreddingState;
+use crate::variant_get::output::instantiate_output_builder;
+use crate::VariantArray;
+
+mod output;
+
+/// Returns an array with the specified path extracted from the variant values.
+///
+/// The return array type depends on the `as_type` field of the options 
parameter
+/// 1. `as_type: None`: a VariantArray is returned. The values in this new 
VariantArray will point
+///    to the specified path.
+/// 2. `as_type: Some(<specific field>)`: an array of the specified type is 
returned.
+pub fn variant_get(input: &ArrayRef, options: GetOptions) -> Result<ArrayRef> {
+    let variant_array: &VariantArray = 
input.as_any().downcast_ref().ok_or_else(|| {
+        ArrowError::InvalidArgumentError(
+            "expected a VariantArray as the input for variant_get".to_owned(),
+        )
+    })?;
+
+    // Create the output writer based on the specified output options
+    let output_builder = instantiate_output_builder(options.clone())?;
+
+    // Dispatch based on the shredding state of the input variant array
+    // TODO make this an enum on VariantArray (e.g ShreddingState)
+    match variant_array.shredding_state() {
+        ShreddingState::PartiallyShredded {
+            metadata,
+            value,
+            typed_value,
+        } => output_builder.partially_shredded(variant_array, metadata, value, 
typed_value),
+        ShreddingState::FullyShredded {
+            metadata,
+            typed_value,
+        } => output_builder.fully_shredded(variant_array, metadata, 
typed_value),
+        ShreddingState::Unshredded { metadata, value } => {
+            output_builder.unshredded(variant_array, metadata, value)
+        }
+    }
+}
+
+/// Controls the action of the variant_get kernel.
+#[derive(Debug, Clone, Default)]
+pub struct GetOptions<'a> {
+    /// What path to extract
+    pub path: VariantPath<'a>,
+    /// if `as_type` is None, the returned array will itself be a VariantArray.
+    ///
+    /// if `as_type` is `Some(type)` the field is returned as the specified 
type.
+    pub as_type: Option<FieldRef>,
+    /// Controls the casting behavior (e.g. error vs substituting null on cast 
error).
+    pub cast_options: CastOptions<'a>,
+}
+
+impl<'a> GetOptions<'a> {
+    /// Construct default options to get the specified path as a variant.
+    pub fn new() -> Self {
+        Default::default()
+    }
+
+    /// Construct options to get the specified path as a variant.
+    pub fn new_with_path(path: VariantPath<'a>) -> Self {
+        Self {
+            path,
+            as_type: None,
+            cast_options: Default::default(),
+        }
+    }
+
+    /// Specify the type to return.
+    pub fn with_as_type(mut self, as_type: Option<FieldRef>) -> Self {
+        self.as_type = as_type;
+        self
+    }
+
+    /// Specify the cast options to use when casting to the specified type.
+    pub fn with_cast_options(mut self, cast_options: CastOptions<'a>) -> Self {
+        self.cast_options = cast_options;
+        self
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use std::sync::Arc;
+
+    use arrow::array::{Array, ArrayRef, BinaryViewArray, Int32Array, 
StringArray, StructArray};
+    use arrow::buffer::NullBuffer;
+    use arrow::compute::CastOptions;
+    use arrow_schema::{DataType, Field, FieldRef, Fields};
+    use parquet_variant::{Variant, VariantPath};
+
+    use crate::batch_json_string_to_variant;
+    use crate::VariantArray;
+
+    use super::{variant_get, GetOptions};
+
+    fn single_variant_get_test(input_json: &str, path: VariantPath, 
expected_json: &str) {
+        // Create input array from JSON string
+        let input_array_ref: ArrayRef = 
Arc::new(StringArray::from(vec![Some(input_json)]));
+        let input_variant_array_ref: ArrayRef =
+            Arc::new(batch_json_string_to_variant(&input_array_ref).unwrap());
+
+        let result =
+            variant_get(&input_variant_array_ref, 
GetOptions::new_with_path(path)).unwrap();
+
+        // Create expected array from JSON string
+        let expected_array_ref: ArrayRef = 
Arc::new(StringArray::from(vec![Some(expected_json)]));
+        let expected_variant_array = 
batch_json_string_to_variant(&expected_array_ref).unwrap();
+
+        let result_array: &VariantArray = 
result.as_any().downcast_ref().unwrap();
+        assert_eq!(
+            result_array.len(),
+            1,
+            "Expected result array to have length 1"
+        );
+        assert!(
+            result_array.nulls().is_none(),
+            "Expected no nulls in result array"
+        );
+        let result_variant = result_array.value(0);
+        let expected_variant = expected_variant_array.value(0);
+        assert_eq!(
+            result_variant, expected_variant,
+            "Result variant does not match expected variant"
+        );
+    }
+
+    #[test]
+    fn get_primitive_variant_field() {
+        single_variant_get_test(
+            r#"{"some_field": 1234}"#,
+            VariantPath::from("some_field"),
+            "1234",
+        );
+    }
+
+    #[test]
+    fn get_primitive_variant_list_index() {
+        single_variant_get_test("[1234, 5678]", VariantPath::from(0), "1234");
+    }
+
+    #[test]
+    fn get_primitive_variant_inside_object_of_object() {
+        single_variant_get_test(
+            r#"{"top_level_field": {"inner_field": 1234}}"#,
+            VariantPath::from("top_level_field").join("inner_field"),
+            "1234",
+        );
+    }
+
+    #[test]
+    fn get_primitive_variant_inside_list_of_object() {
+        single_variant_get_test(
+            r#"[{"some_field": 1234}]"#,
+            VariantPath::from(0).join("some_field"),
+            "1234",
+        );
+    }
+
+    #[test]
+    fn get_primitive_variant_inside_object_of_list() {
+        single_variant_get_test(
+            r#"{"some_field": [1234]}"#,
+            VariantPath::from("some_field").join(0),
+            "1234",
+        );
+    }
+
+    #[test]
+    fn get_complex_variant() {
+        single_variant_get_test(
+            r#"{"top_level_field": {"inner_field": 1234}}"#,
+            VariantPath::from("top_level_field"),
+            r#"{"inner_field": 1234}"#,
+        );
+    }
+
+    /// Shredding: extract a value as a VariantArray

Review Comment:
   here are the new shredded tests



##########
parquet-variant-compute/src/variant_array.rs:
##########
@@ -135,36 +238,76 @@ impl VariantArray {
         self.inner
     }
 
+    /// Return the shredding state of this `VariantArray`
+    pub fn shredding_state(&self) -> &ShreddingState {
+        &self.shredding_state
+    }
+
     /// Return the [`Variant`] instance stored at the given row
     ///
-    /// Panics if the index is out of bounds.
+    /// # Performance Note
+    ///
+    /// This is certainly not the most efficient way to access values in a
+    /// `VariantArray`, but it is useful for testing and debugging.
     ///
     /// Note: Does not do deep validation of the [`Variant`], so it is up to 
the
     /// caller to ensure that the metadata and value were constructed 
correctly.
-    pub fn value(&self, index: usize) -> Variant {
-        let metadata = self.metadata_field().as_binary_view().value(index);
-        let value = self.value_field().as_binary_view().value(index);
-        Variant::new(metadata, value)
+    pub fn value(&self, index: usize) -> Option<Variant> {

Review Comment:
   This is the first part -- accessing a single Variant value from a 
potentially shredded VariantArray



##########
parquet-variant-compute/src/variant_get/output/primitive.rs:
##########
@@ -0,0 +1,171 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::variant_get::output::OutputBuilder;
+use crate::VariantArray;
+use arrow::error::Result;
+
+use arrow::array::{
+    Array, ArrayRef, ArrowPrimitiveType, AsArray, BinaryViewArray, 
NullBufferBuilder,
+    PrimitiveArray,
+};
+use arrow::compute::{cast_with_options, CastOptions};
+use arrow::datatypes::Int32Type;
+use arrow_schema::{ArrowError, FieldRef};
+use parquet_variant::{Variant, VariantPath};
+use std::marker::PhantomData;
+use std::sync::Arc;
+
+/// Trait for Arrow primitive types that can be used in the output builder
+///
+/// This just exists to add a generic way to convert from Variant to the 
primitive type
+pub(super) trait ArrowPrimitiveVariant: ArrowPrimitiveType {
+    /// Try to extract the primitive value from a Variant, returning None if it
+    /// cannot be converted
+    ///
+    /// TODO: figure out how to handle coercion/casting
+    fn from_variant(variant: &Variant) -> Option<Self::Native>;
+}
+
+/// Outputs Primitive arrays
+pub(super) struct PrimitiveOutputBuilder<'a, T: ArrowPrimitiveVariant> {
+    /// What path to extract
+    path: VariantPath<'a>,
+    /// Returned output type
+    as_type: FieldRef,
+    /// Controls the casting behavior (e.g. error vs substituting null on cast 
error).
+    cast_options: CastOptions<'a>,
+    /// Phantom data for the primitive type
+    _phantom: PhantomData<T>,
+}
+
+impl<'a, T: ArrowPrimitiveVariant> PrimitiveOutputBuilder<'a, T> {
+    pub(super) fn new(
+        path: VariantPath<'a>,
+        as_type: FieldRef,
+        cast_options: CastOptions<'a>,
+    ) -> Self {
+        Self {
+            path,
+            as_type,
+            cast_options,
+            _phantom: PhantomData,
+        }
+    }
+}
+
+impl<'a, T: ArrowPrimitiveVariant> OutputBuilder for 
PrimitiveOutputBuilder<'a, T> {
+    fn partially_shredded(
+        &self,
+        variant_array: &VariantArray,
+        _metadata: &BinaryViewArray,
+        _value_field: &BinaryViewArray,
+        typed_value: &ArrayRef,
+    ) -> arrow::error::Result<ArrayRef> {
+        // build up the output array element by element
+        let mut nulls = NullBufferBuilder::new(variant_array.len());
+        let mut values = Vec::with_capacity(variant_array.len());
+        let typed_value =
+            cast_with_options(typed_value, self.as_type.data_type(), 
&self.cast_options)?;
+        // downcast to the primitive array (e.g. Int32Array, Float64Array, etc)
+        let typed_value = typed_value.as_primitive::<T>();
+
+        for i in 0..variant_array.len() {
+            if variant_array.is_null(i) {
+                nulls.append_null();
+                values.push(T::default_value()); // not used, placeholder
+                continue;
+            }
+
+            // if the typed value is null, decode the variant and extract the 
value
+            if typed_value.is_null(i) {
+                // todo follow path
+                let Some(variant) = variant_array.value(i) else {
+                    nulls.append_null();
+                    values.push(T::default_value()); // not used, placeholder
+                    continue;
+                };
+
+                let Some(value) = T::from_variant(&variant) else {
+                    if self.cast_options.safe {
+                        // safe mode: append null if we can't convert
+                        nulls.append_null();
+                        values.push(T::default_value()); // not used, 
placeholder
+                        continue;
+                    } else {
+                        return Err(ArrowError::CastError(format!(
+                            "Failed to extract primitive of type {} from 
variant {:?} at path {:?}",
+                            self.as_type.data_type(),
+                            variant,
+                            self.path
+                        )));
+                    }
+                };
+
+                nulls.append_non_null();
+                values.push(value)
+            } else {
+                // otherwise we have a typed value, so we can use it directly
+                nulls.append_non_null();
+                values.push(typed_value.value(i));
+            }
+        }
+
+        let nulls = nulls.finish();
+        let array = PrimitiveArray::<T>::new(values.into(), nulls)
+            .with_data_type(self.as_type.data_type().clone());
+        Ok(Arc::new(array))
+    }
+
+    fn fully_shredded(
+        &self,
+        _variant_array: &VariantArray,
+        _metadata: &BinaryViewArray,
+        typed_value: &ArrayRef,
+    ) -> arrow::error::Result<ArrayRef> {
+        // if the types match exactly, we can just return the typed_value
+        if typed_value.data_type() == self.as_type.data_type() {

Review Comment:
   Here is the fast path if we have a perfectly shredded value.



##########
parquet-variant-compute/src/variant_get/output/primitive.rs:
##########
@@ -0,0 +1,171 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::variant_get::output::OutputBuilder;
+use crate::VariantArray;
+use arrow::error::Result;
+
+use arrow::array::{
+    Array, ArrayRef, ArrowPrimitiveType, AsArray, BinaryViewArray, 
NullBufferBuilder,
+    PrimitiveArray,
+};
+use arrow::compute::{cast_with_options, CastOptions};
+use arrow::datatypes::Int32Type;
+use arrow_schema::{ArrowError, FieldRef};
+use parquet_variant::{Variant, VariantPath};
+use std::marker::PhantomData;
+use std::sync::Arc;
+
+/// Trait for Arrow primitive types that can be used in the output builder
+///
+/// This just exists to add a generic way to convert from Variant to the 
primitive type
+pub(super) trait ArrowPrimitiveVariant: ArrowPrimitiveType {
+    /// Try to extract the primitive value from a Variant, returning None if it
+    /// cannot be converted
+    ///
+    /// TODO: figure out how to handle coercion/casting
+    fn from_variant(variant: &Variant) -> Option<Self::Native>;
+}
+
+/// Outputs Primitive arrays
+pub(super) struct PrimitiveOutputBuilder<'a, T: ArrowPrimitiveVariant> {
+    /// What path to extract
+    path: VariantPath<'a>,
+    /// Returned output type
+    as_type: FieldRef,
+    /// Controls the casting behavior (e.g. error vs substituting null on cast 
error).
+    cast_options: CastOptions<'a>,
+    /// Phantom data for the primitive type
+    _phantom: PhantomData<T>,
+}
+
+impl<'a, T: ArrowPrimitiveVariant> PrimitiveOutputBuilder<'a, T> {
+    pub(super) fn new(
+        path: VariantPath<'a>,
+        as_type: FieldRef,
+        cast_options: CastOptions<'a>,
+    ) -> Self {
+        Self {
+            path,
+            as_type,
+            cast_options,
+            _phantom: PhantomData,
+        }
+    }
+}
+
+impl<'a, T: ArrowPrimitiveVariant> OutputBuilder for 
PrimitiveOutputBuilder<'a, T> {
+    fn partially_shredded(
+        &self,
+        variant_array: &VariantArray,
+        _metadata: &BinaryViewArray,
+        _value_field: &BinaryViewArray,
+        typed_value: &ArrayRef,
+    ) -> arrow::error::Result<ArrayRef> {
+        // build up the output array element by element

Review Comment:
   This is the code that converts a shredded variant into a typed output -- It 
can probably be made quite a bit faster with some more deliberate 
vectorization, but I think it is relatively simple and functional



##########
parquet-variant-compute/src/variant_get/output/primitive.rs:
##########
@@ -0,0 +1,171 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::variant_get::output::OutputBuilder;
+use crate::VariantArray;
+use arrow::error::Result;
+
+use arrow::array::{
+    Array, ArrayRef, ArrowPrimitiveType, AsArray, BinaryViewArray, 
NullBufferBuilder,
+    PrimitiveArray,
+};
+use arrow::compute::{cast_with_options, CastOptions};
+use arrow::datatypes::Int32Type;
+use arrow_schema::{ArrowError, FieldRef};
+use parquet_variant::{Variant, VariantPath};
+use std::marker::PhantomData;
+use std::sync::Arc;
+
+/// Trait for Arrow primitive types that can be used in the output builder
+///
+/// This just exists to add a generic way to convert from Variant to the 
primitive type
+pub(super) trait ArrowPrimitiveVariant: ArrowPrimitiveType {

Review Comment:
   This one is a bit complicated due to generics, but I think it will work for 
all primitive types (ints, floats, etc.).



##########
parquet-variant-compute/src/variant_get/output/variant.rs:
##########
@@ -0,0 +1,153 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::variant_get::output::OutputBuilder;
+use crate::{VariantArray, VariantArrayBuilder};
+use arrow::array::{Array, ArrayRef, AsArray, BinaryViewArray};
+use arrow::datatypes::Int32Type;
+use arrow_schema::{ArrowError, DataType};
+use parquet_variant::{Variant, VariantPath};
+use std::sync::Arc;
+
+/// Outputs VariantArrays
+pub(super) struct VariantOutputBuilder<'a> {
+    /// What path to extract
+    path: VariantPath<'a>,
+}
+
+impl<'a> VariantOutputBuilder<'a> {
+    pub(super) fn new(path: VariantPath<'a>) -> Self {
+        Self { path }
+    }
+}
+
+impl<'a> OutputBuilder for VariantOutputBuilder<'a> {
+    fn partially_shredded(
+        &self,
+        variant_array: &VariantArray,
+        // TODO(perf): can reuse the metadata field here to avoid re-creating 
it
+        _metadata: &BinaryViewArray,
+        _value_field: &BinaryViewArray,
+        typed_value: &ArrayRef,
+    ) -> arrow::error::Result<ArrayRef> {
+        // in this case dispatch on the typed_value and
+        // TODO macro'ize this using downcast! to handle all other primitive 
types
+        // TODO(perf): avoid builders entirely (and write the raw variant 
directly as we know the metadata is the same)
+        let mut array_builder = VariantArrayBuilder::new(variant_array.len());
+        match typed_value.data_type() {
+            DataType::Int32 => {
+                let primitive_array = typed_value.as_primitive::<Int32Type>();
+                for i in 0..variant_array.len() {
+                    if variant_array.is_null(i) {
+                        array_builder.append_null();
+                        continue;
+                    }
+
+                    if typed_value.is_null(i) {
+                        // fall back to the value (variant) field
+                        // (TODO could copy the variant bytes directly)
+                        if let Some(value) = variant_array.value(i) {
+                            array_builder.append_variant(value);
+                        } else {
+                            // this shouldn't happen, but if it does, append 
null
+                            array_builder.append_null();
+                        }
+                        continue;
+                    }
+
+                    // otherwise we have a typed value, so we can use it 
directly
+                    let int_value = primitive_array.value(i);
+                    array_builder.append_variant(Variant::from(int_value));
+                }
+            }
+            dt => {
+                return Err(ArrowError::NotYetImplemented(format!(
+                    "variant_get fully_shredded with typed_value={dt} is not 
implemented yet",
+                )));
+            }
+        };
+        Ok(Arc::new(array_builder.build()))
+    }
+
+    fn fully_shredded(
+        &self,
+        variant_array: &VariantArray,
+        // TODO(perf): can reuse the metadata field here to avoid re-creating 
it
+        _metadata: &BinaryViewArray,
+        typed_value: &ArrayRef,
+    ) -> arrow::error::Result<ArrayRef> {
+        // in this case dispatch on the typed_value and
+        // TODO macro'ize this using downcast! to handle all other primitive 
types
+        // TODO(perf): avoid builders entirely (and write the raw variant 
directly as we know the metadata is the same)
+        let mut array_builder = VariantArrayBuilder::new(variant_array.len());
+        match typed_value.data_type() {
+            DataType::Int32 => {
+                let primitive_array = typed_value.as_primitive::<Int32Type>();
+                for i in 0..variant_array.len() {
+                    if primitive_array.is_null(i) {
+                        array_builder.append_null();
+                        continue;
+                    }
+
+                    let int_value = primitive_array.value(i);
+                    array_builder.append_variant(Variant::from(int_value));
+                }
+            }
+            dt => {
+                return Err(ArrowError::NotYetImplemented(format!(
+                    "variant_get fully_shredded with typed_value={dt} is not 
implemented yet",
+                )));
+            }
+        };
+        Ok(Arc::new(array_builder.build()))
+    }
+
+    fn unshredded(
+        &self,
+        variant_array: &VariantArray,
+        _metadata: &BinaryViewArray,
+        _value_field: &BinaryViewArray,
+    ) -> arrow::error::Result<ArrayRef> {
+        let mut builder = VariantArrayBuilder::new(variant_array.len());

Review Comment:
   This is the case that is handled on `main`



##########
parquet-variant-compute/src/variant_get/output/variant.rs:
##########
@@ -0,0 +1,153 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::variant_get::output::OutputBuilder;
+use crate::{VariantArray, VariantArrayBuilder};
+use arrow::array::{Array, ArrayRef, AsArray, BinaryViewArray};
+use arrow::datatypes::Int32Type;
+use arrow_schema::{ArrowError, DataType};
+use parquet_variant::{Variant, VariantPath};
+use std::sync::Arc;
+
+/// Outputs VariantArrays
+pub(super) struct VariantOutputBuilder<'a> {
+    /// What path to extract
+    path: VariantPath<'a>,
+}
+
+impl<'a> VariantOutputBuilder<'a> {
+    pub(super) fn new(path: VariantPath<'a>) -> Self {
+        Self { path }
+    }
+}
+
+impl<'a> OutputBuilder for VariantOutputBuilder<'a> {
+    fn partially_shredded(
+        &self,
+        variant_array: &VariantArray,
+        // TODO(perf): can reuse the metadata field here to avoid re-creating 
it
+        _metadata: &BinaryViewArray,
+        _value_field: &BinaryViewArray,
+        typed_value: &ArrayRef,
+    ) -> arrow::error::Result<ArrayRef> {
+        // in this case dispatch on the typed_value and
+        // TODO macro'ize this using downcast! to handle all other primitive 
types
+        // TODO(perf): avoid builders entirely (and write the raw variant 
directly as we know the metadata is the same)
+        let mut array_builder = VariantArrayBuilder::new(variant_array.len());

Review Comment:
   This is the logic to reconstruct `Variant`s from typed values



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to