scovich commented on code in PR #8122:
URL: https://github.com/apache/arrow-rs/pull/8122#discussion_r2274347548


##########
parquet-variant-compute/src/variant_array.rs:
##########
@@ -229,107 +328,138 @@ pub enum ShreddingState {
     // TODO: add missing state where there is neither value nor typed_value
     // Missing { metadata: BinaryViewArray },
     /// This variant has no typed_value field
-    Unshredded {
-        metadata: BinaryViewArray,
-        value: BinaryViewArray,
-    },
+    Unshredded { value: BinaryViewArray },
     /// This variant has a typed_value field and no value field
     /// meaning it is the shredded type
-    Typed {
-        metadata: BinaryViewArray,
-        typed_value: ArrayRef,
-    },
-    /// Partially shredded:
-    /// * value is an object
-    /// * typed_value is a shredded object.
+    PerfectlyShredded { typed_value: ArrayRef },
+    /// Imperfectly shredded: Shredded values reside in `typed_value` while 
those that failed to
+    /// shred reside in `value`. Missing field values are NULL in both 
columns, while NULL primitive
+    /// values have NULL `typed_value` and `Variant::Null` in `value`.
     ///
-    /// Note the spec says "Writers must not produce data where both value and
-    /// typed_value are non-null, unless the Variant value is an object."
-    PartiallyShredded {
-        metadata: BinaryViewArray,
+    /// NOTE: A partially shredded struct is a special kind of imperfect 
shredding, where
+    /// `typed_value` and `value` are both non-NULL. The `typed_value` is a 
struct containing the
+    /// subset of fields for which shredding was attempted (each field will 
then have its own value
+    /// and/or typed_value sub-fields that indicate how shredding actually 
turned out). Meanwhile,
+    /// the `value` is a variant object containing the subset of fields for 
which shredding was
+    /// not even attempted.
+    ImperfectlyShredded {
         value: BinaryViewArray,
         typed_value: ArrayRef,
     },
 }
 
 impl ShreddingState {
     /// try to create a new `ShreddingState` from the given fields
-    pub fn try_new(
-        metadata: BinaryViewArray,
-        value: Option<BinaryViewArray>,
-        typed_value: Option<ArrayRef>,
-    ) -> Result<Self, ArrowError> {
-        match (metadata, value, typed_value) {
-            (metadata, Some(value), Some(typed_value)) => 
Ok(Self::PartiallyShredded {
-                metadata,
-                value,
-                typed_value,
-            }),
-            (metadata, Some(value), None) => Ok(Self::Unshredded { metadata, 
value }),
-            (metadata, None, Some(typed_value)) => Ok(Self::Typed {
-                metadata,
-                typed_value,
-            }),
-            (_metadata_field, None, None) => 
Err(ArrowError::InvalidArgumentError(String::from(
+    pub fn try_new(inner: &StructArray) -> Result<Self, ArrowError> {
+        // Note the specification allows for any order so we must search by 
name
+
+        // Find the value field, if present
+        let value = inner
+            .column_by_name("value")
+            .map(|v| {
+                v.as_binary_view_opt().ok_or_else(|| {
+                    ArrowError::NotYetImplemented(format!(
+                        "VariantArray 'value' field must be BinaryView, got 
{}",
+                        v.data_type()
+                    ))
+                })
+            })
+            .transpose()?
+            .cloned();
+
+        // Find the typed_value field, if present
+        let typed_value = inner.column_by_name("typed_value").cloned();
+
+        match (value, typed_value) {
+            (Some(value), Some(typed_value)) => {
+                Ok(Self::ImperfectlyShredded { value, typed_value })
+            }
+            (Some(value), None) => Ok(Self::Unshredded { value }),
+            (None, Some(typed_value)) => Ok(Self::PerfectlyShredded { 
typed_value }),
+            (None, None) => Err(ArrowError::InvalidArgumentError(String::from(
                 "VariantArray has neither value nor typed_value field",
             ))),
         }
     }
 
-    /// Return a reference to the metadata field
-    pub fn metadata_field(&self) -> &BinaryViewArray {
-        match self {
-            ShreddingState::Unshredded { metadata, .. } => metadata,
-            ShreddingState::Typed { metadata, .. } => metadata,
-            ShreddingState::PartiallyShredded { metadata, .. } => metadata,
-        }
-    }
-
     /// Return a reference to the value field, if present
     pub fn value_field(&self) -> Option<&BinaryViewArray> {
         match self {
             ShreddingState::Unshredded { value, .. } => Some(value),
-            ShreddingState::Typed { .. } => None,
-            ShreddingState::PartiallyShredded { value, .. } => Some(value),
+            ShreddingState::PerfectlyShredded { .. } => None,
+            ShreddingState::ImperfectlyShredded { value, .. } => Some(value),
         }
     }
 
     /// Return a reference to the typed_value field, if present
     pub fn typed_value_field(&self) -> Option<&ArrayRef> {
         match self {
             ShreddingState::Unshredded { .. } => None,
-            ShreddingState::Typed { typed_value, .. } => Some(typed_value),
-            ShreddingState::PartiallyShredded { typed_value, .. } => 
Some(typed_value),
+            ShreddingState::PerfectlyShredded { typed_value, .. } => 
Some(typed_value),
+            ShreddingState::ImperfectlyShredded { typed_value, .. } => 
Some(typed_value),
         }
     }
 
     /// Slice all the underlying arrays
     pub fn slice(&self, offset: usize, length: usize) -> Self {
         match self {
-            ShreddingState::Unshredded { metadata, value } => 
ShreddingState::Unshredded {
-                metadata: metadata.slice(offset, length),
-                value: value.slice(offset, length),
-            },
-            ShreddingState::Typed {
-                metadata,
-                typed_value,
-            } => ShreddingState::Typed {
-                metadata: metadata.slice(offset, length),
-                typed_value: typed_value.slice(offset, length),
-            },
-            ShreddingState::PartiallyShredded {
-                metadata,
-                value,
-                typed_value,
-            } => ShreddingState::PartiallyShredded {
-                metadata: metadata.slice(offset, length),
+            ShreddingState::Unshredded { value } => ShreddingState::Unshredded 
{
                 value: value.slice(offset, length),
-                typed_value: typed_value.slice(offset, length),
             },
+            ShreddingState::PerfectlyShredded { typed_value } => {
+                ShreddingState::PerfectlyShredded {
+                    typed_value: typed_value.slice(offset, length),
+                }
+            }
+            ShreddingState::ImperfectlyShredded { value, typed_value } => {
+                ShreddingState::ImperfectlyShredded {
+                    value: value.slice(offset, length),
+                    typed_value: typed_value.slice(offset, length),
+                }
+            }
         }
     }
 }
 
+/// Builds struct arrays from component fields
+///
+/// TODO: move to arrow crate
+#[derive(Debug, Default, Clone)]
+pub struct StructArrayBuilder {

Review Comment:
   I think it depends on whether we think this is a good builder idiom to 
introduce? 
   At least, I haven't seen anything like it elsewhere in arrow-rs?
   
   One nice property of this builder is it eliminates the risk of mismatch 
between field type and array type. But a public version of this API would still 
need to have a fallible alternative, because array lengths could still mismatch 
and cause a panic.



##########
parquet-variant-compute/src/variant_get/mod.rs:
##########
@@ -15,50 +15,208 @@
 // specific language governing permissions and limitations
 // under the License.
 use arrow::{
-    array::{Array, ArrayRef},
+    array::{self, Array, ArrayRef, BinaryViewArray, StructArray},
     compute::CastOptions,
     error::Result,
 };
-use arrow_schema::{ArrowError, FieldRef};
-use parquet_variant::VariantPath;
+use arrow_schema::{ArrowError, DataType, FieldRef};
+use parquet_variant::{VariantPath, VariantPathElement};
 
 use crate::variant_array::ShreddingState;
-use crate::variant_get::output::instantiate_output_builder;
-use crate::VariantArray;
+use crate::{variant_array::ShreddedVariantFieldArray, VariantArray};
+
+use std::sync::Arc;
 
 mod output;
 
+pub(crate) enum ShreddedPathStep<'a> {
+    /// Path step succeeded, return the new shredding state
+    Success(&'a ShreddingState),
+    /// The path element is not present in the `typed_value` column and there 
is no `value` column,
+    /// so we we know it does not exist. It, and all paths under it, are 
all-NULL.
+    Missing,
+    /// The path element is not present in the `typed_value` and must be 
retrieved from the `value`
+    /// column instead. The caller should be prepared to handle any value, 
including the requested
+    /// type, an arbitrary "wrong" type, or `Variant::Null`.
+    NotShredded,
+}
+
+/// Given a shredded variant field -- a `(value?, typed_value?)` pair -- try 
to take one path step
+/// deeper. For a `VariantPathElement::Field`, the step fails if there is no 
`typed_value` at this
+/// level, or if `typed_value` is not a struct, or if the requested field name 
does not exist.
+///
+/// TODO: Support `VariantPathElement::Index`? It wouldn't be easy, and maybe 
not even possible.
+pub(crate) fn follow_shredded_path_element<'a>(
+    shredding_state: &'a ShreddingState,
+    path_element: &VariantPathElement<'_>,
+) -> Result<ShreddedPathStep<'a>> {
+    // If the requested path element 's not present in `typed_value`, and 
`value` is missing, then
+    // we know it does not exist; it, and all paths under it, are all-NULL.
+    let missing_path_step = || {
+        if shredding_state.value_field().is_none() {
+            ShreddedPathStep::Missing
+        } else {
+            ShreddedPathStep::NotShredded
+        }
+    };
+
+    let Some(typed_value) = shredding_state.typed_value_field() else {
+        return Ok(missing_path_step());
+    };
+
+    match path_element {
+        VariantPathElement::Field { name } => {
+            // Try to step into the requested field name of a struct.
+            let Some(field) = typed_value
+                .as_any()
+                .downcast_ref::<StructArray>()
+                .and_then(|typed_value| typed_value.column_by_name(name))
+            else {
+                return Ok(missing_path_step());
+            };
+
+            let field = field
+                .as_any()
+                .downcast_ref::<ShreddedVariantFieldArray>()

Review Comment:
   Should we continue this discussion in the other comment thread, where I 
outlined the pros and cons of this struct?



##########
parquet-variant-compute/src/variant_get/mod.rs:
##########
@@ -15,50 +15,208 @@
 // specific language governing permissions and limitations
 // under the License.
 use arrow::{
-    array::{Array, ArrayRef},
+    array::{self, Array, ArrayRef, BinaryViewArray, StructArray},
     compute::CastOptions,
     error::Result,
 };
-use arrow_schema::{ArrowError, FieldRef};
-use parquet_variant::VariantPath;
+use arrow_schema::{ArrowError, DataType, FieldRef};
+use parquet_variant::{VariantPath, VariantPathElement};
 
 use crate::variant_array::ShreddingState;
-use crate::variant_get::output::instantiate_output_builder;
-use crate::VariantArray;
+use crate::{variant_array::ShreddedVariantFieldArray, VariantArray};
+
+use std::sync::Arc;
 
 mod output;
 
+pub(crate) enum ShreddedPathStep<'a> {
+    /// Path step succeeded, return the new shredding state
+    Success(&'a ShreddingState),
+    /// The path element is not present in the `typed_value` column and there 
is no `value` column,
+    /// so we we know it does not exist. It, and all paths under it, are 
all-NULL.
+    Missing,
+    /// The path element is not present in the `typed_value` and must be 
retrieved from the `value`
+    /// column instead. The caller should be prepared to handle any value, 
including the requested
+    /// type, an arbitrary "wrong" type, or `Variant::Null`.
+    NotShredded,
+}
+
+/// Given a shredded variant field -- a `(value?, typed_value?)` pair -- try 
to take one path step
+/// deeper. For a `VariantPathElement::Field`, the step fails if there is no 
`typed_value` at this
+/// level, or if `typed_value` is not a struct, or if the requested field name 
does not exist.
+///
+/// TODO: Support `VariantPathElement::Index`? It wouldn't be easy, and maybe 
not even possible.
+pub(crate) fn follow_shredded_path_element<'a>(
+    shredding_state: &'a ShreddingState,
+    path_element: &VariantPathElement<'_>,
+) -> Result<ShreddedPathStep<'a>> {
+    // If the requested path element 's not present in `typed_value`, and 
`value` is missing, then
+    // we know it does not exist; it, and all paths under it, are all-NULL.
+    let missing_path_step = || {
+        if shredding_state.value_field().is_none() {
+            ShreddedPathStep::Missing
+        } else {
+            ShreddedPathStep::NotShredded
+        }
+    };
+
+    let Some(typed_value) = shredding_state.typed_value_field() else {
+        return Ok(missing_path_step());
+    };
+
+    match path_element {
+        VariantPathElement::Field { name } => {
+            // Try to step into the requested field name of a struct.
+            let Some(field) = typed_value
+                .as_any()
+                .downcast_ref::<StructArray>()
+                .and_then(|typed_value| typed_value.column_by_name(name))
+            else {
+                return Ok(missing_path_step());
+            };
+
+            let field = field
+                .as_any()
+                .downcast_ref::<ShreddedVariantFieldArray>()
+                .ok_or_else(|| {
+                    // TODO: Should we blow up? Or just end the traversal and 
let the normal
+                    // variant pathing code sort out the mess that it must 
anyway be
+                    // prepared to handle?
+                    ArrowError::InvalidArgumentError(format!(
+                        "Expected a ShreddedVariantFieldArray, got {:?} 
instead",
+                        field.data_type(),
+                    ))
+                })?;
+
+            Ok(ShreddedPathStep::Success(field.shredding_state()))
+        }
+        VariantPathElement::Index { .. } => {
+            // TODO: Support array indexing. Among other things, it will 
require slicing not
+            // only the array we have here, but also the corresponding 
metadata and null masks.
+            Err(ArrowError::NotYetImplemented(
+                "Pathing into shredded variant array index".into(),
+            ))
+        }
+    }
+}
+
+/// Follows the given path as far as possible through shredded variant fields. 
If the path ends on a
+/// shredded field, return it directly. Otherwise, use a row shredder to 
follow the rest of the path
+/// and extract the requested value on a per-row basis.
+fn shredded_get_path(
+    input: &VariantArray,
+    path: &[VariantPathElement<'_>],
+    as_type: Option<&DataType>,
+) -> Result<ArrayRef> {
+    // Helper that creates a new VariantArray from the given nested value and 
typed_value columns,
+    let make_target_variant = |value: Option<BinaryViewArray>, typed_value: 
Option<ArrayRef>| {
+        let metadata = input.metadata_field().clone();
+        let nulls = input.inner().nulls().cloned();
+        VariantArray::from_parts(
+            metadata,
+            value,
+            typed_value,
+            nulls,
+        )
+    };
+
+    // Helper that shreds a VariantArray to a specific type.
+    let shred_basic_variant = |target: VariantArray, path: VariantPath<'_>, 
as_type: Option<&DataType>| {
+        let mut builder = 
output::struct_output::make_shredding_row_builder(path, as_type)?;
+        for i in 0..target.len() {
+            if target.is_null(i) {
+                builder.append_null()?;
+            } else {
+                builder.append_value(&target.value(i))?;
+            }
+        }
+        builder.finish()
+    };
+
+    // Peel away the prefix of path elements that traverses the shredded parts 
of this variant
+    // column. Shredding will traverse the rest of the path on a per-row basis.
+    let mut shredding_state = input.shredding_state();
+    let mut path_index = 0;
+    for path_element in path {
+        match follow_shredded_path_element(shredding_state, path_element)? {
+            ShreddedPathStep::Success(state) => {
+                shredding_state = state;
+                path_index += 1;
+                continue;
+            }
+            ShreddedPathStep::Missing => {
+                let num_rows = input.len();
+                let arr = match as_type {
+                    Some(data_type) => 
Arc::new(array::new_null_array(data_type, num_rows)) as _,
+                    None => Arc::new(array::NullArray::new(num_rows)) as _,
+                };
+                return Ok(arr);
+            }
+            ShreddedPathStep::NotShredded => {
+                let target = 
make_target_variant(shredding_state.value_field().cloned(), None);
+                return shred_basic_variant(target, path[path_index..].into(), 
as_type);
+            }
+        };
+    }
+
+    // Path exhausted! Create a new `VariantArray` for the location we landed 
on.
+    let target = make_target_variant(
+        shredding_state.value_field().cloned(),
+        shredding_state.typed_value_field().cloned(),
+    );
+
+    // If our caller did not request any specific type, we can just return 
whatever we landed on.
+    let Some(data_type) = as_type else {
+        return Ok(Arc::new(target));
+    };
+
+    // Structs are special. Recurse into each field separately, hoping to 
follow the shredding even
+    // further, and build up the final struct from those individually shredded 
results.
+    if let DataType::Struct(fields) = data_type {
+        let children = fields
+            .iter()
+            .map(|field| {
+                shredded_get_path(
+                    &target,
+                    &[VariantPathElement::from(field.name().as_str())],
+                    Some(field.data_type()),
+                )
+            })
+            .collect::<Result<Vec<_>>>()?;
+
+        return Ok(Arc::new(StructArray::try_new(
+            fields.clone(),
+            children,
+            target.nulls().cloned(),
+        )?));
+    }
+
+    // Not a struct, so directly shred the variant as the requested type
+    shred_basic_variant(target, VariantPath::default(), as_type)
+}
+
 /// Returns an array with the specified path extracted from the variant values.
 ///
 /// The return array type depends on the `as_type` field of the options 
parameter
 /// 1. `as_type: None`: a VariantArray is returned. The values in this new 
VariantArray will point
 ///    to the specified path.
 /// 2. `as_type: Some(<specific field>)`: an array of the specified type is 
returned.
+///
+/// TODO: How would a caller request a struct or list type where the 
fields/elements can be any
+/// variant? Caller can pass None as the requested type to fetch a specific 
path, but it would
+/// quickly become annoying (and inefficient) to call `variant_get` for each 
leaf value in a struct or
+/// list and then try to assemble the results.

Review Comment:
   Actually, an idea!
   
   What if we introduce a companion utility, `variant_get_schema`, which 
instead of returning an `ArrayRef` returns a `DataType`? 
   
   That way, a caller who wants to extract one or more variant fields as part 
of a larger struct can fetch the actual variant shredding schema for each such 
field and splice it into the data type they pass to `variant_get`.
   
   For example, suppose I want to work with the following logical schema:
   ```
   STRUCT {
       a: STRUCT {
           id: INT,
           payload: VARIANT,
       },
       b: STRUCT {
           key: STRING,
           payload: VARIANT,
       },
   }
   ```
   Then I could make two calls to `variant_get_schema` followed by a (now 
precisely targeted) call to `variant_get`:
   ```rust
   // hand-waving the path argument for simplicity...
   let a_payload_type = variant_get_schema(input, &["a", "payload"])?;
   let b_payload_type = variant_get_schema(input, &["b", "payload"])?;
   let a_type = DataType::Struct(Fields::from([
       Field::new("id", ...),
       Field::new("payload", a_payload_type, ...),
   ]));
   let b_type = DataType::Struct(Fields::from([
       Field::new("key", ...),
       Field::new("payload", b_payload_type, ...),
   ]));
   let data_type = DataType::Struct(Fields::from([
       Field::new("a", a_type, ...),
       Field::new("b", b_type, ...),
   ]));
   let data = variant_get(input, &[], Some(data_type))?;
   ```
   
   If the path ended on a shredded variant, `variant_get_schema` would return 
the corresponding shredding schema, allowing `variant_get` to return the 
corresponding sub-arrays unchanged. Otherwise, it would just return a binary 
variant schema, since `variant_get` would anyway have to extract the bytes row 
by row.



##########
parquet-variant-compute/src/variant_array.rs:
##########
@@ -229,107 +328,138 @@ pub enum ShreddingState {
     // TODO: add missing state where there is neither value nor typed_value
     // Missing { metadata: BinaryViewArray },
     /// This variant has no typed_value field
-    Unshredded {
-        metadata: BinaryViewArray,
-        value: BinaryViewArray,
-    },
+    Unshredded { value: BinaryViewArray },
     /// This variant has a typed_value field and no value field
     /// meaning it is the shredded type
-    Typed {
-        metadata: BinaryViewArray,
-        typed_value: ArrayRef,
-    },
-    /// Partially shredded:
-    /// * value is an object
-    /// * typed_value is a shredded object.
+    PerfectlyShredded { typed_value: ArrayRef },

Review Comment:
   > My understanding was that if `typed_value` was non null, it contains the 
value and an implementation doesn't even have to check the `value` field
   
   For primitive values and arrays, yes -- `(NULL, NULL)` is an invalid case 
and `(Variant::Null, NULL)` translates to `Variant::Null` (which casts to 
`NULL` of any other type).
   
   Objects have a multi-step process to access field `f`:
   1. Is the `typed_value` NULL?
      * Yes => Not even an object => have to use `value` (or just fail, since 
nothing casts to struct)
         * It is illegal for `value` to also be NULL, because this is the 
top-level
      * No => [partially] shredded object => continue (ignore `value`)
   2. Does the column `typed_value.f` exist?
      * No => `f` was not in the shredding schema => have to check `value` 
which may or may not be a variant object
      * Yes => `f` was in the shredding schema => continue
   3. Is `typed_value.f.typed_value` NULL?
      * Yes => This row does not contain `f`, or `f` is the wrong type => fall 
back to `typed_value.f.value`
         * If `value` is also NULL then `f` is NULL; otherwise, `f` could be 
any variant object (including `Variant::Null`)
      * No => This row contains `f` and it is shredded (ignore 
`typed_value.f.value`)



##########
parquet-variant-compute/src/variant_get/mod.rs:
##########
@@ -15,50 +15,208 @@
 // specific language governing permissions and limitations
 // under the License.
 use arrow::{
-    array::{Array, ArrayRef},
+    array::{self, Array, ArrayRef, BinaryViewArray, StructArray},
     compute::CastOptions,
     error::Result,
 };
-use arrow_schema::{ArrowError, FieldRef};
-use parquet_variant::VariantPath;
+use arrow_schema::{ArrowError, DataType, FieldRef};
+use parquet_variant::{VariantPath, VariantPathElement};
 
 use crate::variant_array::ShreddingState;
-use crate::variant_get::output::instantiate_output_builder;
-use crate::VariantArray;
+use crate::{variant_array::ShreddedVariantFieldArray, VariantArray};
+
+use std::sync::Arc;
 
 mod output;
 
+pub(crate) enum ShreddedPathStep<'a> {
+    /// Path step succeeded, return the new shredding state
+    Success(&'a ShreddingState),
+    /// The path element is not present in the `typed_value` column and there 
is no `value` column,
+    /// so we we know it does not exist. It, and all paths under it, are 
all-NULL.
+    Missing,
+    /// The path element is not present in the `typed_value` and must be 
retrieved from the `value`
+    /// column instead. The caller should be prepared to handle any value, 
including the requested
+    /// type, an arbitrary "wrong" type, or `Variant::Null`.
+    NotShredded,
+}
+
+/// Given a shredded variant field -- a `(value?, typed_value?)` pair -- try 
to take one path step
+/// deeper. For a `VariantPathElement::Field`, the step fails if there is no 
`typed_value` at this
+/// level, or if `typed_value` is not a struct, or if the requested field name 
does not exist.
+///
+/// TODO: Support `VariantPathElement::Index`? It wouldn't be easy, and maybe 
not even possible.
+pub(crate) fn follow_shredded_path_element<'a>(
+    shredding_state: &'a ShreddingState,
+    path_element: &VariantPathElement<'_>,
+) -> Result<ShreddedPathStep<'a>> {
+    // If the requested path element 's not present in `typed_value`, and 
`value` is missing, then
+    // we know it does not exist; it, and all paths under it, are all-NULL.
+    let missing_path_step = || {
+        if shredding_state.value_field().is_none() {
+            ShreddedPathStep::Missing
+        } else {
+            ShreddedPathStep::NotShredded
+        }
+    };
+
+    let Some(typed_value) = shredding_state.typed_value_field() else {
+        return Ok(missing_path_step());
+    };
+
+    match path_element {
+        VariantPathElement::Field { name } => {
+            // Try to step into the requested field name of a struct.
+            let Some(field) = typed_value
+                .as_any()
+                .downcast_ref::<StructArray>()
+                .and_then(|typed_value| typed_value.column_by_name(name))
+            else {
+                return Ok(missing_path_step());
+            };
+
+            let field = field
+                .as_any()
+                .downcast_ref::<ShreddedVariantFieldArray>()
+                .ok_or_else(|| {
+                    // TODO: Should we blow up? Or just end the traversal and 
let the normal
+                    // variant pathing code sort out the mess that it must 
anyway be
+                    // prepared to handle?
+                    ArrowError::InvalidArgumentError(format!(
+                        "Expected a ShreddedVariantFieldArray, got {:?} 
instead",
+                        field.data_type(),
+                    ))
+                })?;
+
+            Ok(ShreddedPathStep::Success(field.shredding_state()))
+        }
+        VariantPathElement::Index { .. } => {
+            // TODO: Support array indexing. Among other things, it will 
require slicing not
+            // only the array we have here, but also the corresponding 
metadata and null masks.
+            Err(ArrowError::NotYetImplemented(
+                "Pathing into shredded variant array index".into(),
+            ))
+        }
+    }
+}
+
+/// Follows the given path as far as possible through shredded variant fields. 
If the path ends on a
+/// shredded field, return it directly. Otherwise, use a row shredder to 
follow the rest of the path
+/// and extract the requested value on a per-row basis.
+fn shredded_get_path(
+    input: &VariantArray,
+    path: &[VariantPathElement<'_>],
+    as_type: Option<&DataType>,
+) -> Result<ArrayRef> {
+    // Helper that creates a new VariantArray from the given nested value and 
typed_value columns,
+    let make_target_variant = |value: Option<BinaryViewArray>, typed_value: 
Option<ArrayRef>| {
+        let metadata = input.metadata_field().clone();
+        let nulls = input.inner().nulls().cloned();
+        VariantArray::from_parts(
+            metadata,
+            value,
+            typed_value,
+            nulls,
+        )
+    };
+
+    // Helper that shreds a VariantArray to a specific type.
+    let shred_basic_variant = |target: VariantArray, path: VariantPath<'_>, 
as_type: Option<&DataType>| {
+        let mut builder = 
output::struct_output::make_shredding_row_builder(path, as_type)?;
+        for i in 0..target.len() {
+            if target.is_null(i) {
+                builder.append_null()?;
+            } else {
+                builder.append_value(&target.value(i))?;
+            }
+        }
+        builder.finish()
+    };
+
+    // Peel away the prefix of path elements that traverses the shredded parts 
of this variant
+    // column. Shredding will traverse the rest of the path on a per-row basis.
+    let mut shredding_state = input.shredding_state();
+    let mut path_index = 0;
+    for path_element in path {
+        match follow_shredded_path_element(shredding_state, path_element)? {
+            ShreddedPathStep::Success(state) => {
+                shredding_state = state;
+                path_index += 1;
+                continue;
+            }
+            ShreddedPathStep::Missing => {
+                let num_rows = input.len();
+                let arr = match as_type {
+                    Some(data_type) => 
Arc::new(array::new_null_array(data_type, num_rows)) as _,
+                    None => Arc::new(array::NullArray::new(num_rows)) as _,
+                };
+                return Ok(arr);
+            }
+            ShreddedPathStep::NotShredded => {
+                let target = 
make_target_variant(shredding_state.value_field().cloned(), None);
+                return shred_basic_variant(target, path[path_index..].into(), 
as_type);
+            }
+        };
+    }
+
+    // Path exhausted! Create a new `VariantArray` for the location we landed 
on.
+    let target = make_target_variant(
+        shredding_state.value_field().cloned(),
+        shredding_state.typed_value_field().cloned(),
+    );
+
+    // If our caller did not request any specific type, we can just return 
whatever we landed on.
+    let Some(data_type) = as_type else {
+        return Ok(Arc::new(target));
+    };
+
+    // Structs are special. Recurse into each field separately, hoping to 
follow the shredding even
+    // further, and build up the final struct from those individually shredded 
results.
+    if let DataType::Struct(fields) = data_type {
+        let children = fields
+            .iter()
+            .map(|field| {
+                shredded_get_path(
+                    &target,
+                    &[VariantPathElement::from(field.name().as_str())],
+                    Some(field.data_type()),
+                )
+            })
+            .collect::<Result<Vec<_>>>()?;
+
+        return Ok(Arc::new(StructArray::try_new(
+            fields.clone(),
+            children,
+            target.nulls().cloned(),
+        )?));
+    }
+
+    // Not a struct, so directly shred the variant as the requested type
+    shred_basic_variant(target, VariantPath::default(), as_type)
+}
+
 /// Returns an array with the specified path extracted from the variant values.
 ///
 /// The return array type depends on the `as_type` field of the options 
parameter
 /// 1. `as_type: None`: a VariantArray is returned. The values in this new 
VariantArray will point
 ///    to the specified path.
 /// 2. `as_type: Some(<specific field>)`: an array of the specified type is 
returned.
+///
+/// TODO: How would a caller request a struct or list type where the 
fields/elements can be any
+/// variant? Caller can pass None as the requested type to fetch a specific 
path, but it would
+/// quickly become annoying (and inefficient) to call `variant_get` for each 
leaf value in a struct or
+/// list and then try to assemble the results.

Review Comment:
   > I think maybe we can actually punt on casting the result to a proper 
struct array recursively
   
   Assuming the caller did, in fact, ask for a fully strongly typed struct (no 
variant leaf fields), I'm pretty sure this PR already covers the solution? Even 
if they ask for a variant leaf field, they just pay a performance penalty to 
shred/unshred/reshred each field if the underlying shredding doesn't match the 
requested shredding.



##########
parquet-variant-compute/src/variant_get/output/struct_output.rs:
##########
@@ -0,0 +1,371 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use arrow::array::{ArrayRef, AsArray as _, NullBufferBuilder};
+use arrow::datatypes;
+use arrow::datatypes::{ArrowPrimitiveType, FieldRef};
+use arrow::error::{ArrowError, Result};
+use parquet_variant::{Variant, VariantObject, VariantPath};
+
+use std::sync::Arc;
+
+#[allow(unused)]
+pub(crate) fn make_shredding_row_builder(
+    //metadata: &BinaryViewArray,
+    path: VariantPath<'_>,
+    data_type: Option<&datatypes::DataType>,
+) -> Result<Box<dyn VariantShreddingRowBuilder>> {
+    todo!() // wire it all up!
+}
+
+/// Builder for shredding variant values into strongly typed Arrow arrays.
+///
+/// Useful for variant_get kernels that need to extract specific paths from 
variant values, possibly
+/// with casting of leaf values to specific types.
+#[allow(unused)]
+pub(crate) trait VariantShreddingRowBuilder {
+    fn append_null(&mut self) -> Result<()>;
+
+    fn append_value(&mut self, value: &Variant<'_, '_>) -> Result<bool>;
+
+    fn finish(&mut self) -> Result<ArrayRef>;
+}
+
+/// A thin wrapper whose only job is to extract a specific path from a variant 
value and pass the
+/// result to a nested builder.
+#[allow(unused)]
+struct VariantPathRowBuilder<'a, T: VariantShreddingRowBuilder> {
+    builder: T,
+    path: VariantPath<'a>,
+}
+
+impl<T: VariantShreddingRowBuilder> VariantShreddingRowBuilder for 
VariantPathRowBuilder<'_, T> {
+    fn append_null(&mut self) -> Result<()> {
+        self.builder.append_null()
+    }
+
+    fn append_value(&mut self, value: &Variant<'_, '_>) -> Result<bool> {
+        if let Some(v) = value.get_path(&self.path) {
+            self.builder.append_value(&v)
+        } else {
+            self.builder.append_null()?;
+            Ok(false)
+        }
+    }
+    fn finish(&mut self) -> Result<ArrayRef> {
+        self.builder.finish()
+    }
+}
+
+/// Helper trait for converting `Variant` values to arrow primitive values.
+#[allow(unused)]
+trait VariantAsPrimitive<T: ArrowPrimitiveType> {
+    fn as_primitive(&self) -> Option<T::Native>;
+}
+impl VariantAsPrimitive<datatypes::Int32Type> for Variant<'_, '_> {
+    fn as_primitive(&self) -> Option<i32> {
+        self.as_int32()
+    }
+}
+impl VariantAsPrimitive<datatypes::Float64Type> for Variant<'_, '_> {
+    fn as_primitive(&self) -> Option<f64> {
+        self.as_f64()
+    }
+}
+
+/// Builder for shredding variant values to primitive values
+#[allow(unused)]
+struct PrimitiveVariantShreddingRowBuilder<T: ArrowPrimitiveType> {
+    builder: arrow::array::PrimitiveBuilder<T>,
+}
+
+impl<T> VariantShreddingRowBuilder for PrimitiveVariantShreddingRowBuilder<T>
+where
+    T: ArrowPrimitiveType,
+    for<'m, 'v> Variant<'m, 'v>: VariantAsPrimitive<T>,
+{
+    fn append_null(&mut self) -> Result<()> {
+        self.builder.append_null();
+        Ok(())
+    }
+
+    fn append_value(&mut self, value: &Variant<'_, '_>) -> Result<bool> {
+        if let Some(v) = value.as_primitive() {
+            self.builder.append_value(v);
+            Ok(true)
+        } else {
+            self.builder.append_null(); // TODO: handle casting failure
+            Ok(false)
+        }
+    }
+
+    fn finish(&mut self) -> Result<ArrayRef> {
+        Ok(Arc::new(self.builder.finish()))
+    }
+}
+
+/// Builder for appending raw binary variant values to a BinaryViewArray. It 
copies the bytes
+/// as-is, without any decoding.
+#[allow(unused)]
+struct BinaryVariantRowBuilder {
+    nulls: NullBufferBuilder,
+}
+
+impl VariantShreddingRowBuilder for BinaryVariantRowBuilder {
+    fn append_null(&mut self) -> Result<()> {
+        self.nulls.append_null();
+        Ok(())
+    }
+    fn append_value(&mut self, _value: &Variant<'_, '_>) -> Result<bool> {
+        // We need a way to convert a Variant directly to bytes. In 
particular, we want to just copy
+        // across the underlying value byte slice of a `Variant::Object` or 
`Variant::List`, without
+        // any interaction with a `VariantMetadata` (because we will just 
reuse the existing one).
+        //
+        // One could _probably_ emulate this with 
parquet_variant::VariantBuilder, but it would do a
+        // lot of unnecessary work and would also create a new metadata column 
we don't need.

Review Comment:
   Interesting thought! I had forgotten about `VariantBuilderExt`. It _almost_ 
solves the issue with building up (or filtering down) partially shredded 
objects -- its `new_object` method could return a builder used to add the 
desired fields. Except (a) `ObjectBuilder` is a concrete type, not a trait, so 
we can't customize its behavior; and (b) that still needs to mess with string 
field names and metadata dictionaries. We'd really want to define a raw 
`VariantObjectField` -- basically a (field_id, bytes) pair -- and have a method 
for appending _that_ to the builder.



##########
parquet-variant-compute/src/variant_array.rs:
##########
@@ -192,7 +202,96 @@ impl VariantArray {
 
     /// Return a reference to the metadata field of the [`StructArray`]
     pub fn metadata_field(&self) -> &BinaryViewArray {
-        self.shredding_state.metadata_field()
+        &self.metadata
+    }
+
+    /// Return a reference to the value field of the `StructArray`
+    pub fn value_field(&self) -> Option<&BinaryViewArray> {
+        self.shredding_state.value_field()
+    }
+
+    /// Return a reference to the typed_value field of the `StructArray`, if 
present
+    pub fn typed_value_field(&self) -> Option<&ArrayRef> {
+        self.shredding_state.typed_value_field()
+    }
+}
+
+/// One shredded field of a partially or prefectly shredded variant. For 
example, suppose the
+/// shredding schema for variant `v` treats it as an object with a single 
field `a`, where `a` is
+/// itself a struct with the single field `b` of type INT. Then the physical 
layout of the column
+/// is:
+///
+/// ```text
+/// v: VARIANT {
+///     metadata: BINARY,
+///     value: BINARY,
+///     typed_value: STRUCT {
+///         a: SHREDDED_VARIANT_FIELD {
+///             value: BINARY,
+///             typed_value: STRUCT {
+///                 a: SHREDDED_VARIANT_FIELD {

Review Comment:
   ```suggestion
   ///                 b: SHREDDED_VARIANT_FIELD {
   ```



##########
parquet-variant-compute/src/variant_get/mod.rs:
##########
@@ -15,50 +15,208 @@
 // specific language governing permissions and limitations
 // under the License.
 use arrow::{
-    array::{Array, ArrayRef},
+    array::{self, Array, ArrayRef, BinaryViewArray, StructArray},
     compute::CastOptions,
     error::Result,
 };
-use arrow_schema::{ArrowError, FieldRef};
-use parquet_variant::VariantPath;
+use arrow_schema::{ArrowError, DataType, FieldRef};
+use parquet_variant::{VariantPath, VariantPathElement};
 
 use crate::variant_array::ShreddingState;
-use crate::variant_get::output::instantiate_output_builder;
-use crate::VariantArray;
+use crate::{variant_array::ShreddedVariantFieldArray, VariantArray};
+
+use std::sync::Arc;
 
 mod output;
 
+pub(crate) enum ShreddedPathStep<'a> {
+    /// Path step succeeded, return the new shredding state
+    Success(&'a ShreddingState),
+    /// The path element is not present in the `typed_value` column and there 
is no `value` column,
+    /// so we we know it does not exist. It, and all paths under it, are 
all-NULL.
+    Missing,
+    /// The path element is not present in the `typed_value` and must be 
retrieved from the `value`
+    /// column instead. The caller should be prepared to handle any value, 
including the requested
+    /// type, an arbitrary "wrong" type, or `Variant::Null`.
+    NotShredded,
+}
+
+/// Given a shredded variant field -- a `(value?, typed_value?)` pair -- try 
to take one path step
+/// deeper. For a `VariantPathElement::Field`, the step fails if there is no 
`typed_value` at this
+/// level, or if `typed_value` is not a struct, or if the requested field name 
does not exist.
+///
+/// TODO: Support `VariantPathElement::Index`? It wouldn't be easy, and maybe 
not even possible.
+pub(crate) fn follow_shredded_path_element<'a>(
+    shredding_state: &'a ShreddingState,
+    path_element: &VariantPathElement<'_>,
+) -> Result<ShreddedPathStep<'a>> {
+    // If the requested path element 's not present in `typed_value`, and 
`value` is missing, then
+    // we know it does not exist; it, and all paths under it, are all-NULL.
+    let missing_path_step = || {
+        if shredding_state.value_field().is_none() {
+            ShreddedPathStep::Missing
+        } else {
+            ShreddedPathStep::NotShredded
+        }
+    };
+
+    let Some(typed_value) = shredding_state.typed_value_field() else {
+        return Ok(missing_path_step());
+    };
+
+    match path_element {
+        VariantPathElement::Field { name } => {
+            // Try to step into the requested field name of a struct.
+            let Some(field) = typed_value
+                .as_any()
+                .downcast_ref::<StructArray>()
+                .and_then(|typed_value| typed_value.column_by_name(name))
+            else {
+                return Ok(missing_path_step());
+            };
+
+            let field = field
+                .as_any()
+                .downcast_ref::<ShreddedVariantFieldArray>()
+                .ok_or_else(|| {
+                    // TODO: Should we blow up? Or just end the traversal and 
let the normal
+                    // variant pathing code sort out the mess that it must 
anyway be
+                    // prepared to handle?
+                    ArrowError::InvalidArgumentError(format!(
+                        "Expected a ShreddedVariantFieldArray, got {:?} 
instead",
+                        field.data_type(),
+                    ))
+                })?;
+
+            Ok(ShreddedPathStep::Success(field.shredding_state()))
+        }
+        VariantPathElement::Index { .. } => {
+            // TODO: Support array indexing. Among other things, it will 
require slicing not
+            // only the array we have here, but also the corresponding 
metadata and null masks.
+            Err(ArrowError::NotYetImplemented(
+                "Pathing into shredded variant array index".into(),
+            ))
+        }
+    }
+}
+
+/// Follows the given path as far as possible through shredded variant fields. 
If the path ends on a
+/// shredded field, return it directly. Otherwise, use a row shredder to 
follow the rest of the path
+/// and extract the requested value on a per-row basis.
+fn shredded_get_path(
+    input: &VariantArray,
+    path: &[VariantPathElement<'_>],
+    as_type: Option<&DataType>,
+) -> Result<ArrayRef> {
+    // Helper that creates a new VariantArray from the given nested value and 
typed_value columns,
+    let make_target_variant = |value: Option<BinaryViewArray>, typed_value: 
Option<ArrayRef>| {
+        let metadata = input.metadata_field().clone();
+        let nulls = input.inner().nulls().cloned();
+        VariantArray::from_parts(
+            metadata,
+            value,
+            typed_value,
+            nulls,
+        )
+    };
+
+    // Helper that shreds a VariantArray to a specific type.
+    let shred_basic_variant = |target: VariantArray, path: VariantPath<'_>, 
as_type: Option<&DataType>| {
+        let mut builder = 
output::struct_output::make_shredding_row_builder(path, as_type)?;
+        for i in 0..target.len() {
+            if target.is_null(i) {
+                builder.append_null()?;
+            } else {
+                builder.append_value(&target.value(i))?;
+            }
+        }
+        builder.finish()
+    };
+
+    // Peel away the prefix of path elements that traverses the shredded parts 
of this variant
+    // column. Shredding will traverse the rest of the path on a per-row basis.
+    let mut shredding_state = input.shredding_state();
+    let mut path_index = 0;
+    for path_element in path {
+        match follow_shredded_path_element(shredding_state, path_element)? {
+            ShreddedPathStep::Success(state) => {
+                shredding_state = state;
+                path_index += 1;
+                continue;
+            }
+            ShreddedPathStep::Missing => {
+                let num_rows = input.len();
+                let arr = match as_type {
+                    Some(data_type) => 
Arc::new(array::new_null_array(data_type, num_rows)) as _,
+                    None => Arc::new(array::NullArray::new(num_rows)) as _,
+                };
+                return Ok(arr);
+            }
+            ShreddedPathStep::NotShredded => {
+                let target = 
make_target_variant(shredding_state.value_field().cloned(), None);
+                return shred_basic_variant(target, path[path_index..].into(), 
as_type);
+            }
+        };
+    }
+
+    // Path exhausted! Create a new `VariantArray` for the location we landed 
on.
+    let target = make_target_variant(
+        shredding_state.value_field().cloned(),
+        shredding_state.typed_value_field().cloned(),
+    );
+
+    // If our caller did not request any specific type, we can just return 
whatever we landed on.
+    let Some(data_type) = as_type else {
+        return Ok(Arc::new(target));
+    };
+
+    // Structs are special. Recurse into each field separately, hoping to 
follow the shredding even
+    // further, and build up the final struct from those individually shredded 
results.
+    if let DataType::Struct(fields) = data_type {
+        let children = fields
+            .iter()
+            .map(|field| {
+                shredded_get_path(
+                    &target,
+                    &[VariantPathElement::from(field.name().as_str())],
+                    Some(field.data_type()),
+                )
+            })
+            .collect::<Result<Vec<_>>>()?;
+
+        return Ok(Arc::new(StructArray::try_new(
+            fields.clone(),
+            children,
+            target.nulls().cloned(),
+        )?));
+    }
+
+    // Not a struct, so directly shred the variant as the requested type
+    shred_basic_variant(target, VariantPath::default(), as_type)
+}
+
 /// Returns an array with the specified path extracted from the variant values.
 ///
 /// The return array type depends on the `as_type` field of the options 
parameter
 /// 1. `as_type: None`: a VariantArray is returned. The values in this new 
VariantArray will point
 ///    to the specified path.
 /// 2. `as_type: Some(<specific field>)`: an array of the specified type is 
returned.
+///
+/// TODO: How would a caller request a struct or list type where the 
fields/elements can be any
+/// variant? Caller can pass None as the requested type to fetch a specific 
path, but it would
+/// quickly become annoying (and inefficient) to call `variant_get` for each 
leaf value in a struct or
+/// list and then try to assemble the results.

Review Comment:
   > Maybe one way to handle this case would be to extract the fields your code 
wants to work with as individual arrays, rather than trying to get back a 
single StructArray with the relevant fields
   
   Agreed, but that would be pretty inefficient if each individual leaf column 
needs a separate `variant_get` call that has to repeat the work of pathing 
through common path prefixes. I suppose the caller could take on the work of 
memoizing common path prefixes, but then they're creating a lot of intermediate 
arrays. Not just annoying, but also slow because it forces columnar access for 
row-wise pathing.
   
   We could potentially define a bulk API like `variant_get_many`, but then we 
would need to somehow identify the common paths, deduplicate them, and then 
recursively process them... and that internal recursion would _still_ need a 
way to say "just give me whatever is at the end of each path". If we found an 
elegant way to communicate that internally, we could just make that solution 
the public API and be done with it.



##########
parquet-variant-compute/src/variant_get/output/struct_output.rs:
##########
@@ -0,0 +1,371 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use arrow::array::{ArrayRef, AsArray as _, NullBufferBuilder};
+use arrow::datatypes;
+use arrow::datatypes::{ArrowPrimitiveType, FieldRef};
+use arrow::error::{ArrowError, Result};
+use parquet_variant::{Variant, VariantObject, VariantPath};
+
+use std::sync::Arc;
+
+#[allow(unused)]
+pub(crate) fn make_shredding_row_builder(
+    //metadata: &BinaryViewArray,
+    path: VariantPath<'_>,
+    data_type: Option<&datatypes::DataType>,
+) -> Result<Box<dyn VariantShreddingRowBuilder>> {
+    todo!() // wire it all up!
+}
+
+/// Builder for shredding variant values into strongly typed Arrow arrays.
+///
+/// Useful for variant_get kernels that need to extract specific paths from 
variant values, possibly
+/// with casting of leaf values to specific types.
+#[allow(unused)]
+pub(crate) trait VariantShreddingRowBuilder {
+    fn append_null(&mut self) -> Result<()>;
+
+    fn append_value(&mut self, value: &Variant<'_, '_>) -> Result<bool>;
+
+    fn finish(&mut self) -> Result<ArrayRef>;
+}
+
+/// A thin wrapper whose only job is to extract a specific path from a variant 
value and pass the
+/// result to a nested builder.
+#[allow(unused)]
+struct VariantPathRowBuilder<'a, T: VariantShreddingRowBuilder> {
+    builder: T,
+    path: VariantPath<'a>,
+}
+
+impl<T: VariantShreddingRowBuilder> VariantShreddingRowBuilder for 
VariantPathRowBuilder<'_, T> {
+    fn append_null(&mut self) -> Result<()> {
+        self.builder.append_null()
+    }
+
+    fn append_value(&mut self, value: &Variant<'_, '_>) -> Result<bool> {
+        if let Some(v) = value.get_path(&self.path) {
+            self.builder.append_value(&v)
+        } else {
+            self.builder.append_null()?;
+            Ok(false)
+        }
+    }
+    fn finish(&mut self) -> Result<ArrayRef> {
+        self.builder.finish()
+    }
+}
+
+/// Helper trait for converting `Variant` values to arrow primitive values.
+#[allow(unused)]
+trait VariantAsPrimitive<T: ArrowPrimitiveType> {
+    fn as_primitive(&self) -> Option<T::Native>;
+}
+impl VariantAsPrimitive<datatypes::Int32Type> for Variant<'_, '_> {
+    fn as_primitive(&self) -> Option<i32> {
+        self.as_int32()
+    }
+}
+impl VariantAsPrimitive<datatypes::Float64Type> for Variant<'_, '_> {
+    fn as_primitive(&self) -> Option<f64> {
+        self.as_f64()
+    }
+}
+
+/// Builder for shredding variant values to primitive values
+#[allow(unused)]
+struct PrimitiveVariantShreddingRowBuilder<T: ArrowPrimitiveType> {
+    builder: arrow::array::PrimitiveBuilder<T>,
+}
+
+impl<T> VariantShreddingRowBuilder for PrimitiveVariantShreddingRowBuilder<T>
+where
+    T: ArrowPrimitiveType,
+    for<'m, 'v> Variant<'m, 'v>: VariantAsPrimitive<T>,
+{
+    fn append_null(&mut self) -> Result<()> {
+        self.builder.append_null();
+        Ok(())
+    }
+
+    fn append_value(&mut self, value: &Variant<'_, '_>) -> Result<bool> {
+        if let Some(v) = value.as_primitive() {
+            self.builder.append_value(v);
+            Ok(true)
+        } else {
+            self.builder.append_null(); // TODO: handle casting failure
+            Ok(false)
+        }
+    }
+
+    fn finish(&mut self) -> Result<ArrayRef> {
+        Ok(Arc::new(self.builder.finish()))
+    }
+}
+
+/// Builder for appending raw binary variant values to a BinaryViewArray. It 
copies the bytes
+/// as-is, without any decoding.
+#[allow(unused)]
+struct BinaryVariantRowBuilder {
+    nulls: NullBufferBuilder,
+}
+
+impl VariantShreddingRowBuilder for BinaryVariantRowBuilder {
+    fn append_null(&mut self) -> Result<()> {
+        self.nulls.append_null();
+        Ok(())
+    }
+    fn append_value(&mut self, _value: &Variant<'_, '_>) -> Result<bool> {
+        // We need a way to convert a Variant directly to bytes. In 
particular, we want to just copy
+        // across the underlying value byte slice of a `Variant::Object` or 
`Variant::List`, without
+        // any interaction with a `VariantMetadata` (because we will just 
reuse the existing one).
+        //
+        // One could _probably_ emulate this with 
parquet_variant::VariantBuilder, but it would do a
+        // lot of unnecessary work and would also create a new metadata column 
we don't need.
+        todo!()
+    }
+
+    fn finish(&mut self) -> Result<ArrayRef> {
+        // What `finish` does will depend strongly on how `append_value` ends 
up working. But
+        // ultimately we'll create and return a `VariantArray` instance.
+        todo!()
+    }
+}
+
+/// Builder that extracts a struct. Casting failures produce NULL or error 
according to options.
+#[allow(unused)]
+struct StructVariantShreddingRowBuilder {
+    nulls: NullBufferBuilder,
+    field_builders: Vec<(FieldRef, Box<dyn VariantShreddingRowBuilder>)>,
+}
+
+impl VariantShreddingRowBuilder for StructVariantShreddingRowBuilder {
+    fn append_null(&mut self) -> Result<()> {
+        for (_, builder) in &mut self.field_builders {
+            builder.append_null()?;
+        }
+        self.nulls.append_null();
+        Ok(())
+    }
+
+    fn append_value(&mut self, value: &Variant<'_, '_>) -> Result<bool> {
+        // Casting failure if it's not even an object.
+        let Variant::Object(value) = value else {
+            // TODO: handle casting failure
+            self.append_null()?;
+            return Ok(false);
+        };
+
+        // Process each field. If the field is missing, it becomes NULL. If 
the field is present,
+        // the child builder handles it from there, and a failed cast could 
produce NULL or error.
+        //
+        // TODO: This loop costs `O(m lg n)` where `m` is the number of fields 
in this builder and
+        // `n` is the number of fields in the variant object we're probing. 
Given that `m` and `n`
+        // could both be large -- indepentently of each other -- we should 
consider doing something
+        // more clever that bounds the cost to O(m + n).
+        for (field, builder) in &mut self.field_builders {
+            match value.get(field.name()) {
+                None => builder.append_null()?,
+                Some(v) => {
+                    builder.append_value(&v)?;
+                }
+            }
+        }
+        self.nulls.append_non_null();
+        Ok(true)
+    }
+
+    fn finish(&mut self) -> Result<ArrayRef> {
+        let mut fields = Vec::with_capacity(self.field_builders.len());
+        let mut arrays = Vec::with_capacity(self.field_builders.len());
+        for (field, mut builder) in std::mem::take(&mut self.field_builders) {
+            fields.push(field);
+            arrays.push(builder.finish()?);
+        }
+        Ok(Arc::new(arrow::array::StructArray::try_new(
+            fields.into(),
+            arrays,
+            self.nulls.finish(),
+        )?))
+    }
+}
+
+/// Used for actual shredding of binary variant values into shredded variant 
values
+#[allow(unused)]
+struct ShreddedVariantRowBuilder {

Review Comment:
   That's exactly what it would be used for. If the user passed a shredded 
variant type to `variant_get` and the path ends inside a binary variant, we 
have no choice but to shred accordingly (\*\*) (\*\*\*).
   
   If the user just passed a concrete type to `variant_get`, and the path ends 
inside a binary variant, we would instead use e.g. 
`PrimitiveVariantShreddingRowBuilder` or `StructVariantShreddingRowBuilder`.
   
   (\*\*) Things get complicated fast, if the user passed a shredded variant 
type to `variant_get` and the path ends inside a shredded variant with a 
different schema. This pathfinding PR didn't even touch that case, but it would 
probably look vaguely similar to the struct specialization, where we try to 
follow the common/compatible paths of the two shredding schemas (in hopes of 
returning unmodified columns directly) and fall back to actual builders only 
where the paths diverge.
   
   
   (\*\*\*) If the user passed a binary variant schema and the underlying data 
were shredded, then we have to use a `BinaryVariantRowBuilder` to insert the 
variant bytes produced by e.g. `VariantArray::value`.



##########
parquet-variant-compute/src/variant_array.rs:
##########
@@ -192,7 +202,96 @@ impl VariantArray {
 
     /// Return a reference to the metadata field of the [`StructArray`]
     pub fn metadata_field(&self) -> &BinaryViewArray {
-        self.shredding_state.metadata_field()
+        &self.metadata
+    }
+
+    /// Return a reference to the value field of the `StructArray`
+    pub fn value_field(&self) -> Option<&BinaryViewArray> {
+        self.shredding_state.value_field()
+    }
+
+    /// Return a reference to the typed_value field of the `StructArray`, if 
present
+    pub fn typed_value_field(&self) -> Option<&ArrayRef> {
+        self.shredding_state.typed_value_field()
+    }
+}
+
+/// One shredded field of a partially or prefectly shredded variant. For 
example, suppose the
+/// shredding schema for variant `v` treats it as an object with a single 
field `a`, where `a` is
+/// itself a struct with the single field `b` of type INT. Then the physical 
layout of the column
+/// is:
+///
+/// ```text
+/// v: VARIANT {
+///     metadata: BINARY,
+///     value: BINARY,
+///     typed_value: STRUCT {
+///         a: SHREDDED_VARIANT_FIELD {
+///             value: BINARY,
+///             typed_value: STRUCT {
+///                 a: SHREDDED_VARIANT_FIELD {
+///                     value: BINARY,
+///                     typed_value: INT,
+///                 },
+///             },
+///         },
+///     },
+/// }
+/// ```
+///
+/// In the above, each row of `v.value` is either a variant value (shredding 
failed, `v` was not an

Review Comment:
   Those examples all look correct, yes.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to