scovich commented on code in PR #8021:
URL: https://github.com/apache/arrow-rs/pull/8021#discussion_r2255159393


##########
parquet-variant-compute/src/variant_array.rs:
##########
@@ -93,35 +88,45 @@ impl VariantArray {
                 "Invalid VariantArray: requires StructArray as 
input".to_string(),
             ));
         };
-        // Ensure the StructArray has a metadata field of BinaryView
 
-        let Some(metadata_field) = VariantArray::find_metadata_field(inner) 
else {
+        // Note the specification allows for any order so we must search by 
name
+
+        // Ensure the StructArray has a metadata field of BinaryView
+        let Some(metadata_field) = inner.column_by_name("metadata") else {
             return Err(ArrowError::InvalidArgumentError(
                 "Invalid VariantArray: StructArray must contain a 'metadata' 
field".to_string(),
             ));
         };
-        if metadata_field.data_type() != &DataType::BinaryView {
+        let Some(metadata) = metadata_field.as_binary_view_opt() else {
             return Err(ArrowError::NotYetImplemented(format!(
                 "VariantArray 'metadata' field must be BinaryView, got {}",
                 metadata_field.data_type()
             )));
-        }
-        let Some(value_field) = VariantArray::find_value_field(inner) else {
-            return Err(ArrowError::InvalidArgumentError(
-                "Invalid VariantArray: StructArray must contain a 'value' 
field".to_string(),
-            ));
         };
-        if value_field.data_type() != &DataType::BinaryView {
-            return Err(ArrowError::NotYetImplemented(format!(
-                "VariantArray 'value' field must be BinaryView, got {}",
-                value_field.data_type()
-            )));
-        }
+
+        // Find the value field, if present
+        let value_field = inner.column_by_name("value");
+        let value = value_field
+            .map(|v| match v.as_binary_view_opt() {
+                Some(bv) => Ok(bv),
+                None => Err(ArrowError::NotYetImplemented(format!(
+                    "VariantArray 'value' field must be BinaryView, got {}",
+                    v.data_type()
+                ))),
+            })
+            .transpose()?;

Review Comment:
   This goes from `Option` to `Option-Option` to `Option-Result` to 
`Result-Option` to `Option`? Phew!
   
   nit: potential small simplification (`unwrap_or_else` actually useful):
   ```suggestion
           let value = inner
               .column_by_name("value")
               .map(|v| v.as_binary_view_opt.unwrap_or_else(|| {
                   ArrowError::NotYetImplemented(format!(
                       "VariantArray 'value' field must be BinaryView, got {}",
                       v.data_type()
                   ))
               }))
               .transpose()?;
   ```



##########
parquet-variant-compute/src/variant_array.rs:
##########
@@ -93,35 +88,45 @@ impl VariantArray {
                 "Invalid VariantArray: requires StructArray as 
input".to_string(),
             ));
         };
-        // Ensure the StructArray has a metadata field of BinaryView
 
-        let Some(metadata_field) = VariantArray::find_metadata_field(inner) 
else {
+        // Note the specification allows for any order so we must search by 
name
+
+        // Ensure the StructArray has a metadata field of BinaryView
+        let Some(metadata_field) = inner.column_by_name("metadata") else {
             return Err(ArrowError::InvalidArgumentError(
                 "Invalid VariantArray: StructArray must contain a 'metadata' 
field".to_string(),
             ));
         };
-        if metadata_field.data_type() != &DataType::BinaryView {
+        let Some(metadata) = metadata_field.as_binary_view_opt() else {
             return Err(ArrowError::NotYetImplemented(format!(

Review Comment:
   aside: Out of curiosity, do we have any style/preference guidelines for 
   ```rust
   let Some(foo) = foo_opt else {
       return Err(...);
   };
   ```
   vs. 
   ```rust
   let foo = foo_opt.unwrap_or_else(|| {
       ...
   })?;
   ```
   ?
   
   The former seems both more direct and less verbose, at least in this case?
   Maybe `unwrap_or_else` is mostly (only?) useful as part of a bigger monadic 
chain?



##########
parquet-variant-compute/src/variant_array.rs:
##########
@@ -135,36 +140,189 @@ impl VariantArray {
         self.inner
     }
 
+    /// Return the shredding state of this `VariantArray`
+    pub fn shredding_state(&self) -> &ShreddingState {
+        &self.shredding_state
+    }
+
     /// Return the [`Variant`] instance stored at the given row
     ///
-    /// Panics if the index is out of bounds.
+    /// Consistently with other Arrow arrays types, this API requires you to
+    /// check for nulls first using [`Self::is_valid`].
+    ///
+    /// # Panics
+    /// * if the index is out of bounds
+    /// * if the array value is null
+    ///
+    /// If this is a shredded variant but has no value at the shredded 
location, it
+    /// will return [`Variant::Null`].
+    ///
+    ///
+    /// # Performance Note
+    ///
+    /// This is certainly not the most efficient way to access values in a
+    /// `VariantArray`, but it is useful for testing and debugging.
     ///
     /// Note: Does not do deep validation of the [`Variant`], so it is up to 
the
     /// caller to ensure that the metadata and value were constructed 
correctly.
     pub fn value(&self, index: usize) -> Variant {
-        let metadata = self.metadata_field().as_binary_view().value(index);
-        let value = self.value_field().as_binary_view().value(index);
-        Variant::new(metadata, value)
+        match &self.shredding_state {
+            ShreddingState::Unshredded { metadata, value } => {
+                Variant::new(metadata.value(index), value.value(index))
+            }
+            ShreddingState::FullyShredded {
+                metadata: _,
+                typed_value,
+            } => {

Review Comment:
   ```suggestion
               ShreddingState::FullyShredded { typed_value, .. } => {
   ```



##########
parquet-variant-compute/src/variant_array.rs:
##########
@@ -135,36 +140,189 @@ impl VariantArray {
         self.inner
     }
 
+    /// Return the shredding state of this `VariantArray`
+    pub fn shredding_state(&self) -> &ShreddingState {
+        &self.shredding_state
+    }
+
     /// Return the [`Variant`] instance stored at the given row
     ///
-    /// Panics if the index is out of bounds.
+    /// Consistently with other Arrow arrays types, this API requires you to
+    /// check for nulls first using [`Self::is_valid`].
+    ///
+    /// # Panics
+    /// * if the index is out of bounds
+    /// * if the array value is null
+    ///
+    /// If this is a shredded variant but has no value at the shredded 
location, it
+    /// will return [`Variant::Null`].
+    ///
+    ///
+    /// # Performance Note
+    ///
+    /// This is certainly not the most efficient way to access values in a
+    /// `VariantArray`, but it is useful for testing and debugging.
     ///
     /// Note: Does not do deep validation of the [`Variant`], so it is up to 
the
     /// caller to ensure that the metadata and value were constructed 
correctly.
     pub fn value(&self, index: usize) -> Variant {
-        let metadata = self.metadata_field().as_binary_view().value(index);
-        let value = self.value_field().as_binary_view().value(index);
-        Variant::new(metadata, value)
+        match &self.shredding_state {
+            ShreddingState::Unshredded { metadata, value } => {
+                Variant::new(metadata.value(index), value.value(index))
+            }
+            ShreddingState::FullyShredded {
+                metadata: _,
+                typed_value,
+            } => {
+                if typed_value.is_null(index) {
+                    Variant::Null
+                } else {
+                    typed_value_to_variant(typed_value, index)
+                }
+            }
+            ShreddingState::PartiallyShredded {
+                metadata,
+                value,
+                typed_value,
+            } => {
+                if typed_value.is_null(index) {
+                    Variant::new(metadata.value(index), value.value(index))
+                } else {
+                    typed_value_to_variant(typed_value, index)
+                }
+            }
+        }

Review Comment:
   Part of me wonders whether this "shredding state" enum is actually helping, 
vs. just storing `value` and `typed_value` as options? Especially since the 
[shredding 
spec](https://github.com/apache/parquet-format/blob/master/VariantShredding.md#value-shredding)
 seems to suggest that none-none case is a valid combination?
   > ```
   > required group measurement (VARIANT) {
   >   required binary metadata;
   >   optional binary value;
   >   optional int64 typed_value;
   > }
   > ```
   
   and
   
   > <img width="658" height="84" alt="image" 
src="https://github.com/user-attachments/assets/b93e99c0-012a-4ecf-8580-245c3790d7f7";
 />
   (the file should be able to omit both physical columns, in case of a 
perfectly shredded all-null the logical column)
   
   ```suggestion
           // Always prefer typed_value over value (if present).
           match self.typed_value.as_ref().and_then(|tv| 
typed_value_to_variant(tv, index)) {
               Some(typed_value) => typed_value,
               None => self.value.as_ref().map_or(
                   Variant::Null,
                   |v| Variant::new(self.metadata.value(index), v.value(index)),
               )),
           }
   ```
   
   (this assumes that `typed_value_to_variant` includes the null check and 
returns `Option<Variant>`)
   



##########
parquet-variant-compute/src/variant_array.rs:
##########
@@ -135,36 +140,189 @@ impl VariantArray {
         self.inner
     }
 
+    /// Return the shredding state of this `VariantArray`
+    pub fn shredding_state(&self) -> &ShreddingState {
+        &self.shredding_state
+    }
+
     /// Return the [`Variant`] instance stored at the given row
     ///
-    /// Panics if the index is out of bounds.
+    /// Consistently with other Arrow arrays types, this API requires you to
+    /// check for nulls first using [`Self::is_valid`].
+    ///
+    /// # Panics
+    /// * if the index is out of bounds
+    /// * if the array value is null
+    ///
+    /// If this is a shredded variant but has no value at the shredded 
location, it
+    /// will return [`Variant::Null`].
+    ///
+    ///
+    /// # Performance Note
+    ///
+    /// This is certainly not the most efficient way to access values in a
+    /// `VariantArray`, but it is useful for testing and debugging.
     ///
     /// Note: Does not do deep validation of the [`Variant`], so it is up to 
the
     /// caller to ensure that the metadata and value were constructed 
correctly.
     pub fn value(&self, index: usize) -> Variant {
-        let metadata = self.metadata_field().as_binary_view().value(index);
-        let value = self.value_field().as_binary_view().value(index);
-        Variant::new(metadata, value)
+        match &self.shredding_state {
+            ShreddingState::Unshredded { metadata, value } => {
+                Variant::new(metadata.value(index), value.value(index))
+            }
+            ShreddingState::FullyShredded {
+                metadata: _,
+                typed_value,
+            } => {
+                if typed_value.is_null(index) {
+                    Variant::Null
+                } else {
+                    typed_value_to_variant(typed_value, index)
+                }
+            }
+            ShreddingState::PartiallyShredded {
+                metadata,
+                value,
+                typed_value,
+            } => {
+                if typed_value.is_null(index) {
+                    Variant::new(metadata.value(index), value.value(index))
+                } else {
+                    typed_value_to_variant(typed_value, index)
+                }
+            }
+        }
     }
 
-    fn find_metadata_field(array: &StructArray) -> Option<ArrayRef> {
-        array.column_by_name("metadata").cloned()
+    /// Return a reference to the metadata field of the [`StructArray`]
+    pub fn metadata_field(&self) -> &BinaryViewArray {
+        self.shredding_state.metadata_field()
     }
 
-    fn find_value_field(array: &StructArray) -> Option<ArrayRef> {
-        array.column_by_name("value").cloned()
+    /// Return a reference to the value field of the `StructArray`
+    pub fn value_field(&self) -> Option<&BinaryViewArray> {
+        self.shredding_state.value_field()
     }
 
-    /// Return a reference to the metadata field of the [`StructArray`]
-    pub fn metadata_field(&self) -> &ArrayRef {
-        // spec says fields order is not guaranteed, so we search by name
-        &self.metadata_ref
+    /// Return a reference to the typed_value field of the `StructArray`, if 
present
+    pub fn typed_value_field(&self) -> Option<&ArrayRef> {
+        self.shredding_state.typed_value_field()

Review Comment:
   These become trivial if we ditch the `ShreddingState` enum



##########
parquet-variant-compute/src/variant_array.rs:
##########
@@ -135,36 +140,189 @@ impl VariantArray {
         self.inner
     }
 
+    /// Return the shredding state of this `VariantArray`
+    pub fn shredding_state(&self) -> &ShreddingState {
+        &self.shredding_state
+    }
+
     /// Return the [`Variant`] instance stored at the given row
     ///
-    /// Panics if the index is out of bounds.
+    /// Consistently with other Arrow arrays types, this API requires you to
+    /// check for nulls first using [`Self::is_valid`].
+    ///
+    /// # Panics
+    /// * if the index is out of bounds
+    /// * if the array value is null
+    ///
+    /// If this is a shredded variant but has no value at the shredded 
location, it
+    /// will return [`Variant::Null`].
+    ///
+    ///
+    /// # Performance Note
+    ///
+    /// This is certainly not the most efficient way to access values in a
+    /// `VariantArray`, but it is useful for testing and debugging.
     ///
     /// Note: Does not do deep validation of the [`Variant`], so it is up to 
the
     /// caller to ensure that the metadata and value were constructed 
correctly.
     pub fn value(&self, index: usize) -> Variant {
-        let metadata = self.metadata_field().as_binary_view().value(index);
-        let value = self.value_field().as_binary_view().value(index);
-        Variant::new(metadata, value)
+        match &self.shredding_state {
+            ShreddingState::Unshredded { metadata, value } => {
+                Variant::new(metadata.value(index), value.value(index))
+            }
+            ShreddingState::FullyShredded {
+                metadata: _,
+                typed_value,
+            } => {
+                if typed_value.is_null(index) {
+                    Variant::Null
+                } else {
+                    typed_value_to_variant(typed_value, index)
+                }
+            }
+            ShreddingState::PartiallyShredded {
+                metadata,
+                value,
+                typed_value,
+            } => {
+                if typed_value.is_null(index) {
+                    Variant::new(metadata.value(index), value.value(index))
+                } else {
+                    typed_value_to_variant(typed_value, index)
+                }
+            }
+        }
     }
 
-    fn find_metadata_field(array: &StructArray) -> Option<ArrayRef> {
-        array.column_by_name("metadata").cloned()
+    /// Return a reference to the metadata field of the [`StructArray`]
+    pub fn metadata_field(&self) -> &BinaryViewArray {
+        self.shredding_state.metadata_field()
     }
 
-    fn find_value_field(array: &StructArray) -> Option<ArrayRef> {
-        array.column_by_name("value").cloned()
+    /// Return a reference to the value field of the `StructArray`
+    pub fn value_field(&self) -> Option<&BinaryViewArray> {
+        self.shredding_state.value_field()
     }
 
-    /// Return a reference to the metadata field of the [`StructArray`]
-    pub fn metadata_field(&self) -> &ArrayRef {
-        // spec says fields order is not guaranteed, so we search by name
-        &self.metadata_ref
+    /// Return a reference to the typed_value field of the `StructArray`, if 
present
+    pub fn typed_value_field(&self) -> Option<&ArrayRef> {
+        self.shredding_state.typed_value_field()
     }
+}
 
-    /// Return a reference to the value field of the `StructArray`
-    pub fn value_field(&self) -> &ArrayRef {
-        // spec says fields order is not guaranteed, so we search by name
-        &self.value_ref
+/// Variant arrays can be shredded in one of three states, encoded here
+#[derive(Debug)]
+pub enum ShreddingState {
+    /// This variant has no typed_value field
+    Unshredded {
+        metadata: BinaryViewArray,
+        value: BinaryViewArray,
+    },
+    /// This variant has a typed_value field and no value field
+    /// meaning it is fully shredded (aka the value is stored in typed_value)
+    FullyShredded {
+        metadata: BinaryViewArray,
+        typed_value: ArrayRef,
+    },
+    /// This variant has both a value field and a typed_value field
+    /// meaning it is partially shredded: first the typed_value is used, and
+    /// if that is null, the value field is used.
+    PartiallyShredded {
+        metadata: BinaryViewArray,
+        value: BinaryViewArray,
+        typed_value: ArrayRef,
+    },
+}
+
+impl ShreddingState {
+    /// try to create a new `ShreddingState` from the given fields
+    pub fn try_new(
+        metadata: BinaryViewArray,
+        value: Option<BinaryViewArray>,
+        typed_value: Option<ArrayRef>,
+    ) -> Result<Self, ArrowError> {
+        match (metadata, value, typed_value) {
+            (metadata, Some(value), Some(typed_value)) => 
Ok(Self::PartiallyShredded {
+                metadata,
+                value,
+                typed_value,
+            }),
+            (metadata, Some(value), None) => Ok(Self::Unshredded { metadata, 
value }),
+            (metadata, None, Some(typed_value)) => Ok(Self::FullyShredded {
+                metadata,
+                typed_value,
+            }),
+            (_metadata_field, None, None) => 
Err(ArrowError::InvalidArgumentError(String::from(
+                "VariantArray has neither value nor typed_value field",
+            ))),
+        }
+    }
+
+    /// Return a reference to the metadata field
+    pub fn metadata_field(&self) -> &BinaryViewArray {
+        match self {
+            ShreddingState::Unshredded { metadata, .. } => metadata,
+            ShreddingState::FullyShredded { metadata, .. } => metadata,
+            ShreddingState::PartiallyShredded { metadata, .. } => metadata,
+        }
+    }
+
+    /// Return a reference to the value field, if present
+    pub fn value_field(&self) -> Option<&BinaryViewArray> {
+        match self {
+            ShreddingState::Unshredded { value, .. } => Some(value),
+            ShreddingState::FullyShredded { .. } => None,
+            ShreddingState::PartiallyShredded { value, .. } => Some(value),
+        }
+    }
+
+    /// Return a reference to the typed_value field, if present
+    pub fn typed_value_field(&self) -> Option<&ArrayRef> {
+        match self {
+            ShreddingState::Unshredded { .. } => None,
+            ShreddingState::FullyShredded { typed_value, .. } => 
Some(typed_value),
+            ShreddingState::PartiallyShredded { typed_value, .. } => 
Some(typed_value),
+        }
+    }
+
+    /// Slice all the underlying arrays
+    pub fn slice(&self, offset: usize, length: usize) -> Self {
+        match self {
+            ShreddingState::Unshredded { metadata, value } => 
ShreddingState::Unshredded {
+                metadata: metadata.slice(offset, length),
+                value: value.slice(offset, length),
+            },
+            ShreddingState::FullyShredded {
+                metadata,
+                typed_value,
+            } => ShreddingState::FullyShredded {
+                metadata: metadata.slice(offset, length),
+                typed_value: typed_value.slice(offset, length),
+            },
+            ShreddingState::PartiallyShredded {
+                metadata,
+                value,
+                typed_value,
+            } => ShreddingState::PartiallyShredded {
+                metadata: metadata.slice(offset, length),
+                value: value.slice(offset, length),
+                typed_value: typed_value.slice(offset, length),
+            },
+        }
+    }
+}
+
+/// returns the non-null element at index as a Variant
+fn typed_value_to_variant(typed_value: &ArrayRef, index: usize) -> Variant {
+    match typed_value.data_type() {
+        DataType::Int32 => {
+            let typed_value = typed_value.as_primitive::<Int32Type>();
+            Variant::from(typed_value.value(index))

Review Comment:
   Good use for 
[downcast_primitive_array](https://docs.rs/arrow/latest/arrow/macro.downcast_primitive_array.html)
 macro?
   ```suggestion
       downcast_primitive_array! {
           typed_value => Some(Variant::from(typed_value.value(index))),
   ```



##########
parquet-variant-compute/src/variant_array.rs:
##########
@@ -135,36 +140,189 @@ impl VariantArray {
         self.inner
     }
 
+    /// Return the shredding state of this `VariantArray`
+    pub fn shredding_state(&self) -> &ShreddingState {
+        &self.shredding_state
+    }
+
     /// Return the [`Variant`] instance stored at the given row
     ///
-    /// Panics if the index is out of bounds.
+    /// Consistently with other Arrow arrays types, this API requires you to
+    /// check for nulls first using [`Self::is_valid`].
+    ///
+    /// # Panics
+    /// * if the index is out of bounds
+    /// * if the array value is null
+    ///
+    /// If this is a shredded variant but has no value at the shredded 
location, it
+    /// will return [`Variant::Null`].
+    ///
+    ///
+    /// # Performance Note
+    ///
+    /// This is certainly not the most efficient way to access values in a
+    /// `VariantArray`, but it is useful for testing and debugging.
     ///
     /// Note: Does not do deep validation of the [`Variant`], so it is up to 
the
     /// caller to ensure that the metadata and value were constructed 
correctly.
     pub fn value(&self, index: usize) -> Variant {
-        let metadata = self.metadata_field().as_binary_view().value(index);
-        let value = self.value_field().as_binary_view().value(index);
-        Variant::new(metadata, value)
+        match &self.shredding_state {
+            ShreddingState::Unshredded { metadata, value } => {
+                Variant::new(metadata.value(index), value.value(index))
+            }
+            ShreddingState::FullyShredded {
+                metadata: _,
+                typed_value,
+            } => {
+                if typed_value.is_null(index) {
+                    Variant::Null
+                } else {
+                    typed_value_to_variant(typed_value, index)
+                }
+            }
+            ShreddingState::PartiallyShredded {
+                metadata,
+                value,
+                typed_value,
+            } => {
+                if typed_value.is_null(index) {
+                    Variant::new(metadata.value(index), value.value(index))
+                } else {
+                    typed_value_to_variant(typed_value, index)
+                }
+            }
+        }
     }
 
-    fn find_metadata_field(array: &StructArray) -> Option<ArrayRef> {
-        array.column_by_name("metadata").cloned()
+    /// Return a reference to the metadata field of the [`StructArray`]
+    pub fn metadata_field(&self) -> &BinaryViewArray {
+        self.shredding_state.metadata_field()
     }
 
-    fn find_value_field(array: &StructArray) -> Option<ArrayRef> {
-        array.column_by_name("value").cloned()
+    /// Return a reference to the value field of the `StructArray`
+    pub fn value_field(&self) -> Option<&BinaryViewArray> {
+        self.shredding_state.value_field()
     }
 
-    /// Return a reference to the metadata field of the [`StructArray`]
-    pub fn metadata_field(&self) -> &ArrayRef {
-        // spec says fields order is not guaranteed, so we search by name
-        &self.metadata_ref
+    /// Return a reference to the typed_value field of the `StructArray`, if 
present
+    pub fn typed_value_field(&self) -> Option<&ArrayRef> {
+        self.shredding_state.typed_value_field()
     }
+}
 
-    /// Return a reference to the value field of the `StructArray`
-    pub fn value_field(&self) -> &ArrayRef {
-        // spec says fields order is not guaranteed, so we search by name
-        &self.value_ref
+/// Variant arrays can be shredded in one of three states, encoded here
+#[derive(Debug)]
+pub enum ShreddingState {
+    /// This variant has no typed_value field
+    Unshredded {
+        metadata: BinaryViewArray,
+        value: BinaryViewArray,
+    },
+    /// This variant has a typed_value field and no value field
+    /// meaning it is fully shredded (aka the value is stored in typed_value)
+    FullyShredded {
+        metadata: BinaryViewArray,
+        typed_value: ArrayRef,
+    },
+    /// This variant has both a value field and a typed_value field
+    /// meaning it is partially shredded: first the typed_value is used, and
+    /// if that is null, the value field is used.
+    PartiallyShredded {
+        metadata: BinaryViewArray,
+        value: BinaryViewArray,
+        typed_value: ArrayRef,
+    },
+}
+
+impl ShreddingState {
+    /// try to create a new `ShreddingState` from the given fields
+    pub fn try_new(
+        metadata: BinaryViewArray,
+        value: Option<BinaryViewArray>,
+        typed_value: Option<ArrayRef>,
+    ) -> Result<Self, ArrowError> {
+        match (metadata, value, typed_value) {
+            (metadata, Some(value), Some(typed_value)) => 
Ok(Self::PartiallyShredded {
+                metadata,
+                value,
+                typed_value,
+            }),
+            (metadata, Some(value), None) => Ok(Self::Unshredded { metadata, 
value }),
+            (metadata, None, Some(typed_value)) => Ok(Self::FullyShredded {
+                metadata,
+                typed_value,
+            }),
+            (_metadata_field, None, None) => 
Err(ArrowError::InvalidArgumentError(String::from(
+                "VariantArray has neither value nor typed_value field",
+            ))),
+        }
+    }
+
+    /// Return a reference to the metadata field
+    pub fn metadata_field(&self) -> &BinaryViewArray {
+        match self {
+            ShreddingState::Unshredded { metadata, .. } => metadata,
+            ShreddingState::FullyShredded { metadata, .. } => metadata,
+            ShreddingState::PartiallyShredded { metadata, .. } => metadata,
+        }
+    }
+
+    /// Return a reference to the value field, if present
+    pub fn value_field(&self) -> Option<&BinaryViewArray> {
+        match self {
+            ShreddingState::Unshredded { value, .. } => Some(value),
+            ShreddingState::FullyShredded { .. } => None,
+            ShreddingState::PartiallyShredded { value, .. } => Some(value),
+        }
+    }
+
+    /// Return a reference to the typed_value field, if present
+    pub fn typed_value_field(&self) -> Option<&ArrayRef> {
+        match self {
+            ShreddingState::Unshredded { .. } => None,
+            ShreddingState::FullyShredded { typed_value, .. } => 
Some(typed_value),
+            ShreddingState::PartiallyShredded { typed_value, .. } => 
Some(typed_value),
+        }
+    }
+
+    /// Slice all the underlying arrays
+    pub fn slice(&self, offset: usize, length: usize) -> Self {
+        match self {
+            ShreddingState::Unshredded { metadata, value } => 
ShreddingState::Unshredded {
+                metadata: metadata.slice(offset, length),
+                value: value.slice(offset, length),
+            },
+            ShreddingState::FullyShredded {
+                metadata,
+                typed_value,
+            } => ShreddingState::FullyShredded {
+                metadata: metadata.slice(offset, length),
+                typed_value: typed_value.slice(offset, length),
+            },
+            ShreddingState::PartiallyShredded {
+                metadata,
+                value,
+                typed_value,
+            } => ShreddingState::PartiallyShredded {
+                metadata: metadata.slice(offset, length),
+                value: value.slice(offset, length),
+                typed_value: typed_value.slice(offset, length),
+            },
+        }
+    }
+}
+
+/// returns the non-null element at index as a Variant
+fn typed_value_to_variant(typed_value: &ArrayRef, index: usize) -> Variant {

Review Comment:
   Suggest to make this return `Option<Variant>` so callers don't have to check 
for null themselves.
   ```suggestion
   fn typed_value_to_variant(typed_value: &ArrayRef, index: usize) -> Variant {
       if typed_value.is_null(index) {
           return None;
       }
   ```



##########
parquet-variant-compute/src/variant_array.rs:
##########
@@ -135,36 +140,189 @@ impl VariantArray {
         self.inner
     }
 
+    /// Return the shredding state of this `VariantArray`
+    pub fn shredding_state(&self) -> &ShreddingState {
+        &self.shredding_state
+    }
+
     /// Return the [`Variant`] instance stored at the given row
     ///
-    /// Panics if the index is out of bounds.
+    /// Consistently with other Arrow arrays types, this API requires you to
+    /// check for nulls first using [`Self::is_valid`].
+    ///
+    /// # Panics
+    /// * if the index is out of bounds
+    /// * if the array value is null
+    ///
+    /// If this is a shredded variant but has no value at the shredded 
location, it
+    /// will return [`Variant::Null`].
+    ///
+    ///
+    /// # Performance Note
+    ///
+    /// This is certainly not the most efficient way to access values in a
+    /// `VariantArray`, but it is useful for testing and debugging.
     ///
     /// Note: Does not do deep validation of the [`Variant`], so it is up to 
the
     /// caller to ensure that the metadata and value were constructed 
correctly.
     pub fn value(&self, index: usize) -> Variant {
-        let metadata = self.metadata_field().as_binary_view().value(index);
-        let value = self.value_field().as_binary_view().value(index);
-        Variant::new(metadata, value)
+        match &self.shredding_state {
+            ShreddingState::Unshredded { metadata, value } => {
+                Variant::new(metadata.value(index), value.value(index))
+            }
+            ShreddingState::FullyShredded {
+                metadata: _,
+                typed_value,
+            } => {
+                if typed_value.is_null(index) {
+                    Variant::Null
+                } else {
+                    typed_value_to_variant(typed_value, index)
+                }
+            }
+            ShreddingState::PartiallyShredded {
+                metadata,
+                value,
+                typed_value,
+            } => {
+                if typed_value.is_null(index) {
+                    Variant::new(metadata.value(index), value.value(index))
+                } else {
+                    typed_value_to_variant(typed_value, index)
+                }
+            }
+        }
     }
 
-    fn find_metadata_field(array: &StructArray) -> Option<ArrayRef> {
-        array.column_by_name("metadata").cloned()
+    /// Return a reference to the metadata field of the [`StructArray`]
+    pub fn metadata_field(&self) -> &BinaryViewArray {
+        self.shredding_state.metadata_field()
     }
 
-    fn find_value_field(array: &StructArray) -> Option<ArrayRef> {
-        array.column_by_name("value").cloned()
+    /// Return a reference to the value field of the `StructArray`
+    pub fn value_field(&self) -> Option<&BinaryViewArray> {
+        self.shredding_state.value_field()
     }
 
-    /// Return a reference to the metadata field of the [`StructArray`]
-    pub fn metadata_field(&self) -> &ArrayRef {
-        // spec says fields order is not guaranteed, so we search by name
-        &self.metadata_ref
+    /// Return a reference to the typed_value field of the `StructArray`, if 
present
+    pub fn typed_value_field(&self) -> Option<&ArrayRef> {
+        self.shredding_state.typed_value_field()
     }
+}
 
-    /// Return a reference to the value field of the `StructArray`
-    pub fn value_field(&self) -> &ArrayRef {
-        // spec says fields order is not guaranteed, so we search by name
-        &self.value_ref
+/// Variant arrays can be shredded in one of three states, encoded here
+#[derive(Debug)]
+pub enum ShreddingState {
+    /// This variant has no typed_value field
+    Unshredded {
+        metadata: BinaryViewArray,
+        value: BinaryViewArray,
+    },
+    /// This variant has a typed_value field and no value field
+    /// meaning it is fully shredded (aka the value is stored in typed_value)
+    FullyShredded {
+        metadata: BinaryViewArray,
+        typed_value: ArrayRef,
+    },
+    /// This variant has both a value field and a typed_value field
+    /// meaning it is partially shredded: first the typed_value is used, and
+    /// if that is null, the value field is used.
+    PartiallyShredded {
+        metadata: BinaryViewArray,
+        value: BinaryViewArray,
+        typed_value: ArrayRef,
+    },
+}
+
+impl ShreddingState {
+    /// try to create a new `ShreddingState` from the given fields
+    pub fn try_new(
+        metadata: BinaryViewArray,
+        value: Option<BinaryViewArray>,
+        typed_value: Option<ArrayRef>,
+    ) -> Result<Self, ArrowError> {
+        match (metadata, value, typed_value) {
+            (metadata, Some(value), Some(typed_value)) => 
Ok(Self::PartiallyShredded {
+                metadata,
+                value,
+                typed_value,
+            }),
+            (metadata, Some(value), None) => Ok(Self::Unshredded { metadata, 
value }),
+            (metadata, None, Some(typed_value)) => Ok(Self::FullyShredded {
+                metadata,
+                typed_value,
+            }),
+            (_metadata_field, None, None) => 
Err(ArrowError::InvalidArgumentError(String::from(
+                "VariantArray has neither value nor typed_value field",

Review Comment:
   I'm pretty sure the shredding spec allows this case. It corresponds to a 
perfectly shredded all-null column where the writer chose to omit both of them 
as unnecessary. 
   
   (see other comment above)



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to