This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new 07ae1dd7ce [Variant] Introduce new BorrowedShreddingState concept 
(#8499)
07ae1dd7ce is described below

commit 07ae1dd7ceee83a5e17e209626cc92230cd04609
Author: Ryan Johnson <[email protected]>
AuthorDate: Tue Sep 30 03:36:22 2025 -0600

    [Variant] Introduce new BorrowedShreddingState concept (#8499)
    
    # Which issue does this PR close?
    
    - Relates to https://github.com/apache/arrow-rs/issues/8336
    
    # Rationale for this change
    
    While pathfinding support for unshredding variant objects and arrays, I
    ran into the same issue that `variant_get` already suffers --
    `ShreddingState` is inconvenient because it owns the `value` and
    `typed_value` columns it refers to, even tho borrowing them usually
    suffices.
    
    # What changes are included in this PR?
    
    Define a new `BorrowedShreddingState` which does what it says, and
    update `ShreddedPathStep` (in variant_get.rs) to use it.
    
    Also, make the constructor fallible in order to correctly report casting
    failures if the `value` column is not a binary view.
    
    # Are these changes tested?
    
    Yes, existing tests cover this code.
    
    # Are there any user-facing changes?
    
    New `TryFrom` and `From` implementations.
    
    `impl From<&StructArray> for ShreddingState` was replaced by a suitable
    `TryFrom`.
---
 parquet-variant-compute/src/lib.rs           |   2 +-
 parquet-variant-compute/src/variant_array.rs | 110 ++++++++++++++++++++-------
 parquet-variant-compute/src/variant_get.rs   |  25 +++---
 3 files changed, 97 insertions(+), 40 deletions(-)

diff --git a/parquet-variant-compute/src/lib.rs 
b/parquet-variant-compute/src/lib.rs
index 496d550d95..b3876ef6ab 100644
--- a/parquet-variant-compute/src/lib.rs
+++ b/parquet-variant-compute/src/lib.rs
@@ -46,7 +46,7 @@ mod variant_array_builder;
 pub mod variant_get;
 mod variant_to_arrow;
 
-pub use variant_array::{ShreddingState, VariantArray, VariantType};
+pub use variant_array::{BorrowedShreddingState, ShreddingState, VariantArray, 
VariantType};
 pub use variant_array_builder::{VariantArrayBuilder, VariantValueArrayBuilder};
 
 pub use cast_to_variant::{cast_to_variant, cast_to_variant_with_options};
diff --git a/parquet-variant-compute/src/variant_array.rs 
b/parquet-variant-compute/src/variant_array.rs
index d366d702aa..79ef37f0c4 100644
--- a/parquet-variant-compute/src/variant_array.rs
+++ b/parquet-variant-compute/src/variant_array.rs
@@ -272,26 +272,11 @@ impl VariantArray {
             )));
         };
 
-        // Extract value and typed_value fields
-        let value = if let Some(value_col) = inner.column_by_name("value") {
-            if let Some(binary_view) = value_col.as_binary_view_opt() {
-                Some(binary_view.clone())
-            } else {
-                return Err(ArrowError::NotYetImplemented(format!(
-                    "VariantArray 'value' field must be BinaryView, got {}",
-                    value_col.data_type()
-                )));
-            }
-        } else {
-            None
-        };
-        let typed_value = inner.column_by_name("typed_value").cloned();
-
         // Note these clones are cheap, they just bump the ref count
         Ok(Self {
             inner: inner.clone(),
             metadata: metadata.clone(),
-            shredding_state: ShreddingState::new(value, typed_value),
+            shredding_state: ShreddingState::try_from(inner)?,
         })
     }
 
@@ -521,7 +506,7 @@ impl ShreddedVariantFieldArray {
         // Note this clone is cheap, it just bumps the ref count
         Ok(Self {
             inner: inner_struct.clone(),
-            shredding_state: ShreddingState::from(inner_struct),
+            shredding_state: ShreddingState::try_from(inner_struct)?,
         })
     }
 
@@ -660,7 +645,7 @@ impl ShreddingState {
     /// Create a new `ShreddingState` from the given `value` and `typed_value` 
fields
     ///
     /// Note you can create a `ShreddingState` from a &[`StructArray`] using
-    /// `ShreddingState::from(&struct_array)`, for example:
+    /// `ShreddingState::try_from(&struct_array)`, for example:
     ///
     /// ```no_run
     /// # use arrow::array::StructArray;
@@ -669,7 +654,7 @@ impl ShreddingState {
     /// #   unimplemented!()
     /// # }
     /// let struct_array: StructArray = get_struct_array();
-    /// let shredding_state = ShreddingState::from(&struct_array);
+    /// let shredding_state = ShreddingState::try_from(&struct_array).unwrap();
     /// ```
     pub fn new(value: Option<BinaryViewArray>, typed_value: Option<ArrayRef>) 
-> Self {
         Self { value, typed_value }
@@ -685,6 +670,14 @@ impl ShreddingState {
         self.typed_value.as_ref()
     }
 
+    /// Returns a borrowed version of this shredding state
+    pub fn borrow(&self) -> BorrowedShreddingState<'_> {
+        BorrowedShreddingState {
+            value: self.value_field(),
+            typed_value: self.typed_value_field(),
+        }
+    }
+
     /// Slice all the underlying arrays
     pub fn slice(&self, offset: usize, length: usize) -> Self {
         Self {
@@ -694,14 +687,79 @@ impl ShreddingState {
     }
 }
 
-impl From<&StructArray> for ShreddingState {
-    fn from(inner_struct: &StructArray) -> Self {
-        let value = inner_struct
-            .column_by_name("value")
-            .and_then(|col| col.as_binary_view_opt().cloned());
-        let typed_value = inner_struct.column_by_name("typed_value").cloned();
+/// Similar to [`ShreddingState`] except it holds borrowed references of the 
target arrays. Useful
+/// for avoiding clone operations when the caller does not need a 
self-standing shredding state.
+#[derive(Clone, Debug)]
+pub struct BorrowedShreddingState<'a> {
+    value: Option<&'a BinaryViewArray>,
+    typed_value: Option<&'a ArrayRef>,
+}
 
-        ShreddingState::new(value, typed_value)
+impl<'a> BorrowedShreddingState<'a> {
+    /// Create a new `BorrowedShreddingState` from the given `value` and 
`typed_value` fields
+    ///
+    /// Note you can create a `BorrowedShreddingState` from a &[`StructArray`] 
using
+    /// `BorrowedShreddingState::try_from(&struct_array)`, for example:
+    ///
+    /// ```no_run
+    /// # use arrow::array::StructArray;
+    /// # use parquet_variant_compute::BorrowedShreddingState;
+    /// # fn get_struct_array() -> StructArray {
+    /// #   unimplemented!()
+    /// # }
+    /// let struct_array: StructArray = get_struct_array();
+    /// let shredding_state = 
BorrowedShreddingState::try_from(&struct_array).unwrap();
+    /// ```
+    pub fn new(value: Option<&'a BinaryViewArray>, typed_value: Option<&'a 
ArrayRef>) -> Self {
+        Self { value, typed_value }
+    }
+
+    /// Return a reference to the value field, if present
+    pub fn value_field(&self) -> Option<&'a BinaryViewArray> {
+        self.value
+    }
+
+    /// Return a reference to the typed_value field, if present
+    pub fn typed_value_field(&self) -> Option<&'a ArrayRef> {
+        self.typed_value
+    }
+}
+
+impl<'a> TryFrom<&'a StructArray> for BorrowedShreddingState<'a> {
+    type Error = ArrowError;
+
+    fn try_from(inner_struct: &'a StructArray) -> Result<Self, ArrowError> {
+        // The `value` column need not exist, but if it does it must be a 
binary view.
+        let value = if let Some(value_col) = 
inner_struct.column_by_name("value") {
+            let Some(binary_view) = value_col.as_binary_view_opt() else {
+                return Err(ArrowError::NotYetImplemented(format!(
+                    "VariantArray 'value' field must be BinaryView, got {}",
+                    value_col.data_type()
+                )));
+            };
+            Some(binary_view)
+        } else {
+            None
+        };
+        let typed_value = inner_struct.column_by_name("typed_value");
+        Ok(BorrowedShreddingState::new(value, typed_value))
+    }
+}
+
+impl TryFrom<&StructArray> for ShreddingState {
+    type Error = ArrowError;
+
+    fn try_from(inner_struct: &StructArray) -> Result<Self, ArrowError> {
+        Ok(BorrowedShreddingState::try_from(inner_struct)?.into())
+    }
+}
+
+impl From<BorrowedShreddingState<'_>> for ShreddingState {
+    fn from(state: BorrowedShreddingState<'_>) -> Self {
+        ShreddingState {
+            value: state.value_field().cloned(),
+            typed_value: state.typed_value_field().cloned(),
+        }
     }
 }
 
diff --git a/parquet-variant-compute/src/variant_get.rs 
b/parquet-variant-compute/src/variant_get.rs
index a923732ca4..977507af42 100644
--- a/parquet-variant-compute/src/variant_get.rs
+++ b/parquet-variant-compute/src/variant_get.rs
@@ -23,16 +23,16 @@ use arrow::{
 use arrow_schema::{ArrowError, DataType, FieldRef};
 use parquet_variant::{VariantPath, VariantPathElement};
 
-use crate::variant_array::ShreddingState;
+use crate::variant_array::BorrowedShreddingState;
 use crate::variant_to_arrow::make_variant_to_arrow_row_builder;
 use crate::VariantArray;
 
 use arrow::array::AsArray;
 use std::sync::Arc;
 
-pub(crate) enum ShreddedPathStep {
+pub(crate) enum ShreddedPathStep<'a> {
     /// Path step succeeded, return the new shredding state
-    Success(ShreddingState),
+    Success(BorrowedShreddingState<'a>),
     /// The path element is not present in the `typed_value` column and there 
is no `value` column,
     /// so we know it does not exist. It, and all paths under it, are all-NULL.
     Missing,
@@ -47,18 +47,16 @@ pub(crate) enum ShreddedPathStep {
 /// level, or if `typed_value` is not a struct, or if the requested field name 
does not exist.
 ///
 /// TODO: Support `VariantPathElement::Index`? It wouldn't be easy, and maybe 
not even possible.
-pub(crate) fn follow_shredded_path_element(
-    shredding_state: &ShreddingState,
+pub(crate) fn follow_shredded_path_element<'a>(
+    shredding_state: &BorrowedShreddingState<'a>,
     path_element: &VariantPathElement<'_>,
     cast_options: &CastOptions,
-) -> Result<ShreddedPathStep> {
+) -> Result<ShreddedPathStep<'a>> {
     // If the requested path element is not present in `typed_value`, and 
`value` is missing, then
     // we know it does not exist; it, and all paths under it, are all-NULL.
-    let missing_path_step = || {
-        let Some(_value_field) = shredding_state.value_field() else {
-            return ShreddedPathStep::Missing;
-        };
-        ShreddedPathStep::NotShredded
+    let missing_path_step = || match shredding_state.value_field() {
+        Some(_) => ShreddedPathStep::NotShredded,
+        None => ShreddedPathStep::Missing,
     };
 
     let Some(typed_value) = shredding_state.typed_value_field() else {
@@ -98,7 +96,8 @@ pub(crate) fn follow_shredded_path_element(
                 ))
             })?;
 
-            Ok(ShreddedPathStep::Success(struct_array.into()))
+            let state = BorrowedShreddingState::try_from(struct_array)?;
+            Ok(ShreddedPathStep::Success(state))
         }
         VariantPathElement::Index { .. } => {
             // TODO: Support array indexing. Among other things, it will 
require slicing not
@@ -152,7 +151,7 @@ fn shredded_get_path(
 
     // Peel away the prefix of path elements that traverses the shredded parts 
of this variant
     // column. Shredding will traverse the rest of the path on a per-row basis.
-    let mut shredding_state = input.shredding_state().clone();
+    let mut shredding_state = input.shredding_state().borrow();
     let mut accumulated_nulls = input.inner().nulls().cloned();
     let mut path_index = 0;
     for path_element in path {

Reply via email to