scovich commented on code in PR #8122:
URL: https://github.com/apache/arrow-rs/pull/8122#discussion_r2274350236


##########
parquet-variant-compute/src/variant_get/mod.rs:
##########
@@ -15,50 +15,208 @@
 // specific language governing permissions and limitations
 // under the License.
 use arrow::{
-    array::{Array, ArrayRef},
+    array::{self, Array, ArrayRef, BinaryViewArray, StructArray},
     compute::CastOptions,
     error::Result,
 };
-use arrow_schema::{ArrowError, FieldRef};
-use parquet_variant::VariantPath;
+use arrow_schema::{ArrowError, DataType, FieldRef};
+use parquet_variant::{VariantPath, VariantPathElement};
 
 use crate::variant_array::ShreddingState;
-use crate::variant_get::output::instantiate_output_builder;
-use crate::VariantArray;
+use crate::{variant_array::ShreddedVariantFieldArray, VariantArray};
+
+use std::sync::Arc;
 
 mod output;
 
+pub(crate) enum ShreddedPathStep<'a> {
+    /// Path step succeeded, return the new shredding state
+    Success(&'a ShreddingState),
+    /// The path element is not present in the `typed_value` column and there 
is no `value` column,
+    /// so we know it does not exist. It, and all paths under it, are 
all-NULL.
+    Missing,
+    /// The path element is not present in the `typed_value` and must be 
retrieved from the `value`
+    /// column instead. The caller should be prepared to handle any value, 
including the requested
+    /// type, an arbitrary "wrong" type, or `Variant::Null`.
+    NotShredded,
+}
+
+/// Given a shredded variant field -- a `(value?, typed_value?)` pair -- try 
to take one path step
+/// deeper. For a `VariantPathElement::Field`, the step fails if there is no 
`typed_value` at this
+/// level, or if `typed_value` is not a struct, or if the requested field name 
does not exist.
+///
+/// TODO: Support `VariantPathElement::Index`? It wouldn't be easy, and maybe 
not even possible.
+pub(crate) fn follow_shredded_path_element<'a>(
+    shredding_state: &'a ShreddingState,
+    path_element: &VariantPathElement<'_>,
+) -> Result<ShreddedPathStep<'a>> {
+    // If the requested path element is not present in `typed_value`, and 
`value` is missing, then
+    // we know it does not exist; it, and all paths under it, are all-NULL.
+    let missing_path_step = || {
+        if shredding_state.value_field().is_none() {
+            ShreddedPathStep::Missing
+        } else {
+            ShreddedPathStep::NotShredded
+        }
+    };
+
+    let Some(typed_value) = shredding_state.typed_value_field() else {
+        return Ok(missing_path_step());
+    };
+
+    match path_element {
+        VariantPathElement::Field { name } => {
+            // Try to step into the requested field name of a struct.
+            let Some(field) = typed_value
+                .as_any()
+                .downcast_ref::<StructArray>()
+                .and_then(|typed_value| typed_value.column_by_name(name))
+            else {
+                return Ok(missing_path_step());
+            };
+
+            let field = field
+                .as_any()
+                .downcast_ref::<ShreddedVariantFieldArray>()

Review Comment:
   Should we continue this discussion in the other comment thread 
https://github.com/apache/arrow-rs/pull/8122#discussion_r2270806432, where I 
outlined the pros and cons of this struct?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to