klion26 commented on code in PR #9598:
URL: https://github.com/apache/arrow-rs/pull/9598#discussion_r3063162132


##########
parquet-variant-compute/src/variant_get.rs:
##########
@@ -223,26 +226,65 @@ fn shredded_get_path(
     // For shredded/partially-shredded targets (`typed_value` present), 
recurse into each field
     // separately to take advantage of deeper shredding in child fields.
     if let DataType::Struct(fields) = as_field.data_type() {
-        if target.typed_value_field().is_none() {
+        let has_variant_fields = fields
+            .iter()
+            .any(|f| f.try_extension_type::<VariantType>().is_ok());
+        if target.typed_value_field().is_none() && !has_variant_fields {
             return shred_basic_variant(target, VariantPath::default(), 
Some(as_field));
         }
 
-        let children = fields
-            .iter()
-            .map(|field| {
-                shredded_get_path(
-                    &target,
-                    &[VariantPathElement::from(field.name().as_str())],
-                    Some(field),
-                    cast_options,
-                )
-            })
-            .collect::<Result<Vec<_>>>()?;
+        let mut updated_fields = Vec::with_capacity(fields.len());
+        let mut children = Vec::with_capacity(fields.len());
+        for field in fields.iter() {
+            // If the field has VariantType extension metadata, extract it as a
+            // VariantArray instead of casting to the declared data type. This 
allows
+            // callers to request structs where some fields remain as variants.
+            // See test_struct_extraction_with_variant_fields for usage 
example.
+            let is_variant_field = 
field.try_extension_type::<VariantType>().is_ok();
+            let field_as_type = (!is_variant_field).then(|| field.as_ref());
+            let child = shredded_get_path(
+                &target,
+                &[VariantPathElement::from(field.name().as_str())],
+                field_as_type,
+                cast_options,
+            )?;
+
+            if is_variant_field {
+                // Variant field: the child's actual data type (VariantArray's 
internal
+                // StructArray) differs from the user's declared type (empty 
Struct),
+                // so we must update the field's data type and re-attach 
VariantType.
+                //
+                // When the field is entirely absent, shredded_get_path 
returns a
+                // NullArray. Construct an all-null VariantArray so the 
extension
+                // metadata is preserved.
+                let child = if child.data_type() == &DataType::Null {

Review Comment:
   Maybe we can add a test which covers this code block?



##########
parquet-variant-compute/src/variant_get.rs:
##########
@@ -223,26 +226,65 @@ fn shredded_get_path(
     // For shredded/partially-shredded targets (`typed_value` present), 
recurse into each field
     // separately to take advantage of deeper shredding in child fields.
     if let DataType::Struct(fields) = as_field.data_type() {
-        if target.typed_value_field().is_none() {
+        let has_variant_fields = fields
+            .iter()
+            .any(|f| f.try_extension_type::<VariantType>().is_ok());
+        if target.typed_value_field().is_none() && !has_variant_fields {
             return shred_basic_variant(target, VariantPath::default(), 
Some(as_field));
         }
 
-        let children = fields
-            .iter()
-            .map(|field| {
-                shredded_get_path(
-                    &target,
-                    &[VariantPathElement::from(field.name().as_str())],
-                    Some(field),
-                    cast_options,
-                )
-            })
-            .collect::<Result<Vec<_>>>()?;
+        let mut updated_fields = Vec::with_capacity(fields.len());
+        let mut children = Vec::with_capacity(fields.len());
+        for field in fields.iter() {
+            // If the field has VariantType extension metadata, extract it as a
+            // VariantArray instead of casting to the declared data type. This 
allows
+            // callers to request structs where some fields remain as variants.
+            // See test_struct_extraction_with_variant_fields for usage 
example.
+            let is_variant_field = 
field.try_extension_type::<VariantType>().is_ok();
+            let field_as_type = (!is_variant_field).then(|| field.as_ref());
+            let child = shredded_get_path(
+                &target,
+                &[VariantPathElement::from(field.name().as_str())],
+                field_as_type,
+                cast_options,
+            )?;
+
+            if is_variant_field {
+                // Variant field: the child's actual data type (VariantArray's 
internal
+                // StructArray) differs from the user's declared type (empty 
Struct),
+                // so we must update the field's data type and re-attach 
VariantType.
+                //
+                // When the field is entirely absent, shredded_get_path 
returns a
+                // NullArray. Construct an all-null VariantArray so the 
extension
+                // metadata is preserved.
+                let child = if child.data_type() == &DataType::Null {

Review Comment:
   `cargo llvm-cov --html test -p parquet-variant-compute` shows that this is 
not covered
   
   <img width="843" height="160" alt="Image" 
src="https://github.com/user-attachments/assets/e5e5bf41-175a-4851-ab1f-50a15c740b96";
 />



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to