alamb commented on code in PR #8514:
URL: https://github.com/apache/arrow-rs/pull/8514#discussion_r2392724491


##########
parquet-variant-compute/src/unshred_variant.rs:
##########
@@ -518,3 +546,59 @@ impl<'a> StructUnshredVariantBuilder<'a> {
         Ok(())
     }
 }
+
+/// Builder for unshredding list/array types with recursive element processing
+struct ListUnshredVariantBuilder<'a, L: ListLikeArray> {
+    value: Option<&'a BinaryViewArray>,
+    typed_value: &'a L,
+    element_unshredder: Box<UnshredVariantRowBuilder<'a>>,
+}
+
+impl<'a, L: ListLikeArray> ListUnshredVariantBuilder<'a, L> {
+    fn try_new(value: Option<&'a BinaryViewArray>, typed_value: &'a L) -> 
Result<Self> {
+        // Create a recursive unshredder for the list elements
+        // The element type comes from the values array of the list
+        let element_values = typed_value.values();
+
+        // For shredded lists, each element would be a 
ShreddedVariantFieldArray (struct)
+        // Extract value/typed_value from the element struct
+        let Some(element_values) = element_values.as_struct_opt() else {
+            return Err(ArrowError::InvalidArgumentError(format!(
+                "Invalid shredded variant array element: expected Struct, got 
{}",
+                element_values.data_type()
+            )));
+        };
+
+        // Create recursive unshredder for elements
+        //
+        // NOTE: A None/None array element is technically invalid, but the 
shredding spec
+        // requires us to emit `Variant::Null` when a required value is 
missing.
+        let element_unshredder = 
make_unshred_variant_row_builder(element_values.try_into()?)?
+            .unwrap_or_else(|| UnshredVariantRowBuilder::null(None));
+
+        Ok(Self {
+            value,
+            typed_value,
+            element_unshredder: Box::new(element_unshredder),
+        })
+    }
+
+    fn append_row(
+        &mut self,
+        builder: &mut impl VariantBuilderExt,
+        metadata: &VariantMetadata,
+        index: usize,
+    ) -> Result<()> {
+        handle_unshredded_case!(self, builder, metadata, index, false);
+
+        // If we get here, typed_value is valid and value is NULL -- process 
the list elements
+        let mut list_builder = builder.try_new_list()?;
+        for element_index in self.typed_value.element_range(index) {
+            self.element_unshredder
+                .append_row(&mut list_builder, metadata, element_index)?;
+        }
+
+        list_builder.finish();
+        Ok(())
+    }
+}

Review Comment:
   I wonder how we could test this reasonably 🤔 Maybe we can rework the tests 
in `cast_to_variant` to `shred` and then `unshred` an array and verify it 
survives round tripping 🤔 



##########
parquet/tests/variant_integration.rs:
##########
@@ -65,8 +57,8 @@ macro_rules! variant_test_case {
 // - cases 40, 42, 87, 127 and 128 are expected to fail always (they include 
invalid variants)
 // - the remaining cases are expected to (eventually) pass
 
-variant_test_case!(1, "Unshredding not yet supported for type: List(");
-variant_test_case!(2, "Unshredding not yet supported for type: List(");
+variant_test_case!(1);
+variant_test_case!(2);

Review Comment:
   Do I read this diff correctly that after this PR we handle all the test 
cases other than Decimal? If so, that is pretty rad 🤯 
   
   (btw I think the reference to 
`https://github.com/apache/arrow-rs/issues/8329` above `variant_test_case!(4);` 
 is old and can be removed )



##########
parquet/tests/variant_integration.rs:
##########
@@ -34,24 +34,16 @@ use std::{fs, path::PathBuf};
 
 type Result<T> = std::result::Result<T, String>;
 
-/// Creates a test function for a given case number
+/// Creates a test function for a given case number.
+///
+/// If an error message is provided, generate an error test case that expects 
it.
 ///
 /// Note the index is zero-based, while the case number is one-based
 macro_rules! variant_test_case {
-    ($case_num:literal) => {
-        paste::paste! {
-            #[test]
-            fn [<test_variant_integration_case_ $case_num>]() {
-                all_cases()[$case_num - 1].run()
-            }
-        }
-    };
-
-    // Generates an error test case, where the expected result is an error 
message
-    ($case_num:literal, $expected_error:literal) => {

Review Comment:
   TIL -- thanks @scovich 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to