alamb commented on code in PR #8514:
URL: https://github.com/apache/arrow-rs/pull/8514#discussion_r2392724491
##########
parquet-variant-compute/src/unshred_variant.rs:
##########
@@ -518,3 +546,59 @@ impl<'a> StructUnshredVariantBuilder<'a> {
Ok(())
}
}
+
+/// Builder for unshredding list/array types with recursive element processing
+struct ListUnshredVariantBuilder<'a, L: ListLikeArray> {
+ value: Option<&'a BinaryViewArray>,
+ typed_value: &'a L,
+ element_unshredder: Box<UnshredVariantRowBuilder<'a>>,
+}
+
+impl<'a, L: ListLikeArray> ListUnshredVariantBuilder<'a, L> {
+ fn try_new(value: Option<&'a BinaryViewArray>, typed_value: &'a L) ->
Result<Self> {
+ // Create a recursive unshredder for the list elements
+ // The element type comes from the values array of the list
+ let element_values = typed_value.values();
+
+ // For shredded lists, each element would be a
ShreddedVariantFieldArray (struct)
+ // Extract value/typed_value from the element struct
+ let Some(element_values) = element_values.as_struct_opt() else {
+ return Err(ArrowError::InvalidArgumentError(format!(
+ "Invalid shredded variant array element: expected Struct, got
{}",
+ element_values.data_type()
+ )));
+ };
+
+ // Create recursive unshredder for elements
+ //
+ // NOTE: A None/None array element is technically invalid, but the
shredding spec
+ // requires us to emit `Variant::Null` when a required value is
missing.
+ let element_unshredder =
make_unshred_variant_row_builder(element_values.try_into()?)?
+ .unwrap_or_else(|| UnshredVariantRowBuilder::null(None));
+
+ Ok(Self {
+ value,
+ typed_value,
+ element_unshredder: Box::new(element_unshredder),
+ })
+ }
+
+ fn append_row(
+ &mut self,
+ builder: &mut impl VariantBuilderExt,
+ metadata: &VariantMetadata,
+ index: usize,
+ ) -> Result<()> {
+ handle_unshredded_case!(self, builder, metadata, index, false);
+
+ // If we get here, typed_value is valid and value is NULL -- process
the list elements
+ let mut list_builder = builder.try_new_list()?;
+ for element_index in self.typed_value.element_range(index) {
+ self.element_unshredder
+ .append_row(&mut list_builder, metadata, element_index)?;
+ }
+
+ list_builder.finish();
+ Ok(())
+ }
+}
Review Comment:
I wonder how we could test this reasonably 🤔 Maybe we can rework the tests
in `cast_to_variant` to `shred` and then `unshred` an array and verify it
survives round tripping 🤔
##########
parquet/tests/variant_integration.rs:
##########
@@ -65,8 +57,8 @@ macro_rules! variant_test_case {
// - cases 40, 42, 87, 127 and 128 are expected to fail always (they include
invalid variants)
// - the remaining cases are expected to (eventually) pass
-variant_test_case!(1, "Unshredding not yet supported for type: List(");
-variant_test_case!(2, "Unshredding not yet supported for type: List(");
+variant_test_case!(1);
+variant_test_case!(2);
Review Comment:
Do I read this diff correctly that after this PR we handle all the test
cases other than Decimal? If so, that is pretty rad 🤯
(btw I think the reference to
`https://github.com/apache/arrow-rs/issues/8329` above `variant_test_case!(4);`
is old and can be removed )
##########
parquet/tests/variant_integration.rs:
##########
@@ -34,24 +34,16 @@ use std::{fs, path::PathBuf};
type Result<T> = std::result::Result<T, String>;
-/// Creates a test function for a given case number
+/// Creates a test function for a given case number.
+///
+/// If an error message is provided, generate an error test case that expects
it.
///
/// Note the index is zero-based, while the case number is one-based
macro_rules! variant_test_case {
- ($case_num:literal) => {
- paste::paste! {
- #[test]
- fn [<test_variant_integration_case_ $case_num>]() {
- all_cases()[$case_num - 1].run()
- }
- }
- };
-
- // Generates an error test case, where the expected result is an error
message
- ($case_num:literal, $expected_error:literal) => {
Review Comment:
TIL -- thanks @scovich
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]