scovich commented on code in PR #8354: URL: https://github.com/apache/arrow-rs/pull/8354#discussion_r2357565134
########## parquet-variant-compute/src/variant_get.rs: ########## @@ -1122,64 +1121,109 @@ mod test { let result = variant_get(&array, options).unwrap(); // Should get StringArray - let expected: ArrayRef = Arc::new(StringArray::from(vec![Some("comedy"), Some("drama")])); + let expected: ArrayRef = + Arc::new(StringArray::from(vec![Some("comedy"), None, Some("drama")])); assert_eq!(&result, &expected); } /// Helper function to create a shredded variant array representing lists /// /// This creates an array that represents: /// Row 0: ["comedy", "drama"] ([0] is shredded, [1] is shredded - perfectly shredded) - /// Row 1: ["horror", null] ([0] is shredded, [1] is binary null - partially shredded) - /// Row 2: ["comedy", "drama", "romance"] (perfectly shredded) + /// Row 1: ["horror", 123] ([0] is shredded, [1] is int - partially shredded) /// /// The physical layout follows the shredding spec where: /// - metadata: contains list metadata /// - typed_value: StructArray with 0 index value /// - value: contains fallback for fn shredded_list_variant_array() -> ArrayRef { - // Create the base metadata for lists - - // Could add this as an api for VariantList, like VariantList::from() - fn build_list_metadata(vector: Vec<Variant>) -> (Vec<u8>, Vec<u8>) { - let mut builder = parquet_variant::VariantBuilder::new(); - let mut list = builder.new_list(); - for value in vector { - list.append_value(value); - } - list.finish(); - builder.finish() - } - let (metadata1, _) = - build_list_metadata(vec![Variant::String("comedy"), Variant::String("drama")]); + // Create metadata array + let metadata_array = + BinaryViewArray::from_iter_values(std::iter::repeat_n(EMPTY_VARIANT_METADATA_BYTES, 2)); - let (metadata2, _) = build_list_metadata(vec![Variant::String("horror"), Variant::Null]); + // Building the typed_value ListArray - let (metadata3, _) = build_list_metadata(vec![ - Variant::String("comedy"), - Variant::String("drama"), - Variant::String("romance"), + // Need a StructBuilder to create a ListBuilder + let fields = Fields::from(vec![ + Field::new("value", DataType::Binary, true), + Field::new("typed_value", DataType::Utf8, true), ]); + let field_builders = vec![ + make_builder(&DataType::Binary, 4), + make_builder(&DataType::Utf8, 4), + ]; + let struct_builder = StructBuilder::new(fields, field_builders); + + let mut builder = GenericListBuilder::<i32, StructBuilder>::new(struct_builder); + + // Row 0 index 0 + builder + .values() + .field_builder::<BinaryBuilder>(0) + .unwrap() + .append_null(); + builder + .values() + .field_builder::<StringBuilder>(1) + .unwrap() + .append_value("comedy"); + builder.values().append(true); + + // Row 0 index 1 + builder + .values() + .field_builder::<BinaryBuilder>(0) + .unwrap() + .append_null(); + builder + .values() + .field_builder::<StringBuilder>(1) + .unwrap() + .append_value("drama"); + builder.values().append(true); + + // Next row + builder.append(true); + + // Row 1 index 0 + builder + .values() + .field_builder::<BinaryBuilder>(0) + .unwrap() + .append_null(); + builder + .values() + .field_builder::<StringBuilder>(1) + .unwrap() + .append_value("horror"); + builder.values().append(true); + + // Row 1 index 1 + let mut variant_builder = VariantBuilder::new(); + variant_builder.append_value(123i32); // <------ couldn't find the right way to do it, used this as placeholder for binary + let (_, value) = variant_builder.finish(); + + builder + .values() + .field_builder::<BinaryBuilder>(0) + .unwrap() + .append_value(value); Review Comment: NOTE: The metadata column still needs a metadata entry for that row, else the different columns will have disagreeing row counts. Can append `EMPTY_VARIANT_METADATA_BYTES` if the value is primitive. ########## parquet-variant-compute/src/variant_get.rs: ########## @@ -1122,64 +1121,109 @@ mod test { let result = variant_get(&array, options).unwrap(); // Should get StringArray - let expected: ArrayRef = Arc::new(StringArray::from(vec![Some("comedy"), Some("drama")])); + let expected: ArrayRef = + Arc::new(StringArray::from(vec![Some("comedy"), None, Some("drama")])); assert_eq!(&result, &expected); } /// Helper function to create a shredded variant array representing lists /// /// This creates an array that represents: /// Row 0: ["comedy", "drama"] ([0] is shredded, [1] is shredded - perfectly shredded) - /// Row 1: ["horror", null] ([0] is shredded, [1] is binary null - partially shredded) - /// Row 2: ["comedy", "drama", "romance"] (perfectly shredded) + /// Row 1: ["horror", 123] ([0] is shredded, [1] is int - partially shredded) /// /// The physical layout follows the shredding spec where: /// - metadata: contains list metadata /// - typed_value: StructArray with 0 index value /// - value: contains fallback for fn shredded_list_variant_array() -> ArrayRef { - // Create the base metadata for lists - - // Could add this as an api for VariantList, like VariantList::from() - fn build_list_metadata(vector: Vec<Variant>) -> (Vec<u8>, Vec<u8>) { - let mut builder = parquet_variant::VariantBuilder::new(); - let mut list = builder.new_list(); - for value in vector { - list.append_value(value); - } - list.finish(); - builder.finish() - } - let (metadata1, _) = - build_list_metadata(vec![Variant::String("comedy"), Variant::String("drama")]); + // Create metadata array + let metadata_array = + BinaryViewArray::from_iter_values(std::iter::repeat_n(EMPTY_VARIANT_METADATA_BYTES, 2)); - let (metadata2, _) = build_list_metadata(vec![Variant::String("horror"), Variant::Null]); + // Building the typed_value ListArray - let (metadata3, _) = build_list_metadata(vec![ - Variant::String("comedy"), - Variant::String("drama"), - Variant::String("romance"), + // Need a StructBuilder to create a ListBuilder + let fields = Fields::from(vec![ + Field::new("value", DataType::Binary, true), + Field::new("typed_value", DataType::Utf8, true), ]); + let field_builders = vec![ + make_builder(&DataType::Binary, 4), + make_builder(&DataType::Utf8, 4), + ]; + let struct_builder = StructBuilder::new(fields, field_builders); + + let mut builder = GenericListBuilder::<i32, StructBuilder>::new(struct_builder); + + // Row 0 index 0 + builder + .values() + .field_builder::<BinaryBuilder>(0) + .unwrap() + .append_null(); + builder + .values() + .field_builder::<StringBuilder>(1) + .unwrap() + .append_value("comedy"); + builder.values().append(true); + + // Row 0 index 1 + builder + .values() + .field_builder::<BinaryBuilder>(0) + .unwrap() + .append_null(); + builder + .values() + .field_builder::<StringBuilder>(1) + .unwrap() + .append_value("drama"); + builder.values().append(true); + + // Next row + builder.append(true); + + // Row 1 index 0 + builder + .values() + .field_builder::<BinaryBuilder>(0) + .unwrap() + .append_null(); + builder + .values() + .field_builder::<StringBuilder>(1) + .unwrap() + .append_value("horror"); + builder.values().append(true); + + // Row 1 index 1 + let mut variant_builder = VariantBuilder::new(); + variant_builder.append_value(123i32); // <------ couldn't find the right way to do it, used this as placeholder for binary + let (_, value) = variant_builder.finish(); + + builder + .values() + .field_builder::<BinaryBuilder>(0) + .unwrap() + .append_value(value); Review Comment: NOTE: The metadata column still needs a metadata entry for that row, else the different columns will have disagreeing row counts. Can append `EMPTY_VARIANT_METADATA_BYTES` if the corresponding value is primitive. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org