sdf-jkl commented on code in PR #8354:
URL: https://github.com/apache/arrow-rs/pull/8354#discussion_r2912576417
##########
parquet-variant-compute/src/variant_get.rs:
##########
@@ -1848,6 +1979,282 @@ mod test {
assert_eq!(&result, &expected);
}
+ /// This test uses a pre-shredded list array and validates index-path access.
+ #[test]
+ fn test_shredded_list_index_access() {
+ let array = shredded_list_variant_array();
+ // Test: Extract the 0 index field as VariantArray first
+ let options = GetOptions::new_with_path(VariantPath::from(0));
+ let result = variant_get(&array, options).unwrap();
+ let result_variant = VariantArray::try_new(&result).unwrap();
+ assert_eq!(result_variant.len(), 2);
+
+ // Row 0: expect 0 index = "comedy"
+ assert_eq!(result_variant.value(0), Variant::from("comedy"));
+ // Row 1: expect 0 index = "horror"
+ assert_eq!(result_variant.value(1), Variant::from("horror"));
+ }
+
+ /// Test extracting shredded list field with type conversion.
+ #[test]
+ fn test_shredded_list_as_string() {
+ let array = shredded_list_variant_array();
+ // Test: Extract the 0 index values as StringArray (type conversion)
+ let field = Field::new("typed_value", DataType::Utf8, false);
+ let options = GetOptions::new_with_path(VariantPath::from(0))
+ .with_as_type(Some(FieldRef::from(field)));
+ let result = variant_get(&array, options).unwrap();
+ // Should get StringArray
+ let expected: ArrayRef =
Arc::new(StringArray::from(vec![Some("comedy"), Some("horror")]));
+ assert_eq!(&result, &expected);
+ }
+
+ #[test]
+ fn test_shredded_list_index_access_from_value_field() {
+ let array = shredded_list_variant_array();
+ // Index 1 maps to "drama" for row 0, and to fallback value 123 for row 1.
+ let options = GetOptions::new_with_path(VariantPath::from(1));
+ let result = variant_get(&array, options).unwrap();
+ let result_variant = VariantArray::try_new(&result).unwrap();
+
+ assert_eq!(result_variant.value(0), Variant::from("drama"));
+ assert_eq!(result_variant.value(1).as_int64(), Some(123));
+ }
+
+ #[test]
+ fn test_shredded_list_index_access_from_value_field_as_int64() {
+ let array = shredded_list_variant_array();
+ let field = Field::new("typed_value", DataType::Int64, true);
+ let options = GetOptions::new_with_path(VariantPath::from(1))
+ .with_as_type(Some(FieldRef::from(field)));
+ let result = variant_get(&array, options).unwrap();
+
+ // "drama" -> NULL, 123 -> 123.
+ let expected: ArrayRef = Arc::new(Int64Array::from(vec![None,
Some(123)]));
+ assert_eq!(&result, &expected);
+ }
+
+ #[test]
+ fn test_shredded_list_index_out_of_bounds_unsafe_cast_errors() {
+ let options =
+
GetOptions::new_with_path(VariantPath::from(10)).with_cast_options(CastOptions {
+ safe: false,
+ ..Default::default()
+ });
+
+ let err = variant_get(&shredded_list_variant_array(),
options.clone()).unwrap_err();
+ assert!(err.to_string().contains("Cannot access index '10'"));
+ }
+
+ #[test]
+ fn test_shredded_large_list_index_access_from_value_field() {
+ let array = shredded_large_list_variant_array();
+ // Index 1 maps to "drama" for row 0, and to fallback value 123 for row 1.
+ let options = GetOptions::new_with_path(VariantPath::from(1));
+ let result = variant_get(&array, options).unwrap();
+ let result_variant = VariantArray::try_new(&result).unwrap();
+
+ assert_eq!(result_variant.value(0), Variant::from("drama"));
+ assert_eq!(result_variant.value(1).as_int64(), Some(123));
+ }
+
+ #[test]
+ fn test_shredded_large_list_index_out_of_bounds_unsafe_cast_errors() {
+ let options =
+
GetOptions::new_with_path(VariantPath::from(10)).with_cast_options(CastOptions {
+ safe: false,
+ ..Default::default()
+ });
+
+ let err = variant_get(&shredded_large_list_variant_array(),
options).unwrap_err();
+ assert!(err.to_string().contains("Cannot access index '10'"));
+ }
+
+ #[test]
+ fn test_shredded_list_view_index_access_from_value_field() {
+ let array = shredded_list_view_variant_array();
+ let options = GetOptions::new_with_path(VariantPath::from(1));
+ let result = variant_get(&array, options).unwrap();
+ let result_variant = VariantArray::try_new(&result).unwrap();
+
+ assert_eq!(result_variant.value(0), Variant::from("drama"));
+ assert_eq!(result_variant.value(1).as_int64(), Some(123));
+ }
+
+ #[test]
+ fn test_shredded_list_view_index_out_of_bounds_unsafe_cast_errors() {
+ let options =
+
GetOptions::new_with_path(VariantPath::from(10)).with_cast_options(CastOptions {
+ safe: false,
+ ..Default::default()
+ });
+
+ let err = variant_get(&shredded_list_view_variant_array(),
options).unwrap_err();
+ assert!(err.to_string().contains("Cannot access index '10'"));
+ }
+
+ #[test]
+ fn test_shredded_large_list_view_index_access_from_value_field() {
+ let array = shredded_large_list_view_variant_array();
+ let options = GetOptions::new_with_path(VariantPath::from(1));
+ let result = variant_get(&array, options).unwrap();
+ let result_variant = VariantArray::try_new(&result).unwrap();
+
+ assert_eq!(result_variant.value(0), Variant::from("drama"));
+ assert_eq!(result_variant.value(1).as_int64(), Some(123));
+ }
+
+ #[test]
+ fn test_shredded_large_list_view_index_out_of_bounds_unsafe_cast_errors() {
+ let options =
+
GetOptions::new_with_path(VariantPath::from(10)).with_cast_options(CastOptions {
+ safe: false,
+ ..Default::default()
+ });
+
+ let err = variant_get(&shredded_large_list_view_variant_array(),
options).unwrap_err();
+ assert!(err.to_string().contains("Cannot access index '10'"));
+ }
+
+ #[test]
+ fn test_shredded_list_in_struct_index_access() {
+ let array = shredded_struct_with_list_variant_array();
+ let options =
GetOptions::new_with_path(VariantPath::try_from("a[1]").unwrap());
+ let result = variant_get(&array, options).unwrap();
+ let result_variant = VariantArray::try_new(&result).unwrap();
+
+ assert_eq!(result_variant.value(0), Variant::from("drama"));
+ assert_eq!(result_variant.value(1).as_int64(), Some(123));
+ }
+
+ #[test]
+ fn test_shredded_struct_in_list_field_access() {
+ let array = shredded_list_of_struct_variant_array();
+ let field = Field::new("x", DataType::Int32, true);
+ let path = VariantPath::from(0).join("x");
+ let options =
GetOptions::new_with_path(path).with_as_type(Some(FieldRef::from(field)));
+ let result = variant_get(&array, options).unwrap();
+
+ let expected: ArrayRef = Arc::new(Int32Array::from(vec![Some(1),
Some(3)]));
+ assert_eq!(&result, &expected);
+ }
+
+ #[test]
+ fn test_shredded_list_of_lists_index_access() {
+ let array = shredded_list_of_lists_variant_array();
+ let path = VariantPath::from(0).join(1);
+
+ let result = variant_get(&array,
GetOptions::new_with_path(path.clone())).unwrap();
+ let result_variant = VariantArray::try_new(&result).unwrap();
+ assert_eq!(result_variant.value(0), Variant::from("b"));
+ assert_eq!(result_variant.value(1).as_int64(), Some(123));
+
+ let field = Field::new("typed_value", DataType::Int64, true);
+ let casted = variant_get(
+ &array,
+
GetOptions::new_with_path(path).with_as_type(Some(FieldRef::from(field))),
+ )
+ .unwrap();
+ let expected: ArrayRef = Arc::new(Int64Array::from(vec![None,
Some(123)]));
+ assert_eq!(&casted, &expected);
+ }
+
+ /// Helper to create a shredded list variant array used by list index tests.
+ ///
+ /// Rows:
+ /// 1. `["comedy", "drama"]` (fully shred-able as `Utf8`)
+ /// 2. `["horror", 123]` (partially shredded, with fallback for the numeric element)
+ fn shredded_list_variant_array() -> ArrayRef {
+ let json_rows: ArrayRef = Arc::new(StringArray::from(vec![
+ Some(r#"["comedy", "drama"]"#),
+ Some(r#"["horror", 123]"#),
+ ]));
+ let input = json_to_variant(&json_rows).unwrap();
+
+ let list_schema = DataType::List(Arc::new(Field::new("item",
DataType::Utf8, true)));
+ let shredded = shred_variant(&input, &list_schema).unwrap();
+ ArrayRef::from(shredded)
+ }
+
+ fn shredded_struct_with_list_variant_array() -> ArrayRef {
+ let json_rows: ArrayRef = Arc::new(StringArray::from(vec![
+ Some(r#"{"a": ["comedy", "drama"]}"#),
+ Some(r#"{"a": ["horror", 123]}"#),
+ ]));
+ let input = json_to_variant(&json_rows).unwrap();
+
+ let list_schema = DataType::List(Arc::new(Field::new("item",
DataType::Utf8, true)));
+ let shredding_schema = ShreddedSchemaBuilder::default()
+ .with_path("a", &list_schema)
+ .unwrap()
+ .build();
+ let shredded = shred_variant(&input, &shredding_schema).unwrap();
+ ArrayRef::from(shredded)
+ }
+
+ fn shredded_list_of_struct_variant_array() -> ArrayRef {
+ let json_rows: ArrayRef = Arc::new(StringArray::from(vec![
+ Some(r#"[{"x": 1}, {"x": 2}]"#),
+ Some(r#"[{"x": 3}, {"y": 4}]"#),
+ ]));
+ let input = json_to_variant(&json_rows).unwrap();
+
+ let struct_type =
+ DataType::Struct(Fields::from(vec![Field::new("x",
DataType::Int32, true)]));
+ let list_schema = DataType::List(Arc::new(Field::new("item",
struct_type, true)));
+ let shredded = shred_variant(&input, &list_schema).unwrap();
+ ArrayRef::from(shredded)
+ }
+
+ fn shredded_list_of_lists_variant_array() -> ArrayRef {
+ let json_rows: ArrayRef = Arc::new(StringArray::from(vec![
+ Some(r#"[["a", "b"], ["c", "d"]]"#),
+ Some(r#"[["x", 123], ["y", "z"]]"#),
+ ]));
+ let input = json_to_variant(&json_rows).unwrap();
+
+ let inner_list = DataType::List(Arc::new(Field::new("item",
DataType::Utf8, true)));
+ let outer_list = DataType::List(Arc::new(Field::new("item",
inner_list, true)));
+ let shredded = shred_variant(&input, &outer_list).unwrap();
+ ArrayRef::from(shredded)
+ }
+
+ fn shredded_large_list_variant_array() -> ArrayRef {
+ let json_rows: ArrayRef = Arc::new(StringArray::from(vec![
+ Some(r#"["comedy", "drama"]"#),
+ Some(r#"["horror", 123]"#),
+ ]));
+ let input = json_to_variant(&json_rows).unwrap();
+
+ let list_schema = DataType::LargeList(Arc::new(Field::new("item",
DataType::Utf8, true)));
+ let shredded = shred_variant(&input, &list_schema).unwrap();
+ ArrayRef::from(shredded)
+ }
+
+ fn shredded_list_view_variant_array() -> ArrayRef {
+ let json_rows: ArrayRef = Arc::new(StringArray::from(vec![
+ Some(r#"["comedy", "drama"]"#),
+ Some(r#"["horror", 123]"#),
+ ]));
+ let input = json_to_variant(&json_rows).unwrap();
+
+ let list_schema = DataType::ListView(Arc::new(Field::new("item",
DataType::Utf8, true)));
+ let shredded = shred_variant(&input, &list_schema).unwrap();
+ ArrayRef::from(shredded)
+ }
+
+ fn shredded_large_list_view_variant_array() -> ArrayRef {
+ let json_rows: ArrayRef = Arc::new(StringArray::from(vec![
+ Some(r#"["comedy", "drama"]"#),
+ Some(r#"["horror", 123]"#),
+ ]));
+ let input = json_to_variant(&json_rows).unwrap();
+
+ let list_schema =
+ DataType::LargeListView(Arc::new(Field::new("item",
DataType::Utf8, true)));
+ let shredded = shred_variant(&input, &list_schema).unwrap();
+ ArrayRef::from(shredded)
+ }
Review Comment:
Thanks, I've added the changes in this commit:
https://github.com/apache/arrow-rs/pull/8354/commits/18d04d718103fd1334b54bc01cad24c8dcb9d132
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]