ngli-me commented on PR #6758:
URL: https://github.com/apache/arrow-rs/pull/6758#issuecomment-2601381962
Sorry, I fell ill for a good while. I've added some additional tests to
hopefully cover more of the edge cases. I was trying to adapt it for `Schema`
as well, but I had some trouble initializing the `ListArray` with the inner
`StructArray`: I tried a few different things but was unable to get
`two_field` working. I think I'm misunderstanding something about the buffers,
as I get `InvalidArgumentError("ListArray data should contain a single buffer
only (value offsets), had 0")`. Otherwise, this should be ready for review!
``` rust
// Initialize schema
let a = Arc::new(Field::new("a", DataType::Int64, true));
let b = Arc::new(Field::new("b", DataType::Int64, false));
let c = Arc::new(Field::new("c", DataType::Int64, true));
// Struct type shared by field "1" and by the items of list field "2".
let abc = DataType::Struct(Fields::from(vec![a.clone(), b.clone(), c.clone()]));
let one = Arc::new(Field::new("1", abc.clone(), false));
// Item field of the list. It is defined once and reused when building the
// `ListArray` below so the array's data type matches the schema exactly.
let two_item = Arc::new(Field::new_list_field(abc, true));
let two = Arc::new(Field::new("2", DataType::List(two_item.clone()), false));
let exclamation = Arc::new(Field::new(
    "!",
    DataType::Struct(Fields::from(vec![one.clone(), two.clone()])),
    false,
));
let schema = Schema::new(vec![exclamation.clone()]);

// Initialize fields
let a_field = Int64Array::from(vec![Some(0), Some(1)]);
let b_field = Int64Array::from(vec![Some(2), Some(3)]);
let c_field = Int64Array::from(vec![None, Some(4)]);
let one_field = StructArray::from(vec![
    (a.clone(), Arc::new(a_field.clone()) as ArrayRef),
    (b.clone(), Arc::new(b_field.clone()) as ArrayRef),
    (c.clone(), Arc::new(c_field.clone()) as ArrayRef),
]);
// A `ListArray` needs a value-offsets buffer plus a child array holding the
// items. The previous version converted Struct-typed `ArrayData` (no buffers)
// straight into a `ListArray`, which produced
// "ListArray data should contain a single buffer only (value offsets), had 0".
// Here the two struct rows become two single-item lists (offsets 0, 1, 2).
let two_field = ListArray::new(
    two_item,
    arrow_buffer::OffsetBuffer::from_lengths([1, 1]),
    Arc::new(one_field.clone()) as ArrayRef,
    None, // no null list entries; field "2" is declared non-nullable
);
let exclamation_field = Arc::new(StructArray::from(vec![
    (one.clone(), Arc::new(one_field) as ArrayRef),
    (two.clone(), Arc::new(two_field) as ArrayRef),
]));

// Normalize all levels
let normalized = RecordBatch::try_new(Arc::new(schema), vec![exclamation_field])
    .expect("valid conversion")
    .normalize(".", None)
    .expect("valid normalization");

// NOTE(review): the "!.2.*" columns below assume `normalize` descends into the
// list's struct items and yields plain Int64 columns. That behavior is not
// visible in this snippet -- confirm against the `normalize` implementation.
let expected = RecordBatch::try_from_iter_with_nullable(vec![
    ("!.1.a", Arc::new(a_field.clone()) as ArrayRef, true),
    ("!.1.b", Arc::new(b_field.clone()) as ArrayRef, false),
    ("!.1.c", Arc::new(c_field.clone()) as ArrayRef, true),
    ("!.2.a", Arc::new(a_field.clone()) as ArrayRef, true),
    ("!.2.b", Arc::new(b_field.clone()) as ArrayRef, false),
    ("!.2.c", Arc::new(c_field.clone()) as ArrayRef, true),
])
.expect("valid conversion");
assert_eq!(expected, normalized);
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]