mbrobbel commented on code in PR #5585:
URL: https://github.com/apache/arrow-rs/pull/5585#discussion_r1552207023
##########
arrow-array/src/array/union_array.rs:
##########
@@ -1201,4 +1241,70 @@ mod tests {
assert_eq!(v.len(), 1);
assert_eq!(v.as_string::<i32>().value(0), "baz");
}
+
+ #[test]
+ fn into_parts() {
+ let mut builder = UnionBuilder::new_dense();
+ builder.append::<Int32Type>("a", 1).unwrap();
+ builder.append::<Int8Type>("b", 2).unwrap();
+ builder.append::<Int32Type>("a", 3).unwrap();
+ let dense_union = builder.build().unwrap();
+
+ let field = [
+ Field::new("a", DataType::Int32, false),
+ Field::new("b", DataType::Int8, false),
+ ];
+ let field_type_ids = [0, 1];
+ let (union_fields, union_mode, type_ids, offsets, fields) =
dense_union.into_parts();
+ assert_eq!(
+ union_fields,
+ UnionFields::new(field_type_ids, field.clone())
+ );
+ assert_eq!(union_mode, UnionMode::Dense);
+ assert_eq!(type_ids, [0, 1, 0]);
+ assert!(offsets.is_some());
+ assert_eq!(offsets.as_ref().unwrap(), &[0, 0, 1]);
+ assert_eq!(fields.len(), 2);
+
+ let result = UnionArray::try_new(
+ &[0, 1],
+ type_ids.into_inner(),
+ offsets.map(ScalarBuffer::into_inner),
+ field
+ .clone()
+ .into_iter()
+ .zip(fields.into_iter().flatten())
+ .collect(),
+ );
+ assert!(result.is_ok());
+ assert_eq!(result.unwrap().len(), 3);
+
+ let mut builder = UnionBuilder::new_sparse();
+ builder.append::<Int32Type>("a", 1).unwrap();
+ builder.append::<Int8Type>("b", 2).unwrap();
+ builder.append::<Int32Type>("a", 3).unwrap();
+ let sparse_union = builder.build().unwrap();
+
+ let (union_fields, union_mode, type_ids, offsets, fields) =
sparse_union.into_parts();
+ assert_eq!(
+ union_fields,
+ UnionFields::new(field_type_ids, field.clone())
+ );
+ assert_eq!(union_mode, UnionMode::Sparse);
+ assert_eq!(type_ids, [0, 1, 0]);
+ assert!(offsets.is_none());
+ assert_eq!(fields.len(), 2);
+
+ let result = UnionArray::try_new(
+ &[0, 1],
+ type_ids.into_inner(),
+ offsets.map(ScalarBuffer::into_inner),
+ field
Review Comment:
Makes sense. The constructor expects the order of the child arrays to
match the provided field type ids. However, if those ids are unknown, one
could get the `type_id`s from the `UnionFields` iterator, which has no
order guarantee:
https://github.com/apache/arrow-rs/blob/5a0baf1aeeead4c461f1c8d14bfc65c85cfff78e/arrow-schema/src/fields.rs#L422-L427
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]