This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 5740774acc Fix AvroReader: Add union resolving for nested struct arrays (#12686)
5740774acc is described below
commit 5740774acc9febb51fafb17235066c4c5ef2b32a
Author: JonasDev1 <[email protected]>
AuthorDate: Thu Oct 3 00:07:07 2024 +0200
Fix AvroReader: Add union resolving for nested struct arrays (#12686)
* Add union resolving for nested struct arrays
* Add test
* Change test
* Reproduce index error
* fmt
---------
Co-authored-by: Andrew Lamb <[email protected]>
---
.../datasource/avro_to_arrow/arrow_array_reader.rs | 89 +++++++++++++++++++++-
1 file changed, 88 insertions(+), 1 deletion(-)
diff --git a/datafusion/core/src/datasource/avro_to_arrow/arrow_array_reader.rs b/datafusion/core/src/datasource/avro_to_arrow/arrow_array_reader.rs
index 3a5d50bba0..98b6702bc3 100644
--- a/datafusion/core/src/datasource/avro_to_arrow/arrow_array_reader.rs
+++ b/datafusion/core/src/datasource/avro_to_arrow/arrow_array_reader.rs
@@ -573,7 +573,7 @@ impl<'a, R: Read> AvroArrowArrayReader<'a, R> {
// extract list values, with non-lists converted to Value::Null
let array_item_count = rows
.iter()
- .map(|row| match row {
+ .map(|row| match maybe_resolve_union(row) {
Value::Array(values) => values.len(),
_ => 1,
})
@@ -1643,6 +1643,93 @@ mod test {
assert_batches_eq!(expected, &[batch]);
}
+ #[test]
+ fn test_avro_nullable_struct_array() {
+ let schema = apache_avro::Schema::parse_str(
+ r#"
+ {
+ "type": "record",
+ "name": "r1",
+ "fields": [
+ {
+ "name": "col1",
+ "type": [
+ "null",
+ {
+ "type": "array",
+ "items": {
+ "type": [
+ "null",
+ {
+ "type": "record",
+ "name": "Item",
+ "fields": [
+ {
+ "name": "id",
+ "type": "long"
+ }
+ ]
+ }
+ ]
+ }
+ }
+ ],
+ "default": null
+ }
+ ]
+ }"#,
+ )
+ .unwrap();
+ let jv1 = serde_json::json!({
+ "col1": [
+ {
+ "id": 234
+ },
+ {
+ "id": 345
+ }
+ ]
+ });
+ let r1 = apache_avro::to_value(jv1)
+ .unwrap()
+ .resolve(&schema)
+ .unwrap();
+ let r2 = apache_avro::to_value(serde_json::json!({ "col1": null }))
+ .unwrap()
+ .resolve(&schema)
+ .unwrap();
+
+ let mut w = apache_avro::Writer::new(&schema, vec![]);
+ for _i in 0..5 {
+ w.append(r1.clone()).unwrap();
+ }
+ w.append(r2).unwrap();
+ let bytes = w.into_inner().unwrap();
+
+ let mut reader = ReaderBuilder::new()
+ .read_schema()
+ .with_batch_size(20)
+ .build(std::io::Cursor::new(bytes))
+ .unwrap();
+ let batch = reader.next().unwrap().unwrap();
+ assert_eq!(batch.num_rows(), 6);
+ assert_eq!(batch.num_columns(), 1);
+
+ let expected = [
+ "+------------------------+",
+ "| col1 |",
+ "+------------------------+",
+ "| [{id: 234}, {id: 345}] |",
+ "| [{id: 234}, {id: 345}] |",
+ "| [{id: 234}, {id: 345}] |",
+ "| [{id: 234}, {id: 345}] |",
+ "| [{id: 234}, {id: 345}] |",
+ "| |",
+ "+------------------------+",
+ ];
+ assert_batches_eq!(expected, &[batch]);
+ }
+
#[test]
fn test_avro_iterator() {
let reader = build_reader("alltypes_plain.avro", 5);
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]