sarutak commented on code in PR #7663:
URL: https://github.com/apache/arrow-datafusion/pull/7663#discussion_r1342876564
##########
datafusion/sqllogictest/test_files/avro.slt:
##########
@@ -101,11 +101,11 @@ SELECT id, CAST(string_col AS varchar) FROM
alltypes_plain_multi_files
1 1
# test avro nested records
-query ??
-SELECT f1, f2 FROM nested_records
+query ????
+SELECT f1, f2, f3, f4 FROM nested_records
----
-{ns2.record2.f1_1: aaa, ns2.record2.f1_2: 10, ns2.record2.f1_3:
{ns3.record3.f1_3_1: 3.14}} [{ns4.record4.f2_1: true, ns4.record4.f2_2: 1.2},
{ns4.record4.f2_1: true, ns4.record4.f2_2: 2.2}]
-{ns2.record2.f1_1: bbb, ns2.record2.f1_2: 20, ns2.record2.f1_3:
{ns3.record3.f1_3_1: 3.14}} [{ns4.record4.f2_1: false, ns4.record4.f2_2: 10.2}]
+{f1_1: aaa, f1_2: 10, f1_3: {f1_3_1: 3.14}} [{f2_1: true, f2_2: 1.2}, {f2_1:
true, f2_2: 2.2}] {f3_1: xyz} [{f4_1: 200}, {f4_1: }]
Review Comment:
`[{f4_1: 200}, {f4_1: }]` seems a little bit weird. it should be like
`[{f4_1: 200}, NULL]` or `[{f4_1: 200}, ]`?
##########
datafusion/core/src/datasource/avro_to_arrow/arrow_array_reader.rs:
##########
@@ -35,7 +35,7 @@ use crate::arrow::error::ArrowError;
use crate::arrow::record_batch::RecordBatch;
use crate::arrow::util::bit_util;
use crate::error::{DataFusionError, Result};
-use apache_avro::schema::RecordSchema;
+use apache_avro::schema::{RecordSchema, UnionSchema};
Review Comment:
`UnionSchema` is unused.
##########
datafusion/core/src/datasource/avro_to_arrow/schema.rs:
##########
@@ -129,8 +129,8 @@ fn schema_to_field_with_props(
}*/
schema_to_field_with_props(
&field.schema,
- Some(&format!("{}.{}", name.fullname(None),
field.name)),
- false,
+ Some(&field.name),
+ nullable,
Review Comment:
I don't think this change is correct because nullability of a record an its
children are independent.
##########
datafusion/core/src/datasource/avro_to_arrow/arrow_array_reader.rs:
##########
@@ -97,15 +97,15 @@ impl<'a, R: Read> AvroArrowArrayReader<'a, R> {
schema_lookup: &'b mut BTreeMap<String, usize>,
) -> Result<&'b BTreeMap<String, usize>> {
match schema {
- AvroSchema::Record(RecordSchema {
- name,
- fields,
- lookup,
- ..
- }) => {
+ AvroSchema::Union(union_schema) => {
+ if union_schema.is_nullable() {
+ let rec_schema = &union_schema.variants()[1];
+ Self::child_schema_lookup(rec_schema, schema_lookup)?;
+ }
+ }
+ AvroSchema::Record(RecordSchema { fields, lookup, .. }) => {
lookup.iter().for_each(|(field_name, pos)| {
- schema_lookup
- .insert(format!("{}.{}", name.fullname(None),
field_name), *pos);
+ schema_lookup.insert(field_name.clone(), *pos);
Review Comment:
This part looks good to me.
I agree that it's better not to use full name of a record.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]