This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 2121150  [Parquet] Read list field correctly (#234)
2121150 is described below

commit 2121150a0d5536865f4acdf8ee440b900d236e06
Author: Wakahisa <[email protected]>
AuthorDate: Thu Apr 29 18:26:40 2021 +0200

    [Parquet] Read list field correctly (#234)
---
 parquet/src/arrow/array_reader.rs | 13 +++++++++++--
 parquet/src/arrow/arrow_writer.rs |  4 ++--
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/parquet/src/arrow/array_reader.rs b/parquet/src/arrow/array_reader.rs
index a906147..943820f 100644
--- a/parquet/src/arrow/array_reader.rs
+++ b/parquet/src/arrow/array_reader.rs
@@ -1303,6 +1303,15 @@ impl<'a> TypeVisitor<Option<Box<dyn ArrayReader>>, &'a ArrayReaderBuilderContext
                 item_type
             ))),
             _ => {
+                // a list is a group type with a single child. The list child's
+                // name comes from the child's field name.
+                let mut list_child = list_type.get_fields().first().ok_or(ArrowError(
+                    "List GroupType should have a field".to_string(),
+                ))?;
+                // if the child's name is "list" and it has a child, then use this child
+                if list_child.name() == "list" && !list_child.get_fields().is_empty() {
+                    list_child = list_child.get_fields().first().unwrap();
+                }
                 let arrow_type = self
                     .arrow_schema
                     .field_with_name(list_type.name())
@@ -1310,9 +1319,9 @@ impl<'a> TypeVisitor<Option<Box<dyn ArrayReader>>, &'a ArrayReaderBuilderContext
                     .map(|f| f.data_type().to_owned())
                     .unwrap_or_else(|| {
                         ArrowType::List(Box::new(Field::new(
-                            list_type.name(),
+                            list_child.name(),
                             item_reader_type.clone(),
-                            list_type.is_optional(),
+                            list_child.is_optional(),
                         )))
                     });
 
diff --git a/parquet/src/arrow/arrow_writer.rs b/parquet/src/arrow/arrow_writer.rs
index e8eaf33..c400cc1 100644
--- a/parquet/src/arrow/arrow_writer.rs
+++ b/parquet/src/arrow/arrow_writer.rs
@@ -92,6 +92,8 @@ impl<W: 'static + ParquetWriter> ArrowWriter<W> {
         let mut row_group_writer = self.writer.next_row_group()?;
         for (array, field) in batch.columns().iter().zip(batch.schema().fields()) {
             let mut levels = batch_level.calculate_array_levels(array, field);
+            // Reverse levels as we pop() them when writing arrays
+            levels.reverse();
             write_leaves(&mut row_group_writer, array, &mut levels)?;
         }
 
@@ -741,7 +743,6 @@ mod tests {
     }
 
     #[test]
-    #[ignore = "See ARROW-11294, data is correct but list field name is incorrect"]
     fn arrow_writer_complex() {
         // define schema
         let struct_field_d = Field::new("d", DataType::Float64, true);
@@ -934,7 +935,6 @@ mod tests {
             let actual_data = actual_batch.column(i).data();
 
             assert_eq!(expected_data, actual_data);
-            // assert_eq!(expected_data, actual_data, "L: {:#?}\nR: {:#?}", expected_data, actual_data);
         }
     }
 

Reply via email to