This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 2121150 [Parquet] Read list field correctly (#234)
2121150 is described below
commit 2121150a0d5536865f4acdf8ee440b900d236e06
Author: Wakahisa <[email protected]>
AuthorDate: Thu Apr 29 18:26:40 2021 +0200
[Parquet] Read list field correctly (#234)
---
parquet/src/arrow/array_reader.rs | 13 +++++++++++--
parquet/src/arrow/arrow_writer.rs | 4 ++--
2 files changed, 13 insertions(+), 4 deletions(-)
diff --git a/parquet/src/arrow/array_reader.rs b/parquet/src/arrow/array_reader.rs
index a906147..943820f 100644
--- a/parquet/src/arrow/array_reader.rs
+++ b/parquet/src/arrow/array_reader.rs
@@ -1303,6 +1303,15 @@ impl<'a> TypeVisitor<Option<Box<dyn ArrayReader>>, &'a ArrayReaderBuilderContext
item_type
))),
_ => {
+ // a list is a group type with a single child. The list child's
+ // name comes from the child's field name.
+ let mut list_child = list_type.get_fields().first().ok_or(ArrowError(
+ "List GroupType should have a field".to_string(),
+ ))?;
+ // if the child's name is "list" and it has a child, then use this child
+ if list_child.name() == "list" && !list_child.get_fields().is_empty() {
+ list_child = list_child.get_fields().first().unwrap();
+ }
let arrow_type = self
.arrow_schema
.field_with_name(list_type.name())
@@ -1310,9 +1319,9 @@ impl<'a> TypeVisitor<Option<Box<dyn ArrayReader>>, &'a ArrayReaderBuilderContext
.map(|f| f.data_type().to_owned())
.unwrap_or_else(|| {
ArrowType::List(Box::new(Field::new(
- list_type.name(),
+ list_child.name(),
item_reader_type.clone(),
- list_type.is_optional(),
+ list_child.is_optional(),
)))
});
diff --git a/parquet/src/arrow/arrow_writer.rs b/parquet/src/arrow/arrow_writer.rs
index e8eaf33..c400cc1 100644
--- a/parquet/src/arrow/arrow_writer.rs
+++ b/parquet/src/arrow/arrow_writer.rs
@@ -92,6 +92,8 @@ impl<W: 'static + ParquetWriter> ArrowWriter<W> {
let mut row_group_writer = self.writer.next_row_group()?;
for (array, field) in batch.columns().iter().zip(batch.schema().fields()) {
let mut levels = batch_level.calculate_array_levels(array, field);
+ // Reverse levels as we pop() them when writing arrays
+ levels.reverse();
write_leaves(&mut row_group_writer, array, &mut levels)?;
}
@@ -741,7 +743,6 @@ mod tests {
}
#[test]
- #[ignore = "See ARROW-11294, data is correct but list field name is incorrect"]
fn arrow_writer_complex() {
// define schema
let struct_field_d = Field::new("d", DataType::Float64, true);
@@ -934,7 +935,6 @@ mod tests {
let actual_data = actual_batch.column(i).data();
assert_eq!(expected_data, actual_data);
- // assert_eq!(expected_data, actual_data, "L: {:#?}\nR: {:#?}", expected_data, actual_data);
}
}