This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 6859c877cd Handle primitive REPEATED field not contained in LIST 
annotated group (#6649)
6859c877cd is described below

commit 6859c877cd44cff63c46a1c2e14592538bd09934
Author: Ze'ev Maor <[email protected]>
AuthorDate: Sat Nov 2 12:30:30 2024 +0200

    Handle primitive REPEATED field not contained in LIST annotated group 
(#6649)
    
    * Handle primitive REPEATED field not contained in LIST annotated group
    
    * cargo fmt
    
    * Add UT
    
    * cargo fmt
    
    * comment
    
    * clippy
    
    * clippy
    
    * update parquet-testing module
    
    * cargo fmt
    
    ---------
    
    Co-authored-by: Ze'ev Maor <[email protected]>
---
 parquet-testing              |   2 +-
 parquet/src/record/reader.rs | 137 ++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 137 insertions(+), 2 deletions(-)

diff --git a/parquet-testing b/parquet-testing
index 50af3d8ce2..550368ca77 160000
--- a/parquet-testing
+++ b/parquet-testing
@@ -1 +1 @@
-Subproject commit 50af3d8ce206990d81014b1862e5ce7380dc3e08
+Subproject commit 550368ca77b97231efead39251a96bd6f8f08c6e
diff --git a/parquet/src/record/reader.rs b/parquet/src/record/reader.rs
index 1f9128a8b4..fd6ca7cdd5 100644
--- a/parquet/src/record/reader.rs
+++ b/parquet/src/record/reader.rs
@@ -138,7 +138,17 @@ impl TreeBuilder {
                 .column_descr_ptr();
             let col_reader = row_group_reader.get_column_reader(orig_index)?;
             let column = TripletIter::new(col_descr, col_reader, 
self.batch_size);
-            Reader::PrimitiveReader(field, Box::new(column))
+            let reader = Reader::PrimitiveReader(field.clone(), 
Box::new(column));
+            if repetition == Repetition::REPEATED {
+                Reader::RepeatedReader(
+                    field,
+                    curr_def_level - 1,
+                    curr_rep_level - 1,
+                    Box::new(reader),
+                )
+            } else {
+                reader
+            }
         } else {
             match field.get_basic_info().converted_type() {
                 // List types
@@ -1688,6 +1698,131 @@ mod tests {
         assert_eq!(rows, expected_rows);
     }
 
+    #[test]
+    fn test_tree_reader_handle_primitive_repeated_fields_with_no_annotation() {
+        // In this test the REPEATED fields are primitives
+        let rows = test_file_reader_rows("repeated_primitive_no_list.parquet", 
None).unwrap();
+        let expected_rows = vec![
+            row![
+                (
+                    "Int32_list".to_string(),
+                    Field::ListInternal(make_list([0, 1, 2, 
3].map(Field::Int).to_vec()))
+                ),
+                (
+                    "String_list".to_string(),
+                    Field::ListInternal(make_list(
+                        ["foo", "zero", "one", "two"]
+                            .map(|s| Field::Str(s.to_string()))
+                            .to_vec()
+                    ))
+                ),
+                (
+                    "group_of_lists".to_string(),
+                    group![
+                        (
+                            "Int32_list_in_group".to_string(),
+                            Field::ListInternal(make_list([0, 1, 2, 
3].map(Field::Int).to_vec()))
+                        ),
+                        (
+                            "String_list_in_group".to_string(),
+                            Field::ListInternal(make_list(
+                                ["foo", "zero", "one", "two"]
+                                    .map(|s| Field::Str(s.to_string()))
+                                    .to_vec()
+                            ))
+                        )
+                    ]
+                )
+            ],
+            row![
+                (
+                    "Int32_list".to_string(),
+                    Field::ListInternal(make_list(vec![]))
+                ),
+                (
+                    "String_list".to_string(),
+                    Field::ListInternal(make_list(
+                        ["three"].map(|s| Field::Str(s.to_string())).to_vec()
+                    ))
+                ),
+                (
+                    "group_of_lists".to_string(),
+                    group![
+                        (
+                            "Int32_list_in_group".to_string(),
+                            Field::ListInternal(make_list(vec![]))
+                        ),
+                        (
+                            "String_list_in_group".to_string(),
+                            Field::ListInternal(make_list(
+                                ["three"].map(|s| 
Field::Str(s.to_string())).to_vec()
+                            ))
+                        )
+                    ]
+                )
+            ],
+            row![
+                (
+                    "Int32_list".to_string(),
+                    Field::ListInternal(make_list(vec![Field::Int(4)]))
+                ),
+                (
+                    "String_list".to_string(),
+                    Field::ListInternal(make_list(
+                        ["four"].map(|s| Field::Str(s.to_string())).to_vec()
+                    ))
+                ),
+                (
+                    "group_of_lists".to_string(),
+                    group![
+                        (
+                            "Int32_list_in_group".to_string(),
+                            Field::ListInternal(make_list(vec![Field::Int(4)]))
+                        ),
+                        (
+                            "String_list_in_group".to_string(),
+                            Field::ListInternal(make_list(
+                                ["four"].map(|s| 
Field::Str(s.to_string())).to_vec()
+                            ))
+                        )
+                    ]
+                )
+            ],
+            row![
+                (
+                    "Int32_list".to_string(),
+                    Field::ListInternal(make_list([5, 6, 7, 
8].map(Field::Int).to_vec()))
+                ),
+                (
+                    "String_list".to_string(),
+                    Field::ListInternal(make_list(
+                        ["five", "six", "seven", "eight"]
+                            .map(|s| Field::Str(s.to_string()))
+                            .to_vec()
+                    ))
+                ),
+                (
+                    "group_of_lists".to_string(),
+                    group![
+                        (
+                            "Int32_list_in_group".to_string(),
+                            Field::ListInternal(make_list([5, 6, 7, 
8].map(Field::Int).to_vec()))
+                        ),
+                        (
+                            "String_list_in_group".to_string(),
+                            Field::ListInternal(make_list(
+                                ["five", "six", "seven", "eight"]
+                                    .map(|s| Field::Str(s.to_string()))
+                                    .to_vec()
+                            ))
+                        )
+                    ]
+                )
+            ],
+        ];
+        assert_eq!(rows, expected_rows);
+    }
+
     fn test_file_reader_rows(file_name: &str, schema: Option<Type>) -> 
Result<Vec<Row>> {
         let file = get_test_file(file_name);
         let file_reader: Box<dyn FileReader> = 
Box::new(SerializedFileReader::new(file)?);

Reply via email to