This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new aac1844  Fix NullArrayReader (#1245) (#1246)
aac1844 is described below

commit aac1844ce707e3744595472b0357c101a91b5749
Author: Raphael Taylor-Davies <[email protected]>
AuthorDate: Sat Jan 29 15:53:10 2022 +0000

    Fix NullArrayReader (#1245) (#1246)
---
 parquet/src/arrow/array_reader.rs |  4 ++++
 parquet/src/arrow/arrow_reader.rs | 42 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 46 insertions(+)

diff --git a/parquet/src/arrow/array_reader.rs b/parquet/src/arrow/array_reader.rs
index 01e54f6..e8d3dff 100644
--- a/parquet/src/arrow/array_reader.rs
+++ b/parquet/src/arrow/array_reader.rs
@@ -214,6 +214,10 @@ where
         // save definition and repetition buffers
         self.def_levels_buffer = self.record_reader.consume_def_levels()?;
         self.rep_levels_buffer = self.record_reader.consume_rep_levels()?;
+
+        // Must consume bitmap buffer
+        self.record_reader.consume_bitmap_buffer()?;
+
         self.record_reader.reset();
         Ok(Arc::new(array))
     }
diff --git a/parquet/src/arrow/arrow_reader.rs b/parquet/src/arrow/arrow_reader.rs
index 259a3c0..0dbc118 100644
--- a/parquet/src/arrow/arrow_reader.rs
+++ b/parquet/src/arrow/arrow_reader.rs
@@ -328,6 +328,48 @@ mod tests {
     }
 
     #[test]
+    fn test_null_column_reader_test() {
+        let mut file = tempfile::tempfile().unwrap();
+
+        let schema = "
+            message message {
+                OPTIONAL INT32 int32;
+            }
+        ";
+        let schema = Arc::new(parse_message_type(schema).unwrap());
+
+        let def_levels = vec![vec![0, 0, 0], vec![0, 0, 0, 0]];
+        generate_single_column_file_with_data::<Int32Type>(
+            &[vec![], vec![]],
+            Some(&def_levels),
+            file.try_clone().unwrap(), // Cannot use &mut File (#1163)
+            schema,
+            Some(Field::new("int32", ArrowDataType::Null, true)),
+            &Default::default(),
+        )
+        .unwrap();
+
+        file.rewind().unwrap();
+
+        let parquet_reader = SerializedFileReader::try_from(file).unwrap();
+        let mut arrow_reader = ParquetFileArrowReader::new(Arc::new(parquet_reader));
+        let record_reader = arrow_reader.get_record_reader(2).unwrap();
+
+        let batches = record_reader.collect::<ArrowResult<Vec<_>>>().unwrap();
+
+        assert_eq!(batches.len(), 4);
+        for batch in &batches[0..3] {
+            assert_eq!(batch.num_rows(), 2);
+            assert_eq!(batch.num_columns(), 1);
+            assert_eq!(batch.column(0).null_count(), 2);
+        }
+
+        assert_eq!(batches[3].num_rows(), 1);
+        assert_eq!(batches[3].num_columns(), 1);
+        assert_eq!(batches[3].column(0).null_count(), 1);
+    }
+
+    #[test]
     fn test_primitive_single_column_reader_test() {
         run_single_column_reader_tests::<BoolType, BooleanArray, _, BoolType>(
             2,

Reply via email to