This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 8c85d34869 Write Page Offset Index For All-Nan Pages (#4567)
8c85d34869 is described below

commit 8c85d34869e0742b7e9db41a98f0b499f1014830
Author: lee <[email protected]>
AuthorDate: Fri Jul 28 18:21:23 2023 +0800

    Write Page Offset Index For All-Nan Pages (#4567)
    
    * fix offset index none
    
    * add test
    
    * add test
    
    * Cleanup
    
    ---------
    
    Co-authored-by: guojie.lgj <[email protected]>
    Co-authored-by: Raphael Taylor-Davies <[email protected]>
---
 parquet/src/arrow/arrow_writer/mod.rs | 21 +++++++++++++++++++++
 parquet/src/column/writer/mod.rs      | 13 +++++--------
 2 files changed, 26 insertions(+), 8 deletions(-)

diff --git a/parquet/src/arrow/arrow_writer/mod.rs b/parquet/src/arrow/arrow_writer/mod.rs
index ccec4ffb20..d3d4e2626f 100644
--- a/parquet/src/arrow/arrow_writer/mod.rs
+++ b/parquet/src/arrow/arrow_writer/mod.rs
@@ -1650,6 +1650,27 @@ mod tests {
         writer.close().unwrap();
     }
 
+    #[test]
+    fn check_page_offset_index_with_nan() {
+        let values = Arc::new(Float64Array::from(vec![f64::NAN; 10]));
+        let schema = Schema::new(vec![Field::new("col", DataType::Float64, true)]);
+        let batch = RecordBatch::try_new(Arc::new(schema), vec![values]).unwrap();
+
+        let mut out = Vec::with_capacity(1024);
+        let mut writer = ArrowWriter::try_new(&mut out, batch.schema(), None)
+            .expect("Unable to write file");
+        writer.write(&batch).unwrap();
+        let file_meta_data = writer.close().unwrap();
+        for row_group in file_meta_data.row_groups {
+            for column in row_group.columns {
+                assert!(column.offset_index_offset.is_some());
+                assert!(column.offset_index_length.is_some());
+                assert!(column.column_index_offset.is_none());
+                assert!(column.column_index_length.is_none());
+            }
+        }
+    }
+
     #[test]
     fn i8_single_column() {
         required_and_optional::<Int8Array, _>(0..SMALL_SIZE as i8);
diff --git a/parquet/src/column/writer/mod.rs b/parquet/src/column/writer/mod.rs
index 1cacfe7933..3d8ce283ae 100644
--- a/parquet/src/column/writer/mod.rs
+++ b/parquet/src/column/writer/mod.rs
@@ -500,14 +500,11 @@ impl<'a, E: ColumnValueEncoder> GenericColumnWriter<'a, E> {
         let metadata = self.write_column_metadata()?;
         self.page_writer.close()?;
 
-        let (column_index, offset_index) = if self.column_index_builder.valid() {
-            // build the column and offset index
-            let column_index = self.column_index_builder.build_to_thrift();
-            let offset_index = self.offset_index_builder.build_to_thrift();
-            (Some(column_index), Some(offset_index))
-        } else {
-            (None, None)
-        };
+        let column_index = self
+            .column_index_builder
+            .valid()
+            .then(|| self.column_index_builder.build_to_thrift());
+        let offset_index = Some(self.offset_index_builder.build_to_thrift());
 
         Ok(ColumnCloseResult {
             bytes_written: self.column_metrics.total_bytes_written,

Reply via email to