This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 8c85d34869 Write Page Offset Index For All-Nan Pages (#4567)
8c85d34869 is described below
commit 8c85d34869e0742b7e9db41a98f0b499f1014830
Author: lee <[email protected]>
AuthorDate: Fri Jul 28 18:21:23 2023 +0800
Write Page Offset Index For All-Nan Pages (#4567)
* fix offset index none
* add test
* add test
* Cleanup
---------
Co-authored-by: guojie.lgj <[email protected]>
Co-authored-by: Raphael Taylor-Davies <[email protected]>
---
parquet/src/arrow/arrow_writer/mod.rs | 21 +++++++++++++++++++++
parquet/src/column/writer/mod.rs | 13 +++++--------
2 files changed, 26 insertions(+), 8 deletions(-)
diff --git a/parquet/src/arrow/arrow_writer/mod.rs b/parquet/src/arrow/arrow_writer/mod.rs
index ccec4ffb20..d3d4e2626f 100644
--- a/parquet/src/arrow/arrow_writer/mod.rs
+++ b/parquet/src/arrow/arrow_writer/mod.rs
@@ -1650,6 +1650,27 @@ mod tests {
writer.close().unwrap();
}
+ #[test]
+ fn check_page_offset_index_with_nan() {
+ let values = Arc::new(Float64Array::from(vec![f64::NAN; 10]));
+ let schema = Schema::new(vec![Field::new("col", DataType::Float64, true)]);
+ let batch = RecordBatch::try_new(Arc::new(schema), vec![values]).unwrap();
+
+ let mut out = Vec::with_capacity(1024);
+ let mut writer = ArrowWriter::try_new(&mut out, batch.schema(), None)
+ .expect("Unable to write file");
+ writer.write(&batch).unwrap();
+ let file_meta_data = writer.close().unwrap();
+ for row_group in file_meta_data.row_groups {
+ for column in row_group.columns {
+ assert!(column.offset_index_offset.is_some());
+ assert!(column.offset_index_length.is_some());
+ assert!(column.column_index_offset.is_none());
+ assert!(column.column_index_length.is_none());
+ }
+ }
+ }
+
#[test]
fn i8_single_column() {
required_and_optional::<Int8Array, _>(0..SMALL_SIZE as i8);
diff --git a/parquet/src/column/writer/mod.rs b/parquet/src/column/writer/mod.rs
index 1cacfe7933..3d8ce283ae 100644
--- a/parquet/src/column/writer/mod.rs
+++ b/parquet/src/column/writer/mod.rs
@@ -500,14 +500,11 @@ impl<'a, E: ColumnValueEncoder> GenericColumnWriter<'a, E> {
let metadata = self.write_column_metadata()?;
self.page_writer.close()?;
- let (column_index, offset_index) = if self.column_index_builder.valid() {
- // build the column and offset index
- let column_index = self.column_index_builder.build_to_thrift();
- let offset_index = self.offset_index_builder.build_to_thrift();
- (Some(column_index), Some(offset_index))
- } else {
- (None, None)
- };
+ let column_index = self
+ .column_index_builder
+ .valid()
+ .then(|| self.column_index_builder.build_to_thrift());
+ let offset_index = Some(self.offset_index_builder.build_to_thrift());
Ok(ColumnCloseResult {
bytes_written: self.column_metrics.total_bytes_written,