This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new ae0d82ccb8 Test Disabled Page Statistics (#4587) (#4589)
ae0d82ccb8 is described below
commit ae0d82ccb8fa679e67c1340e055e7c4cef8c605e
Author: Raphael Taylor-Davies <[email protected]>
AuthorDate: Tue Aug 1 12:06:25 2023 +0100
Test Disabled Page Statistics (#4587) (#4589)
* Test disabling page index statistics (#4587)
* Apply suggestions from code review
Co-authored-by: Andrew Lamb <[email protected]>
---------
Co-authored-by: Andrew Lamb <[email protected]>
---
parquet/src/file/writer.rs | 60 ++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 60 insertions(+)
diff --git a/parquet/src/file/writer.rs b/parquet/src/file/writer.rs
index 12da085ed2..3b2dd82894 100644
--- a/parquet/src/file/writer.rs
+++ b/parquet/src/file/writer.rs
@@ -742,6 +742,8 @@ mod tests {
use crate::column::reader::get_typed_column_reader;
use crate::compression::{create_codec, Codec, CodecOptionsBuilder};
use crate::data_type::{BoolType, Int32Type};
+ use crate::file::page_index::index::Index;
+ use crate::file::properties::EnabledStatistics;
use crate::file::reader::ChunkReader;
use crate::file::serialized_reader::ReadOptionsBuilder;
use crate::file::{
@@ -1648,4 +1650,62 @@ mod tests {
let reader = SerializedFileReader::new_with_options(file,
options).unwrap();
test_read(reader);
}
+
+ #[test]
+ fn test_disabled_statistics() { // Enables page statistics for column "a" only, then checks the written and re-read page indexes
+ let message_type = "
+ message test_schema {
+ REQUIRED INT32 a;
+ REQUIRED INT32 b;
+ }
+ ";
+ let schema = Arc::new(parse_message_type(message_type).unwrap());
+ let props = WriterProperties::builder()
+ .set_statistics_enabled(EnabledStatistics::None) // file-wide setting: statistics disabled
+ .set_column_statistics_enabled("a".into(), EnabledStatistics::Page) // per-column override: page statistics for "a"
+ .build();
+ let mut file = Vec::with_capacity(1024); // in-memory buffer the writer emits into; read back below
+ let mut file_writer =
+ SerializedFileWriter::new(&mut file, schema,
Arc::new(props)).unwrap();
+
+ let mut row_group_writer = file_writer.next_row_group().unwrap();
+ let mut a_writer = row_group_writer.next_column().unwrap().unwrap();
+ let col_writer = a_writer.typed::<Int32Type>();
+ col_writer.write_batch(&[1, 2, 3], None, None).unwrap();
+ a_writer.close().unwrap();
+
+ let mut b_writer = row_group_writer.next_column().unwrap().unwrap();
+ let col_writer = b_writer.typed::<Int32Type>();
+ col_writer.write_batch(&[4, 5, 6], None, None).unwrap();
+ b_writer.close().unwrap();
+ row_group_writer.close().unwrap();
+
+ let metadata = file_writer.close().unwrap(); // metadata returned by the writer on close
+ assert_eq!(metadata.row_groups.len(), 1);
+ let row_group = &metadata.row_groups[0];
+ assert_eq!(row_group.columns.len(), 2);
+ // Column "a" has both offset and column index, as requested
+ assert!(row_group.columns[0].offset_index_offset.is_some());
+ assert!(row_group.columns[0].column_index_offset.is_some());
+ // Column "b" should only have offset index
+ assert!(row_group.columns[1].offset_index_offset.is_some());
+ assert!(row_group.columns[1].column_index_offset.is_none());
+
+ let options = ReadOptionsBuilder::new().with_page_index().build(); // re-read the bytes with the page index requested
+ let reader =
+ SerializedFileReader::new_with_options(Bytes::from(file),
options).unwrap();
+
+ let offset_index = reader.metadata().offset_index().unwrap();
+ assert_eq!(offset_index.len(), 1); // 1 row group
+ assert_eq!(offset_index[0].len(), 2); // 2 columns
+
+ let column_index = reader.metadata().column_index().unwrap();
+ assert_eq!(column_index.len(), 1); // 1 row group
+ assert_eq!(column_index[0].len(), 2); // 2 columns
+
+ let a_idx = &column_index[0][0];
+ assert!(matches!(a_idx, Index::INT32(_)), "{a_idx:?}"); // "a" is expected to carry a typed INT32 column index
+ let b_idx = &column_index[0][1];
+ assert!(matches!(b_idx, Index::NONE), "{b_idx:?}"); // "b" wrote no column index, so the expected entry is Index::NONE
+ }
}