This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new ee5694078c fix panic in `ParquetMetadata::memory_size`: check 
has_min_max_set before invoking min()/max() (#6092)
ee5694078c is described below

commit ee5694078c86c8201549654246900a4232d531a9
Author: Fischer <[email protected]>
AuthorDate: Sat Jul 20 19:01:01 2024 +0800

    fix panic in `ParquetMetadata::memory_size`: check has_min_max_set before 
invoking min()/max() (#6092)
    
    * fix: check has_min_max_set before invoking min()/max()
    
    * chore: add unit test for statistics heap size
    
    * Fixup test
    
    ---------
    
    Co-authored-by: Andrew Lamb <[email protected]>
---
 parquet/src/file/metadata/memory.rs |  9 ++++++++-
 parquet/src/file/metadata/mod.rs    | 32 ++++++++++++++++++++++++++++----
 2 files changed, 36 insertions(+), 5 deletions(-)

diff --git a/parquet/src/file/metadata/memory.rs 
b/parquet/src/file/metadata/memory.rs
index 57b2f7eec0..57d5aaa2dd 100644
--- a/parquet/src/file/metadata/memory.rs
+++ b/parquet/src/file/metadata/memory.rs
@@ -173,7 +173,14 @@ impl<T: ParquetValueType> HeapSize for PageIndex<T> {
 
 impl<T: ParquetValueType> HeapSize for ValueStatistics<T> {
     fn heap_size(&self) -> usize {
-        self.min().heap_size() + self.max().heap_size()
+        if self.has_min_max_set() {
+            return self.min().heap_size() + self.max().heap_size();
+        } else if self.min_is_exact() {
+            return self.min().heap_size();
+        } else if self.max_is_exact() {
+            return self.max().heap_size();
+        }
+        0
     }
 }
 impl HeapSize for bool {
diff --git a/parquet/src/file/metadata/mod.rs b/parquet/src/file/metadata/mod.rs
index 1d338bb8ee..9f2084a35d 100644
--- a/parquet/src/file/metadata/mod.rs
+++ b/parquet/src/file/metadata/mod.rs
@@ -1291,7 +1291,11 @@ mod tests {
         let columns = schema_descr
             .columns()
             .iter()
-            .map(|column_descr| 
ColumnChunkMetaData::builder(column_descr.clone()).build())
+            .map(|column_descr| {
+                ColumnChunkMetaData::builder(column_descr.clone())
+                    .set_statistics(Statistics::new::<i32>(None, None, None, 
0, false))
+                    .build()
+            })
             .collect::<Result<Vec<_>>>()
             .unwrap();
         let row_group_meta = RowGroupMetaData::builder(schema_descr.clone())
@@ -1317,11 +1321,31 @@ mod tests {
             num_rows,
             created_by,
             key_value_metadata,
-            schema_descr,
+            schema_descr.clone(),
             column_orders,
         );
-        let parquet_meta = ParquetMetaData::new(file_metadata.clone(), 
row_group_meta.clone());
-        let base_expected_size = 1320;
+
+        // Now, add in Exact Statistics
+        let columns_with_stats = schema_descr
+            .columns()
+            .iter()
+            .map(|column_descr| {
+                ColumnChunkMetaData::builder(column_descr.clone())
+                    .set_statistics(Statistics::new::<i32>(Some(0), Some(100), 
None, 0, false))
+                    .build()
+            })
+            .collect::<Result<Vec<_>>>()
+            .unwrap();
+
+        let row_group_meta_with_stats = RowGroupMetaData::builder(schema_descr)
+            .set_num_rows(1000)
+            .set_column_metadata(columns_with_stats)
+            .build()
+            .unwrap();
+        let row_group_meta_with_stats = vec![row_group_meta_with_stats];
+
+        let parquet_meta = ParquetMetaData::new(file_metadata.clone(), 
row_group_meta_with_stats);
+        let base_expected_size = 2024;
         assert_eq!(parquet_meta.memory_size(), base_expected_size);
 
         let mut column_index = ColumnIndexBuilder::new();

Reply via email to