This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 35f481dbcc Refactor min/max value update in Parquet statistics (#9120)
35f481dbcc is described below

commit 35f481dbcc8147e0c4a397de80eb8ac74ca4ce9a
Author: Alex Huang <[email protected]>
AuthorDate: Mon Feb 5 20:28:31 2024 +0800

    Refactor min/max value update in Parquet statistics (#9120)
    
    * Refactor min/max value update in Parquet statistics
    
    * omit has_min_max_set
---
 .../core/src/datasource/file_format/parquet.rs     | 194 ++++++---------------
 1 file changed, 55 insertions(+), 139 deletions(-)

diff --git a/datafusion/core/src/datasource/file_format/parquet.rs b/datafusion/core/src/datasource/file_format/parquet.rs
index 408233469e..89ec81630c 100644
--- a/datafusion/core/src/datasource/file_format/parquet.rs
+++ b/datafusion/core/src/datasource/file_format/parquet.rs
@@ -303,155 +303,71 @@ fn summarize_min_max(
     i: usize,
     stat: &ParquetStatistics,
 ) {
+    if !stat.has_min_max_set() {
+        max_values[i] = None;
+        min_values[i] = None;
+        return;
+    }
     match stat {
-        ParquetStatistics::Boolean(s) => {
-            if let DataType::Boolean = fields[i].data_type() {
-                if s.has_min_max_set() {
-                    if let Some(max_value) = &mut max_values[i] {
-                        match max_value.update_batch(&[Arc::new(BooleanArray::from(
-                            vec![Some(*s.max())],
-                        ))]) {
-                            Ok(_) => {}
-                            Err(_) => {
-                                max_values[i] = None;
-                            }
-                        }
-                    }
-                    if let Some(min_value) = &mut min_values[i] {
-                        match min_value.update_batch(&[Arc::new(BooleanArray::from(
-                            vec![Some(*s.min())],
-                        ))]) {
-                            Ok(_) => {}
-                            Err(_) => {
-                                min_values[i] = None;
-                            }
-                        }
-                    }
-                    return;
-                }
+        ParquetStatistics::Boolean(s) if DataType::Boolean == *fields[i].data_type() => {
+            if let Some(max_value) = &mut max_values[i] {
+                max_value
+                    .update_batch(&[Arc::new(BooleanArray::from(vec![*s.max()]))])
+                    .unwrap_or_else(|_| max_values[i] = None);
+            }
+            if let Some(min_value) = &mut min_values[i] {
+                min_value
+                    .update_batch(&[Arc::new(BooleanArray::from(vec![*s.min()]))])
+                    .unwrap_or_else(|_| min_values[i] = None);
             }
-            max_values[i] = None;
-            min_values[i] = None;
         }
-        ParquetStatistics::Int32(s) => {
-            if let DataType::Int32 = fields[i].data_type() {
-                if s.has_min_max_set() {
-                    if let Some(max_value) = &mut max_values[i] {
-                        match max_value.update_batch(&[Arc::new(Int32Array::from_value(
-                            *s.max(),
-                            1,
-                        ))]) {
-                            Ok(_) => {}
-                            Err(_) => {
-                                max_values[i] = None;
-                            }
-                        }
-                    }
-                    if let Some(min_value) = &mut min_values[i] {
-                        match min_value.update_batch(&[Arc::new(Int32Array::from_value(
-                            *s.min(),
-                            1,
-                        ))]) {
-                            Ok(_) => {}
-                            Err(_) => {
-                                min_values[i] = None;
-                            }
-                        }
-                    }
-                    return;
-                }
+        ParquetStatistics::Int32(s) if DataType::Int32 == *fields[i].data_type() => {
+            if let Some(max_value) = &mut max_values[i] {
+                max_value
+                    .update_batch(&[Arc::new(Int32Array::from_value(*s.max(), 1))])
+                    .unwrap_or_else(|_| max_values[i] = None);
+            }
+            if let Some(min_value) = &mut min_values[i] {
+                min_value
+                    .update_batch(&[Arc::new(Int32Array::from_value(*s.min(), 1))])
+                    .unwrap_or_else(|_| min_values[i] = None);
             }
-            max_values[i] = None;
-            min_values[i] = None;
         }
-        ParquetStatistics::Int64(s) => {
-            if let DataType::Int64 = fields[i].data_type() {
-                if s.has_min_max_set() {
-                    if let Some(max_value) = &mut max_values[i] {
-                        match max_value.update_batch(&[Arc::new(Int64Array::from_value(
-                            *s.max(),
-                            1,
-                        ))]) {
-                            Ok(_) => {}
-                            Err(_) => {
-                                max_values[i] = None;
-                            }
-                        }
-                    }
-                    if let Some(min_value) = &mut min_values[i] {
-                        match min_value.update_batch(&[Arc::new(Int64Array::from_value(
-                            *s.min(),
-                            1,
-                        ))]) {
-                            Ok(_) => {}
-                            Err(_) => {
-                                min_values[i] = None;
-                            }
-                        }
-                    }
-                    return;
-                }
+        ParquetStatistics::Int64(s) if DataType::Int64 == *fields[i].data_type() => {
+            if let Some(max_value) = &mut max_values[i] {
+                max_value
+                    .update_batch(&[Arc::new(Int64Array::from_value(*s.max(), 1))])
+                    .unwrap_or_else(|_| max_values[i] = None);
+            }
+            if let Some(min_value) = &mut min_values[i] {
+                min_value
+                    .update_batch(&[Arc::new(Int64Array::from_value(*s.min(), 1))])
+                    .unwrap_or_else(|_| min_values[i] = None);
             }
-            max_values[i] = None;
-            min_values[i] = None;
         }
-        ParquetStatistics::Float(s) => {
-            if let DataType::Float32 = fields[i].data_type() {
-                if s.has_min_max_set() {
-                    if let Some(max_value) = &mut max_values[i] {
-                        match max_value.update_batch(&[Arc::new(Float32Array::from(
-                            vec![Some(*s.max())],
-                        ))]) {
-                            Ok(_) => {}
-                            Err(_) => {
-                                max_values[i] = None;
-                            }
-                        }
-                    }
-                    if let Some(min_value) = &mut min_values[i] {
-                        match min_value.update_batch(&[Arc::new(Float32Array::from(
-                            vec![Some(*s.min())],
-                        ))]) {
-                            Ok(_) => {}
-                            Err(_) => {
-                                min_values[i] = None;
-                            }
-                        }
-                    }
-                    return;
-                }
+        ParquetStatistics::Float(s) if DataType::Float32 == *fields[i].data_type() => {
+            if let Some(max_value) = &mut max_values[i] {
+                max_value
+                    .update_batch(&[Arc::new(Float32Array::from(vec![*s.max()]))])
+                    .unwrap_or_else(|_| max_values[i] = None);
+            }
+            if let Some(min_value) = &mut min_values[i] {
+                min_value
+                    .update_batch(&[Arc::new(Float32Array::from(vec![*s.min()]))])
+                    .unwrap_or_else(|_| min_values[i] = None);
             }
-            max_values[i] = None;
-            min_values[i] = None;
         }
-        ParquetStatistics::Double(s) => {
-            if let DataType::Float64 = fields[i].data_type() {
-                if s.has_min_max_set() {
-                    if let Some(max_value) = &mut max_values[i] {
-                        match max_value.update_batch(&[Arc::new(Float64Array::from(
-                            vec![Some(*s.max())],
-                        ))]) {
-                            Ok(_) => {}
-                            Err(_) => {
-                                max_values[i] = None;
-                            }
-                        }
-                    }
-                    if let Some(min_value) = &mut min_values[i] {
-                        match min_value.update_batch(&[Arc::new(Float64Array::from(
-                            vec![Some(*s.min())],
-                        ))]) {
-                            Ok(_) => {}
-                            Err(_) => {
-                                min_values[i] = None;
-                            }
-                        }
-                    }
-                    return;
-                }
+        ParquetStatistics::Double(s) if DataType::Float64 == *fields[i].data_type() => {
+            if let Some(max_value) = &mut max_values[i] {
+                max_value
+                    .update_batch(&[Arc::new(Float64Array::from(vec![*s.max()]))])
+                    .unwrap_or_else(|_| max_values[i] = None);
+            }
+            if let Some(min_value) = &mut min_values[i] {
+                min_value
+                    .update_batch(&[Arc::new(Float64Array::from(vec![*s.min()]))])
+                    .unwrap_or_else(|_| min_values[i] = None);
             }
-            max_values[i] = None;
-            min_values[i] = None;
         }
         _ => {
             max_values[i] = None;

Reply via email to