This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new ad56b7ef6f Support Date Parquet Data Page Statistics (#11135)
ad56b7ef6f is described below

commit ad56b7ef6f964b93640a56697f876e4fe7e367e7
Author: Dharan Aditya <[email protected]>
AuthorDate: Thu Jun 27 23:56:39 2024 +0530

    Support Date Parquet Data Page Statistics (#11135)
    
    * add Date Parquet Data Page Statistics
    
    * indentation for readability
---
 .../src/datasource/physical_plan/parquet/statistics.rs     | 14 ++++++++++++++
 datafusion/core/tests/parquet/arrow_statistics.rs          |  4 ++--
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs b/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs
index 44bacbdae1..e6babe6bf0 100644
--- a/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs
+++ b/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs
@@ -701,6 +701,20 @@ macro_rules! get_data_page_statistics {
                         TimeUnit::Nanosecond => Arc::new(TimestampNanosecondArray::from_iter(iter).with_timezone_opt(timezone.clone())),
                     })
                 },
+                Some(DataType::Date32) => Ok(Arc::new(Date32Array::from_iter([<$stat_type_prefix Int32DataPageStatsIterator>]::new($iterator).flatten()))),
+                Some(DataType::Date64) => Ok(
+                    Arc::new(
+                        Date64Array::from([<$stat_type_prefix Int32DataPageStatsIterator>]::new($iterator)
+                            .map(|x| {
+                                x.into_iter()
+                                .filter_map(|x| {
+                                    x.and_then(|x| i64::try_from(x).ok())
+                                })
+                                .map(|x| x * 24 * 60 * 60 * 1000)
+                            }).flatten().collect::<Vec<_>>()
+                        )
+                    )
+                ),
                 _ => unimplemented!()
             }
         }
diff --git a/datafusion/core/tests/parquet/arrow_statistics.rs b/datafusion/core/tests/parquet/arrow_statistics.rs
index 596015d581..217281d5a6 100644
--- a/datafusion/core/tests/parquet/arrow_statistics.rs
+++ b/datafusion/core/tests/parquet/arrow_statistics.rs
@@ -1181,7 +1181,7 @@ async fn test_dates_32_diff_rg_sizes() {
         // row counts are [13, 7]
         expected_row_counts: Some(UInt64Array::from(vec![13, 7])),
         column_name: "date32",
-        check: Check::RowGroup,
+        check: Check::Both,
     }
     .run();
 }
@@ -1324,7 +1324,7 @@ async fn test_dates_64_diff_rg_sizes() {
         expected_null_counts: UInt64Array::from(vec![2, 2]),
         expected_row_counts: Some(UInt64Array::from(vec![13, 7])),
         column_name: "date64",
-        check: Check::RowGroup,
+        check: Check::Both,
     }
     .run();
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to