This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new ad56b7ef6f Support Date Parquet Data Page Statistics (#11135)
ad56b7ef6f is described below
commit ad56b7ef6f964b93640a56697f876e4fe7e367e7
Author: Dharan Aditya <[email protected]>
AuthorDate: Thu Jun 27 23:56:39 2024 +0530
Support Date Parquet Data Page Statistics (#11135)
* add Date Parquet Data Page Statistics
* indentation for readability
---
.../src/datasource/physical_plan/parquet/statistics.rs | 14 ++++++++++++++
datafusion/core/tests/parquet/arrow_statistics.rs | 4 ++--
2 files changed, 16 insertions(+), 2 deletions(-)
diff --git a/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs b/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs
index 44bacbdae1..e6babe6bf0 100644
--- a/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs
+++ b/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs
@@ -701,6 +701,20 @@ macro_rules! get_data_page_statistics {
TimeUnit::Nanosecond => Arc::new(TimestampNanosecondArray::from_iter(iter).with_timezone_opt(timezone.clone())),
})
},
+ Some(DataType::Date32) => Ok(Arc::new(Date32Array::from_iter([<$stat_type_prefix Int32DataPageStatsIterator>]::new($iterator).flatten()))),
+ Some(DataType::Date64) => Ok(
+ Arc::new(
+ Date64Array::from([<$stat_type_prefix Int32DataPageStatsIterator>]::new($iterator)
+ .map(|x| {
+ x.into_iter()
+ .filter_map(|x| {
+ x.and_then(|x| i64::try_from(x).ok())
+ })
+ .map(|x| x * 24 * 60 * 60 * 1000)
+ }).flatten().collect::<Vec<_>>()
+ )
+ )
+ ),
_ => unimplemented!()
}
}
diff --git a/datafusion/core/tests/parquet/arrow_statistics.rs b/datafusion/core/tests/parquet/arrow_statistics.rs
index 596015d581..217281d5a6 100644
--- a/datafusion/core/tests/parquet/arrow_statistics.rs
+++ b/datafusion/core/tests/parquet/arrow_statistics.rs
@@ -1181,7 +1181,7 @@ async fn test_dates_32_diff_rg_sizes() {
// row counts are [13, 7]
expected_row_counts: Some(UInt64Array::from(vec![13, 7])),
column_name: "date32",
- check: Check::RowGroup,
+ check: Check::Both,
}
.run();
}
@@ -1324,7 +1324,7 @@ async fn test_dates_64_diff_rg_sizes() {
expected_null_counts: UInt64Array::from(vec![2, 2]),
expected_row_counts: Some(UInt64Array::from(vec![13, 7])),
column_name: "date64",
- check: Check::RowGroup,
+ check: Check::Both,
}
.run();
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]