This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new e5d9816dfb Add `Statistics::distinct_count_opt` and deprecate `Statistics::distinct_count` (#6259)
e5d9816dfb is described below
commit e5d9816dfb7fc776732526acac3905fcb2764ef4
Author: Andrew Lamb <[email protected]>
AuthorDate: Tue Aug 20 11:38:06 2024 -0400
Add `Statistics::distinct_count_opt` and deprecate `Statistics::distinct_count` (#6259)
---
parquet/src/column/writer/mod.rs | 18 +++++++++---------
parquet/src/file/statistics.rs | 9 ++++++++-
2 files changed, 17 insertions(+), 10 deletions(-)
diff --git a/parquet/src/column/writer/mod.rs b/parquet/src/column/writer/mod.rs
index 8ea2878317..606ad462d1 100644
--- a/parquet/src/column/writer/mod.rs
+++ b/parquet/src/column/writer/mod.rs
@@ -1842,7 +1842,7 @@ mod tests {
assert_eq!(metadata.dictionary_page_offset(), Some(0));
if let Some(stats) = metadata.statistics() {
assert_eq!(stats.null_count_opt(), Some(0));
- assert_eq!(stats.distinct_count(), None);
+ assert_eq!(stats.distinct_count_opt(), None);
if let Statistics::Int32(stats) = stats {
assert_eq!(stats.min_opt().unwrap(), &1);
assert_eq!(stats.max_opt().unwrap(), &4);
@@ -1968,7 +1968,7 @@ mod tests {
assert_eq!(metadata.dictionary_page_offset(), Some(0));
if let Some(stats) = metadata.statistics() {
assert_eq!(stats.null_count_opt(), Some(0));
- assert_eq!(stats.distinct_count().unwrap_or(0), 55);
+ assert_eq!(stats.distinct_count_opt().unwrap_or(0), 55);
if let Statistics::Int32(stats) = stats {
assert_eq!(stats.min_opt().unwrap(), &-17);
assert_eq!(stats.max_opt().unwrap(), &9000);
@@ -1999,7 +1999,7 @@ mod tests {
assert_eq!(stats.min_bytes_opt().unwrap(), 1_i32.to_le_bytes());
assert_eq!(stats.max_bytes_opt().unwrap(), 7_i32.to_le_bytes());
assert_eq!(stats.null_count_opt(), Some(0));
- assert!(stats.distinct_count().is_none());
+ assert!(stats.distinct_count_opt().is_none());
drop(write);
@@ -2031,7 +2031,7 @@ mod tests {
7_i32.to_le_bytes()
);
assert_eq!(page_statistics.null_count_opt(), Some(0));
- assert!(page_statistics.distinct_count().is_none());
+ assert!(page_statistics.distinct_count_opt().is_none());
}
#[test]
@@ -2698,7 +2698,7 @@ mod tests {
if let Some(stats) = r.metadata.statistics() {
assert_eq!(stats.null_count_opt(), Some(0));
- assert_eq!(stats.distinct_count(), None);
+ assert_eq!(stats.distinct_count_opt(), None);
if let Statistics::Int32(stats) = stats {
// first page is [1,2,3,4]
// second page is [-5,2,4,8]
@@ -2758,7 +2758,7 @@ mod tests {
if let Some(stats) = r.metadata.statistics() {
assert_eq!(stats.null_count_opt(), Some(0));
- assert_eq!(stats.distinct_count(), None);
+ assert_eq!(stats.distinct_count_opt(), None);
if let Statistics::FixedLenByteArray(stats) = stats {
let column_index_min_value = &column_index.min_values[0];
let column_index_max_value = &column_index.max_values[0];
@@ -2830,7 +2830,7 @@ mod tests {
if let Some(stats) = r.metadata.statistics() {
assert_eq!(stats.null_count_opt(), Some(0));
- assert_eq!(stats.distinct_count(), None);
+ assert_eq!(stats.distinct_count_opt(), None);
if let Statistics::FixedLenByteArray(_stats) = stats {
let column_index_min_value = &column_index.min_values[0];
let column_index_max_value = &column_index.max_values[0];
@@ -2951,7 +2951,7 @@ mod tests {
let stats = r.metadata.statistics().expect("statistics");
assert_eq!(stats.null_count_opt(), Some(0));
- assert_eq!(stats.distinct_count(), None);
+ assert_eq!(stats.distinct_count_opt(), None);
if let Statistics::ByteArray(_stats) = stats {
let min_value = _stats.min_opt().unwrap();
let max_value = _stats.max_opt().unwrap();
@@ -3003,7 +3003,7 @@ mod tests {
let stats = r.metadata.statistics().expect("statistics");
assert_eq!(stats.null_count_opt(), Some(0));
- assert_eq!(stats.distinct_count(), None);
+ assert_eq!(stats.distinct_count_opt(), None);
if let Statistics::FixedLenByteArray(_stats) = stats {
let min_value = _stats.min_opt().unwrap();
let max_value = _stats.max_opt().unwrap();
diff --git a/parquet/src/file/statistics.rs b/parquet/src/file/statistics.rs
index 4134685ffc..680c75d6b2 100644
--- a/parquet/src/file/statistics.rs
+++ b/parquet/src/file/statistics.rs
@@ -257,7 +257,7 @@ pub fn to_thrift(stats: Option<&Statistics>) -> Option<TStatistics> {
max: None,
min: None,
null_count,
- distinct_count: stats.distinct_count().map(|value| value as i64),
+ distinct_count: stats.distinct_count_opt().map(|value| value as i64),
max_value: None,
min_value: None,
is_max_value_exact: None,
@@ -380,7 +380,14 @@ impl Statistics {
/// Returns optional value of number of distinct values occurring.
/// When it is `None`, the value should be ignored.
+ #[deprecated(since = "53.0.0", note = "Use `distinct_count_opt` method instead")]
pub fn distinct_count(&self) -> Option<u64> {
+ self.distinct_count_opt()
+ }
+
+ /// Returns optional value of number of distinct values occurring.
+ /// When it is `None`, the value should be ignored.
+ pub fn distinct_count_opt(&self) -> Option<u64> {
statistics_enum_func![self, distinct_count]
}