This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new e5d9816dfb Add `Statistics::distinct_count_opt` and deprecate `Statistics::distinct_count` (#6259)
e5d9816dfb is described below

commit e5d9816dfb7fc776732526acac3905fcb2764ef4
Author: Andrew Lamb <[email protected]>
AuthorDate: Tue Aug 20 11:38:06 2024 -0400

    Add `Statistics::distinct_count_opt` and deprecate `Statistics::distinct_count` (#6259)
---
 parquet/src/column/writer/mod.rs | 18 +++++++++---------
 parquet/src/file/statistics.rs   |  9 ++++++++-
 2 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/parquet/src/column/writer/mod.rs b/parquet/src/column/writer/mod.rs
index 8ea2878317..606ad462d1 100644
--- a/parquet/src/column/writer/mod.rs
+++ b/parquet/src/column/writer/mod.rs
@@ -1842,7 +1842,7 @@ mod tests {
         assert_eq!(metadata.dictionary_page_offset(), Some(0));
         if let Some(stats) = metadata.statistics() {
             assert_eq!(stats.null_count_opt(), Some(0));
-            assert_eq!(stats.distinct_count(), None);
+            assert_eq!(stats.distinct_count_opt(), None);
             if let Statistics::Int32(stats) = stats {
                 assert_eq!(stats.min_opt().unwrap(), &1);
                 assert_eq!(stats.max_opt().unwrap(), &4);
@@ -1968,7 +1968,7 @@ mod tests {
         assert_eq!(metadata.dictionary_page_offset(), Some(0));
         if let Some(stats) = metadata.statistics() {
             assert_eq!(stats.null_count_opt(), Some(0));
-            assert_eq!(stats.distinct_count().unwrap_or(0), 55);
+            assert_eq!(stats.distinct_count_opt().unwrap_or(0), 55);
             if let Statistics::Int32(stats) = stats {
                 assert_eq!(stats.min_opt().unwrap(), &-17);
                 assert_eq!(stats.max_opt().unwrap(), &9000);
@@ -1999,7 +1999,7 @@ mod tests {
         assert_eq!(stats.min_bytes_opt().unwrap(), 1_i32.to_le_bytes());
         assert_eq!(stats.max_bytes_opt().unwrap(), 7_i32.to_le_bytes());
         assert_eq!(stats.null_count_opt(), Some(0));
-        assert!(stats.distinct_count().is_none());
+        assert!(stats.distinct_count_opt().is_none());
 
         drop(write);
 
@@ -2031,7 +2031,7 @@ mod tests {
             7_i32.to_le_bytes()
         );
         assert_eq!(page_statistics.null_count_opt(), Some(0));
-        assert!(page_statistics.distinct_count().is_none());
+        assert!(page_statistics.distinct_count_opt().is_none());
     }
 
     #[test]
@@ -2698,7 +2698,7 @@ mod tests {
 
         if let Some(stats) = r.metadata.statistics() {
             assert_eq!(stats.null_count_opt(), Some(0));
-            assert_eq!(stats.distinct_count(), None);
+            assert_eq!(stats.distinct_count_opt(), None);
             if let Statistics::Int32(stats) = stats {
                 // first page is [1,2,3,4]
                 // second page is [-5,2,4,8]
@@ -2758,7 +2758,7 @@ mod tests {
 
         if let Some(stats) = r.metadata.statistics() {
             assert_eq!(stats.null_count_opt(), Some(0));
-            assert_eq!(stats.distinct_count(), None);
+            assert_eq!(stats.distinct_count_opt(), None);
             if let Statistics::FixedLenByteArray(stats) = stats {
                 let column_index_min_value = &column_index.min_values[0];
                 let column_index_max_value = &column_index.max_values[0];
@@ -2830,7 +2830,7 @@ mod tests {
 
         if let Some(stats) = r.metadata.statistics() {
             assert_eq!(stats.null_count_opt(), Some(0));
-            assert_eq!(stats.distinct_count(), None);
+            assert_eq!(stats.distinct_count_opt(), None);
             if let Statistics::FixedLenByteArray(_stats) = stats {
                 let column_index_min_value = &column_index.min_values[0];
                 let column_index_max_value = &column_index.max_values[0];
@@ -2951,7 +2951,7 @@ mod tests {
 
         let stats = r.metadata.statistics().expect("statistics");
         assert_eq!(stats.null_count_opt(), Some(0));
-        assert_eq!(stats.distinct_count(), None);
+        assert_eq!(stats.distinct_count_opt(), None);
         if let Statistics::ByteArray(_stats) = stats {
             let min_value = _stats.min_opt().unwrap();
             let max_value = _stats.max_opt().unwrap();
@@ -3003,7 +3003,7 @@ mod tests {
 
         let stats = r.metadata.statistics().expect("statistics");
         assert_eq!(stats.null_count_opt(), Some(0));
-        assert_eq!(stats.distinct_count(), None);
+        assert_eq!(stats.distinct_count_opt(), None);
         if let Statistics::FixedLenByteArray(_stats) = stats {
             let min_value = _stats.min_opt().unwrap();
             let max_value = _stats.max_opt().unwrap();
diff --git a/parquet/src/file/statistics.rs b/parquet/src/file/statistics.rs
index 4134685ffc..680c75d6b2 100644
--- a/parquet/src/file/statistics.rs
+++ b/parquet/src/file/statistics.rs
@@ -257,7 +257,7 @@ pub fn to_thrift(stats: Option<&Statistics>) -> Option<TStatistics> {
         max: None,
         min: None,
         null_count,
-        distinct_count: stats.distinct_count().map(|value| value as i64),
+        distinct_count: stats.distinct_count_opt().map(|value| value as i64),
         max_value: None,
         min_value: None,
         is_max_value_exact: None,
@@ -380,7 +380,14 @@ impl Statistics {
 
     /// Returns optional value of number of distinct values occurring.
     /// When it is `None`, the value should be ignored.
+    #[deprecated(since = "53.0.0", note = "Use `distinct_count_opt` method instead")]
     pub fn distinct_count(&self) -> Option<u64> {
+        self.distinct_count_opt()
+    }
+
+    /// Returns optional value of number of distinct values occurring.
+    /// When it is `None`, the value should be ignored.
+    pub fn distinct_count_opt(&self) -> Option<u64> {
         statistics_enum_func![self, distinct_count]
     }
 

Reply via email to