alamb commented on code in PR #7809:
URL: https://github.com/apache/arrow-datafusion/pull/7809#discussion_r1356840656


##########
datafusion/common/src/stats.rs:
##########
@@ -17,11 +17,184 @@
 
 //! This module provides data structures to represent statistics
 
-use std::fmt::Display;
-
+use crate::ScalarValue;
 use arrow::datatypes::DataType;
 
-use crate::ScalarValue;
+use std::fmt::{self, Debug, Display};
+
+/// To deal with information without exactness guarantees, we wrap it inside a
+/// [`Sharpness`] object to express its reliability.

Review Comment:
   ```suggestion
   /// Represents a value with a degree of certainty. `Sharpness` is used to 
   /// propagate information the precision of statistical values.
   ```
   
   While writing this I wonder if naming `Sharpness` to  `Precision` might 
match more closely with what this type represents 🤔 



##########
datafusion/common/src/stats.rs:
##########
@@ -17,11 +17,184 @@
 
 //! This module provides data structures to represent statistics
 
-use std::fmt::Display;
-
+use crate::ScalarValue;
 use arrow::datatypes::DataType;
 
-use crate::ScalarValue;
+use std::fmt::{self, Debug, Display};
+
+/// To deal with information without exactness guarantees, we wrap it inside a
+/// [`Sharpness`] object to express its reliability.
+#[derive(Clone, PartialEq, Eq, Default)]
+pub enum Sharpness<T: Debug + Clone + PartialEq + Eq + PartialOrd> {
+    Exact(T),
+    Inexact(T),
+    #[default]
+    Absent,

Review Comment:
   ```suggestion
       /// The exact value is known 
       Exact(T),
       /// The value is not known exactly, but is likely close to this value
       Inexact(T),
       /// Nothing is known about the value
       #[default]    
       Absent,
   ```



##########
datafusion/common/src/stats.rs:
##########
@@ -17,11 +17,184 @@
 
 //! This module provides data structures to represent statistics
 
-use std::fmt::Display;
-
+use crate::ScalarValue;
 use arrow::datatypes::DataType;
 
-use crate::ScalarValue;
+use std::fmt::{self, Debug, Display};
+
+/// To deal with information without exactness guarantees, we wrap it inside a
+/// [`Sharpness`] object to express its reliability.
+#[derive(Clone, PartialEq, Eq, Default)]
+pub enum Sharpness<T: Debug + Clone + PartialEq + Eq + PartialOrd> {
+    Exact(T),
+    Inexact(T),
+    #[default]
+    Absent,
+}
+
+impl<T: Debug + Clone + PartialEq + Eq + PartialOrd> Sharpness<T> {
+    /// If we have some value (exact or inexact), it returns that value.
+    /// Otherwise, it returns `None`.
+    pub fn get_value(&self) -> Option<&T> {
+        match self {
+            Sharpness::Exact(value) | Sharpness::Inexact(value) => Some(value),
+            Sharpness::Absent => None,
+        }
+    }
+
+    /// Transform the value in this [`Sharpness`] object, if one exists, using
+    /// the given function. Preserves the exactness state.
+    pub fn map<F>(self, f: F) -> Sharpness<T>
+    where
+        F: Fn(T) -> T,
+    {
+        match self {
+            Sharpness::Exact(val) => Sharpness::Exact(f(val)),
+            Sharpness::Inexact(val) => Sharpness::Inexact(f(val)),
+            _ => self,
+        }
+    }
+
+    /// Returns `Some(true)` if we have an exact value, `Some(false)` if we
+    /// have an inexact value, and `None` if there is no value.
+    pub fn is_exact(&self) -> Option<bool> {
+        match self {
+            Sharpness::Exact(_) => Some(true),
+            Sharpness::Inexact(_) => Some(false),
+            _ => None,
+        }
+    }
+
+    /// Returns the maximum of two (possibly inexact) values, conservatively
+    /// propagating exactness information. If one of the input values is
+    /// [`Sharpness::Absent`], the result is `Absent` too.
+    pub fn max(&self, other: &Sharpness<T>) -> Sharpness<T> {
+        match (self, other) {
+            (Sharpness::Exact(a), Sharpness::Exact(b)) => {
+                Sharpness::Exact(if a >= b { a.clone() } else { b.clone() })
+            }
+            (Sharpness::Inexact(a), Sharpness::Exact(b))
+            | (Sharpness::Exact(a), Sharpness::Inexact(b))
+            | (Sharpness::Inexact(a), Sharpness::Inexact(b)) => {
+                Sharpness::Inexact(if a >= b { a.clone() } else { b.clone() })
+            }
+            (_, _) => Sharpness::Absent,
+        }
+    }
+
+    /// Returns the minimum of two (possibly inexact) values, conservatively
+    /// propagating exactness information. If one of the input values is
+    /// [`Sharpness::Absent`], the result is `Absent` too.
+    pub fn min(&self, other: &Sharpness<T>) -> Sharpness<T> {
+        match (self, other) {
+            (Sharpness::Exact(a), Sharpness::Exact(b)) => {
+                Sharpness::Exact(if a >= b { b.clone() } else { a.clone() })
+            }
+            (Sharpness::Inexact(a), Sharpness::Exact(b))
+            | (Sharpness::Exact(a), Sharpness::Inexact(b))
+            | (Sharpness::Inexact(a), Sharpness::Inexact(b)) => {
+                Sharpness::Inexact(if a >= b { b.clone() } else { a.clone() })
+            }
+            (_, _) => Sharpness::Absent,
+        }
+    }
+
+    /// Demotes the sharpness state to inexact (if present).

Review Comment:
   ```suggestion
       /// Demotes the sharpness state from exact to inexact (if present)
   ```



##########
datafusion/common/src/stats.rs:
##########
@@ -17,11 +17,184 @@
 
 //! This module provides data structures to represent statistics
 
-use std::fmt::Display;
-
+use crate::ScalarValue;
 use arrow::datatypes::DataType;
 
-use crate::ScalarValue;
+use std::fmt::{self, Debug, Display};
+
+/// To deal with information without exactness guarantees, we wrap it inside a
+/// [`Sharpness`] object to express its reliability.

Review Comment:
   ```suggestion
   /// Represents a value with a degree of certainty. `Sharpness` is used to 
   /// propagate information the precision of statistical values.
   ```
   
   While writing this I wonder if naming `Sharpness` to  `Precision` might 
match more closely with what this type represents 🤔 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to