alamb commented on code in PR #7809: URL: https://github.com/apache/arrow-datafusion/pull/7809#discussion_r1356840656
########## datafusion/common/src/stats.rs: ########## @@ -17,11 +17,184 @@ //! This module provides data structures to represent statistics -use std::fmt::Display; - +use crate::ScalarValue; use arrow::datatypes::DataType; -use crate::ScalarValue; +use std::fmt::{self, Debug, Display}; + +/// To deal with information without exactness guarantees, we wrap it inside a +/// [`Sharpness`] object to express its reliability. Review Comment: ```suggestion /// Represents a value with a degree of certainty. `Sharpness` is used to /// propagate information the precision of statistical values. ``` While writing this I wonder if naming `Sharpness` to `Precision` might match more closely with what this type represents 🤔 ########## datafusion/common/src/stats.rs: ########## @@ -17,11 +17,184 @@ //! This module provides data structures to represent statistics -use std::fmt::Display; - +use crate::ScalarValue; use arrow::datatypes::DataType; -use crate::ScalarValue; +use std::fmt::{self, Debug, Display}; + +/// To deal with information without exactness guarantees, we wrap it inside a +/// [`Sharpness`] object to express its reliability. +#[derive(Clone, PartialEq, Eq, Default)] +pub enum Sharpness<T: Debug + Clone + PartialEq + Eq + PartialOrd> { + Exact(T), + Inexact(T), + #[default] + Absent, Review Comment: ```suggestion /// The exact value is known Exact(T), /// The value is not known exactly, but is likely close to this value Inexact(T), /// Nothing is known about the value #[default] Absent, ``` ########## datafusion/common/src/stats.rs: ########## @@ -17,11 +17,184 @@ //! This module provides data structures to represent statistics -use std::fmt::Display; - +use crate::ScalarValue; use arrow::datatypes::DataType; -use crate::ScalarValue; +use std::fmt::{self, Debug, Display}; + +/// To deal with information without exactness guarantees, we wrap it inside a +/// [`Sharpness`] object to express its reliability. +#[derive(Clone, PartialEq, Eq, Default)] +pub enum Sharpness<T: Debug + Clone + PartialEq + Eq + PartialOrd> { + Exact(T), + Inexact(T), + #[default] + Absent, +} + +impl<T: Debug + Clone + PartialEq + Eq + PartialOrd> Sharpness<T> { + /// If we have some value (exact or inexact), it returns that value. + /// Otherwise, it returns `None`. + pub fn get_value(&self) -> Option<&T> { + match self { + Sharpness::Exact(value) | Sharpness::Inexact(value) => Some(value), + Sharpness::Absent => None, + } + } + + /// Transform the value in this [`Sharpness`] object, if one exists, using + /// the given function. Preserves the exactness state. + pub fn map<F>(self, f: F) -> Sharpness<T> + where + F: Fn(T) -> T, + { + match self { + Sharpness::Exact(val) => Sharpness::Exact(f(val)), + Sharpness::Inexact(val) => Sharpness::Inexact(f(val)), + _ => self, + } + } + + /// Returns `Some(true)` if we have an exact value, `Some(false)` if we + /// have an inexact value, and `None` if there is no value. + pub fn is_exact(&self) -> Option<bool> { + match self { + Sharpness::Exact(_) => Some(true), + Sharpness::Inexact(_) => Some(false), + _ => None, + } + } + + /// Returns the maximum of two (possibly inexact) values, conservatively + /// propagating exactness information. If one of the input values is + /// [`Sharpness::Absent`], the result is `Absent` too. + pub fn max(&self, other: &Sharpness<T>) -> Sharpness<T> { + match (self, other) { + (Sharpness::Exact(a), Sharpness::Exact(b)) => { + Sharpness::Exact(if a >= b { a.clone() } else { b.clone() }) + } + (Sharpness::Inexact(a), Sharpness::Exact(b)) + | (Sharpness::Exact(a), Sharpness::Inexact(b)) + | (Sharpness::Inexact(a), Sharpness::Inexact(b)) => { + Sharpness::Inexact(if a >= b { a.clone() } else { b.clone() }) + } + (_, _) => Sharpness::Absent, + } + } + + /// Returns the minimum of two (possibly inexact) values, conservatively + /// propagating exactness information. If one of the input values is + /// [`Sharpness::Absent`], the result is `Absent` too. + pub fn min(&self, other: &Sharpness<T>) -> Sharpness<T> { + match (self, other) { + (Sharpness::Exact(a), Sharpness::Exact(b)) => { + Sharpness::Exact(if a >= b { b.clone() } else { a.clone() }) + } + (Sharpness::Inexact(a), Sharpness::Exact(b)) + | (Sharpness::Exact(a), Sharpness::Inexact(b)) + | (Sharpness::Inexact(a), Sharpness::Inexact(b)) => { + Sharpness::Inexact(if a >= b { b.clone() } else { a.clone() }) + } + (_, _) => Sharpness::Absent, + } + } + + /// Demotes the sharpness state to inexact (if present). Review Comment: ```suggestion /// Demotes the sharpness state from exact to inexact (if present) ``` ########## datafusion/common/src/stats.rs: ########## @@ -17,11 +17,184 @@ //! This module provides data structures to represent statistics -use std::fmt::Display; - +use crate::ScalarValue; use arrow::datatypes::DataType; -use crate::ScalarValue; +use std::fmt::{self, Debug, Display}; + +/// To deal with information without exactness guarantees, we wrap it inside a +/// [`Sharpness`] object to express its reliability. Review Comment: ```suggestion /// Represents a value with a degree of certainty. `Sharpness` is used to /// propagate information the precision of statistical values. ``` While writing this I wonder if naming `Sharpness` to `Precision` might match more closely with what this type represents 🤔 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org