This is an automated email from the ASF dual-hosted git repository.

wjones127 pushed a commit to branch 6171-simplify-with-guarantee
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git

commit 45ae442ce721c4e7d2d96fb9fa8b9fa5b67bdab7
Author: Will Jones <[email protected]>
AuthorDate: Mon Sep 4 12:19:26 2023 -0700

    docs
---
 .../src/simplify_expressions/expr_simplifier.rs    |  53 +++++-
 .../src/simplify_expressions/guarantees.rs         |  36 +++-
 .../physical-expr/src/intervals/cp_solver.rs       | 202 ++++++++-------------
 3 files changed, 159 insertions(+), 132 deletions(-)

diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs 
b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs
index 78a8d70635..7f1c692238 100644
--- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs
+++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs
@@ -152,7 +152,58 @@ impl<S: SimplifyInfo> ExprSimplifier<S> {
         expr.rewrite(&mut expr_rewrite)
     }
 
-    /// Add guarantees
+    /// Input guarantees and simplify the expression.
+    ///
+    /// The guarantees can simplify expressions. For example, if a column is
+    /// guaranteed to always be a certain value, it's references in the 
expression
+    /// can be replaced with that literal.
+    ///
+    /// ```rust
+    /// use arrow::datatypes::{DataType, Field, Schema};
+    /// use datafusion_expr::{col, lit, Expr};
+    /// use datafusion_common::{Result, ScalarValue, ToDFSchema};
+    /// use datafusion_physical_expr::execution_props::ExecutionProps;
+    /// use datafusion_optimizer::simplify_expressions::{
+    ///     ExprSimplifier, SimplifyContext,
+    ///     guarantees::{Guarantee, GuaranteeBound, NullStatus}};
+    ///
+    /// let schema = Schema::new(vec![
+    ///   Field::new("x", DataType::Int64, false),
+    ///   Field::new("y", DataType::UInt32, false),
+    ///   Field::new("z", DataType::Int64, false),
+    ///   ])
+    ///   .to_dfschema_ref().unwrap();
+    ///
+    /// // Create the simplifier
+    /// let props = ExecutionProps::new();
+    /// let context = SimplifyContext::new(&props)
+    ///    .with_schema(schema);
+    /// let simplifier = ExprSimplifier::new(context);
+    ///
+    /// // Expression: (x >= 3) AND (y + 2 < 10) AND (z > 5)
+    /// let expr_x = col("x").gt_eq(lit(3_i64));
+    /// let expr_y = (col("y") + lit(2_u32)).lt(lit(10_u32));
+    /// let expr_z = col("z").gt(lit(5_i64));
+    /// let expr = expr_x.and(expr_y).and(expr_z.clone());
+    ///
+    /// let guarantees = vec![
+    ///    // x is guaranteed to be between 3 and 5
+    ///    (
+    ///        col("x"),
+    ///        Guarantee::new(
+    ///            Some(GuaranteeBound::new(ScalarValue::Int64(Some(3)), 
false)),
+    ///            Some(GuaranteeBound::new(ScalarValue::Int64(Some(5)), 
false)),
+    ///            NullStatus::NeverNull,
+    ///        )
+    ///    ),
+    ///    // y is guaranteed to be 3
+    ///    (col("y"), Guarantee::from(&ScalarValue::UInt32(Some(3)))),
+    /// ];
+    /// let output = simplifier.simplify_with_guarantees(expr, 
&guarantees).unwrap();
+    /// // Expression becomes: true AND true AND (z > 5), which simplifies to
+    /// // z > 5.
+    /// assert_eq!(output, expr_z);
+    /// ```
     pub fn simplify_with_guarantees<'a>(
         &self,
         expr: Expr,
diff --git a/datafusion/optimizer/src/simplify_expressions/guarantees.rs 
b/datafusion/optimizer/src/simplify_expressions/guarantees.rs
index 0772eaab50..7fe2581778 100644
--- a/datafusion/optimizer/src/simplify_expressions/guarantees.rs
+++ b/datafusion/optimizer/src/simplify_expressions/guarantees.rs
@@ -15,8 +15,26 @@
 // specific language governing permissions and limitations
 // under the License.
 
-//! Logic to inject guarantees with expressions.
+//! Guarantees which can be used with 
[ExprSimplifier::simplify_with_guarantees()][crate::simplify_expressions::expr_simplifier::ExprSimplifier::simplify_with_guarantees].
 //!
+//! Guarantees can represent single values or possible ranges of values.
+//!
+//! ```
+//! use datafusion_optimizer::simplify_expressions::guarantees::{
+//!     Guarantee, GuaranteeBound, NullStatus};
+//!
+//! // Guarantee that value is always 1_i32
+//! Guarantee::from(&ScalarValue::Int32(Some(1)));
+//! // Guarantee that value is always NULL
+//! Guarantee::from(&ScalarValue::Null);
+//! // Guarantee that value is always between 1_i32 and 10_i32 (inclusive)
+//! // and never null.
+//! Guarantee::new(
+//!    Some(GuaranteeBound::new(ScalarValue::Int32(Some(1)), false)),
+//!    Some(GuaranteeBound::new(ScalarValue::Int32(Some(10)), false)),
+//!    NullStatus::NeverNull,
+//! );
+//! ```
 use datafusion_common::{tree_node::TreeNodeRewriter, Result, ScalarValue};
 use datafusion_expr::{expr::InList, lit, Between, BinaryExpr, Expr, Operator};
 use std::collections::HashMap;
@@ -24,7 +42,7 @@ use std::collections::HashMap;
 /// A bound on the value of an expression.
 #[derive(Debug, Clone, PartialEq)]
 pub struct GuaranteeBound {
-    /// The value of the bound.
+    /// The value of the bound. If the bound is null, then there is no bound.
     pub bound: ScalarValue,
     /// If true, the bound is exclusive. If false, the bound is inclusive.
     /// In terms of inequalities, this means the bound is `<` or `>` rather 
than
@@ -40,6 +58,7 @@ impl GuaranteeBound {
 }
 
 impl Default for GuaranteeBound {
+    /// Default value is a closed bound at null.
     fn default() -> Self {
         Self {
             bound: ScalarValue::Null,
@@ -70,9 +89,11 @@ pub enum NullStatus {
 /// nulls.
 #[derive(Debug, Clone, PartialEq)]
 pub struct Guarantee {
-    /// The min values that the expression can take on. If `min.bound` is
+    /// The min values that the expression can take on. If the min is null, 
then
+    /// there is no known min.
     pub min: GuaranteeBound,
-    /// The max values that the expression can take on.
+    /// The max values that the expression can take on. If the max is null,
+    /// then there is no known max.
     pub max: GuaranteeBound,
     /// Whether the expression is expected to be either always null or never 
null.
     pub null_status: NullStatus,
@@ -97,14 +118,19 @@ impl Guarantee {
         self.min.bound > *value || (self.min.bound == *value && self.min.open)
     }
 
+    /// Whether values are guaranteed to be greater than or equal to the given
+    /// value.
     fn greater_than_or_eq(&self, value: &ScalarValue) -> bool {
         self.min.bound >= *value
     }
 
+    /// Whether values are guaranteed to be less than the given value.
     fn less_than(&self, value: &ScalarValue) -> bool {
         self.max.bound < *value || (self.max.bound == *value && self.max.open)
     }
 
+    /// Whether values are guaranteed to be less than or equal to the given
+    /// value.
     fn less_than_or_eq(&self, value: &ScalarValue) -> bool {
         self.max.bound <= *value
     }
@@ -136,8 +162,6 @@ impl From<&ScalarValue> for Guarantee {
 }
 
 /// Rewrite expressions to incorporate guarantees.
-///
-///
 pub(crate) struct GuaranteeRewriter<'a> {
     guarantees: HashMap<&'a Expr, &'a Guarantee>,
 }
diff --git a/datafusion/physical-expr/src/intervals/cp_solver.rs 
b/datafusion/physical-expr/src/intervals/cp_solver.rs
index 7169101718..edf1507c70 100644
--- a/datafusion/physical-expr/src/intervals/cp_solver.rs
+++ b/datafusion/physical-expr/src/intervals/cp_solver.rs
@@ -16,99 +16,6 @@
 // under the License.
 
 //! Constraint propagator/solver for custom PhysicalExpr graphs.
-//! 
-//! Interval arithmetic provides a way to perform mathematical operations on
-//! intervals, which represent a range of possible values rather than a single
-//! point value. This allows for the propagation of ranges through mathematical
-//! operations, and can be used to compute bounds for a complicated expression.
-//! The key idea is that by breaking down a complicated expression into simpler
-//! terms, and then combining the bounds for those simpler terms, one can
-//! obtain bounds for the overall expression.
-//!
-//! For example, consider a mathematical expression such as x^2 + y = 4. Since
-//! it would be a binary tree in [PhysicalExpr] notation, this type of an
-//! hierarchical computation is well-suited for a graph based implementation.
-//! In such an implementation, an equation system f(x) = 0 is represented by a
-//! directed acyclic expression graph (DAEG).
-//!
-//! In order to use interval arithmetic to compute bounds for this expression,
-//! one would first determine intervals that represent the possible values of x
-//! and y. Let's say that the interval for x is [1, 2] and the interval for y
-//! is [-3, 1]. In the chart below, you can see how the computation takes 
place.
-//!
-//! This way of using interval arithmetic to compute bounds for a complex
-//! expression by combining the bounds for the constituent terms within the
-//! original expression allows us to reason about the range of possible values
-//! of the expression. This information later can be used in range pruning of
-//! the provably unnecessary parts of `RecordBatch`es.
-//!
-//! References
-//! 1 - Kabak, Mehmet Ozan. Analog Circuit Start-Up Behavior Analysis: An 
Interval
-//! Arithmetic Based Approach, Chapter 4. Stanford University, 2015.
-//! 2 - Moore, Ramon E. Interval analysis. Vol. 4. Englewood Cliffs: 
Prentice-Hall, 1966.
-//! 3 - F. Messine, "Deterministic global optimization using interval 
constraint
-//! propagation techniques," RAIRO-Operations Research, vol. 38, no. 04,
-//! pp. 277{293, 2004.
-//!
-//! ``` text
-//! Computing bounds for an expression using interval arithmetic.           
Constraint propagation through a top-down evaluation of the expression
-//!                                                                         
graph using inverse semantics.
-//!
-//!                                                                            
     [-2, 5] ∩ [4, 4] = [4, 4]              [4, 4]
-//!             +-----+                        +-----+                         
             +-----+                        +-----+
-//!        +----|  +  |----+              +----|  +  |----+                    
        +----|  +  |----+              +----|  +  |----+
-//!        |    |     |    |              |    |     |    |                    
        |    |     |    |              |    |     |    |
-//!        |    +-----+    |              |    +-----+    |                    
        |    +-----+    |              |    +-----+    |
-//!        |               |              |               |                    
        |               |              |               |
-//!    +-----+           +-----+      +-----+           +-----+                
    +-----+           +-----+      +-----+           +-----+
-//!    |   2 |           |  y  |      |   2 | [1, 4]    |  y  |                
    |   2 | [1, 4]    |  y  |      |   2 | [1, 4]    |  y  | [0, 1]*
-//!    |[.]  |           |     |      |[.]  |           |     |                
    |[.]  |           |     |      |[.]  |           |     |
-//!    +-----+           +-----+      +-----+           +-----+                
    +-----+           +-----+      +-----+           +-----+
-//!       |                              |                                     
       |              [-3, 1]         |
-//!       |                              |                                     
       |                              |
-//!     +---+                          +---+                                   
     +---+                          +---+
-//!     | x | [1, 2]                   | x | [1, 2]                            
     | x | [1, 2]                   | x | [1, 2]
-//!     +---+                          +---+                                   
     +---+                          +---+
-//!
-//!  (a) Bottom-up evaluation: Step1 (b) Bottom up evaluation: Step2           
  (a) Top-down propagation: Step1 (b) Top-down propagation: Step2
-//!
-//!                                        [1 - 3, 4 + 1] = [-2, 5]            
                                        [1 - 3, 4 + 1] = [-2, 5]
-//!             +-----+                        +-----+                         
             +-----+                        +-----+
-//!        +----|  +  |----+              +----|  +  |----+                    
        +----|  +  |----+              +----|  +  |----+
-//!        |    |     |    |              |    |     |    |                    
        |    |     |    |              |    |     |    |
-//!        |    +-----+    |              |    +-----+    |                    
        |    +-----+    |              |    +-----+    |
-//!        |               |              |               |                    
        |               |              |               |
-//!    +-----+           +-----+      +-----+           +-----+                
    +-----+           +-----+      +-----+           +-----+
-//!    |   2 |[1, 4]     |  y  |      |   2 |[1, 4]     |  y  |                
    |   2 |[3, 4]**   |  y  |      |   2 |[1, 4]     |  y  |
-//!    |[.]  |           |     |      |[.]  |           |     |                
    |[.]  |           |     |      |[.]  |           |     |
-//!    +-----+           +-----+      +-----+           +-----+                
    +-----+           +-----+      +-----+           +-----+
-//!       |              [-3, 1]         |              [-3, 1]                
       |              [0, 1]          |              [-3, 1]
-//!       |                              |                                     
       |                              |
-//!     +---+                          +---+                                   
     +---+                          +---+
-//!     | x | [1, 2]                   | x | [1, 2]                            
     | x | [1, 2]                   | x | [sqrt(3), 2]***
-//!     +---+                          +---+                                   
     +---+                          +---+
-//!
-//!  (c) Bottom-up evaluation: Step3 (d) Bottom-up evaluation: Step4           
  (c) Top-down propagation: Step3  (d) Top-down propagation: Step4
-//!
-//!                                                                            
 * [-3, 1] ∩ ([4, 4] - [1, 4]) = [0, 1]
-//!                                                                            
 ** [1, 4] ∩ ([4, 4] - [0, 1]) = [3, 4]
-//!                                                                            
 *** [1, 2] ∩ [sqrt(3), sqrt(4)] = [sqrt(3), 2]
-//! ```
-//! 
-//! # Examples
-//! 
-//! ```
-//! # Expression: (x + 4) - y
-//! 
-//! 
-//! # x: [0, 4), y: (1, 3]
-//! 
-//! # Result: 
-//! ```
-//! 
-//! # Null handling
-//! 
-//! 
 
 use std::collections::HashSet;
 use std::fmt::{Display, Formatter};
@@ -132,7 +39,83 @@ use crate::PhysicalExpr;
 
 use super::IntervalBound;
 
-
+// Interval arithmetic provides a way to perform mathematical operations on
+// intervals, which represent a range of possible values rather than a single
+// point value. This allows for the propagation of ranges through mathematical
+// operations, and can be used to compute bounds for a complicated expression.
+// The key idea is that by breaking down a complicated expression into simpler
+// terms, and then combining the bounds for those simpler terms, one can
+// obtain bounds for the overall expression.
+//
+// For example, consider a mathematical expression such as x^2 + y = 4. Since
+// it would be a binary tree in [PhysicalExpr] notation, this type of an
+// hierarchical computation is well-suited for a graph based implementation.
+// In such an implementation, an equation system f(x) = 0 is represented by a
+// directed acyclic expression graph (DAEG).
+//
+// In order to use interval arithmetic to compute bounds for this expression,
+// one would first determine intervals that represent the possible values of x
+// and y. Let's say that the interval for x is [1, 2] and the interval for y
+// is [-3, 1]. In the chart below, you can see how the computation takes place.
+//
+// This way of using interval arithmetic to compute bounds for a complex
+// expression by combining the bounds for the constituent terms within the
+// original expression allows us to reason about the range of possible values
+// of the expression. This information later can be used in range pruning of
+// the provably unnecessary parts of `RecordBatch`es.
+//
+// References
+// 1 - Kabak, Mehmet Ozan. Analog Circuit Start-Up Behavior Analysis: An 
Interval
+// Arithmetic Based Approach, Chapter 4. Stanford University, 2015.
+// 2 - Moore, Ramon E. Interval analysis. Vol. 4. Englewood Cliffs: 
Prentice-Hall, 1966.
+// 3 - F. Messine, "Deterministic global optimization using interval constraint
+// propagation techniques," RAIRO-Operations Research, vol. 38, no. 04,
+// pp. 277{293, 2004.
+//
+// ``` text
+// Computing bounds for an expression using interval arithmetic.           
Constraint propagation through a top-down evaluation of the expression
+//                                                                         
graph using inverse semantics.
+//
+//                                                                             
    [-2, 5] ∩ [4, 4] = [4, 4]              [4, 4]
+//             +-----+                        +-----+                          
            +-----+                        +-----+
+//        +----|  +  |----+              +----|  +  |----+                     
       +----|  +  |----+              +----|  +  |----+
+//        |    |     |    |              |    |     |    |                     
       |    |     |    |              |    |     |    |
+//        |    +-----+    |              |    +-----+    |                     
       |    +-----+    |              |    +-----+    |
+//        |               |              |               |                     
       |               |              |               |
+//    +-----+           +-----+      +-----+           +-----+                 
   +-----+           +-----+      +-----+           +-----+
+//    |   2 |           |  y  |      |   2 | [1, 4]    |  y  |                 
   |   2 | [1, 4]    |  y  |      |   2 | [1, 4]    |  y  | [0, 1]*
+//    |[.]  |           |     |      |[.]  |           |     |                 
   |[.]  |           |     |      |[.]  |           |     |
+//    +-----+           +-----+      +-----+           +-----+                 
   +-----+           +-----+      +-----+           +-----+
+//       |                              |                                      
      |              [-3, 1]         |
+//       |                              |                                      
      |                              |
+//     +---+                          +---+                                    
    +---+                          +---+
+//     | x | [1, 2]                   | x | [1, 2]                             
    | x | [1, 2]                   | x | [1, 2]
+//     +---+                          +---+                                    
    +---+                          +---+
+//
+//  (a) Bottom-up evaluation: Step1 (b) Bottom up evaluation: Step2            
 (a) Top-down propagation: Step1 (b) Top-down propagation: Step2
+//
+//                                        [1 - 3, 4 + 1] = [-2, 5]             
                                       [1 - 3, 4 + 1] = [-2, 5]
+//             +-----+                        +-----+                          
            +-----+                        +-----+
+//        +----|  +  |----+              +----|  +  |----+                     
       +----|  +  |----+              +----|  +  |----+
+//        |    |     |    |              |    |     |    |                     
       |    |     |    |              |    |     |    |
+//        |    +-----+    |              |    +-----+    |                     
       |    +-----+    |              |    +-----+    |
+//        |               |              |               |                     
       |               |              |               |
+//    +-----+           +-----+      +-----+           +-----+                 
   +-----+           +-----+      +-----+           +-----+
+//    |   2 |[1, 4]     |  y  |      |   2 |[1, 4]     |  y  |                 
   |   2 |[3, 4]**   |  y  |      |   2 |[1, 4]     |  y  |
+//    |[.]  |           |     |      |[.]  |           |     |                 
   |[.]  |           |     |      |[.]  |           |     |
+//    +-----+           +-----+      +-----+           +-----+                 
   +-----+           +-----+      +-----+           +-----+
+//       |              [-3, 1]         |              [-3, 1]                 
      |              [0, 1]          |              [-3, 1]
+//       |                              |                                      
      |                              |
+//     +---+                          +---+                                    
    +---+                          +---+
+//     | x | [1, 2]                   | x | [1, 2]                             
    | x | [1, 2]                   | x | [sqrt(3), 2]***
+//     +---+                          +---+                                    
    +---+                          +---+
+//
+//  (c) Bottom-up evaluation: Step3 (d) Bottom-up evaluation: Step4            
 (c) Top-down propagation: Step3  (d) Top-down propagation: Step4
+//
+//                                                                             
* [-3, 1] ∩ ([4, 4] - [1, 4]) = [0, 1]
+//                                                                             
** [1, 4] ∩ ([4, 4] - [0, 1]) = [3, 4]
+//                                                                             
*** [1, 2] ∩ [sqrt(3), sqrt(4)] = [sqrt(3), 2]
+// ```
 
 /// This object implements a directed acyclic expression graph (DAEG) that
 /// is used to compute ranges for expressions through interval arithmetic.
@@ -578,7 +561,6 @@ pub fn check_support(expr: &Arc<dyn PhysicalExpr>) -> bool {
 #[cfg(test)]
 mod tests {
     use super::*;
-    use datafusion_expr::expr;
     use itertools::Itertools;
 
     use crate::expressions::{BinaryExpr, Column};
@@ -1141,36 +1123,6 @@ mod tests {
         let final_node_count = graph.node_count();
         // Assert that the final node count is equal the previous node count 
(i.e., no node was pruned).
         assert_eq!(prev_node_count, final_node_count);
-        Ok(())
-    }
-
-    #[test]
-    fn my_test() -> Result<()> {
-        let expr_x = Arc::new(Column::new("x", 0));
-        let expr_y = Arc::new(Column::new("y", 1));
-        let expression = Arc::new(BinaryExpr::new(
-            Arc::new(BinaryExpr::new(
-                expr_x.clone(),
-                Operator::Plus,
-                Arc::new(Literal::new(ScalarValue::Int32(Some(4)))),
-            )),
-            Operator::Minus,
-            expr_y.clone(),
-        ));
-
-        let mut graph = 
ExprIntervalGraph::try_new(expression.clone()).unwrap();
-        // Pass in expr_x and expr_y so we can input their intervals
-        let mapping = graph.gather_node_indices(&[expr_x, expr_y]);
-
-        let interval_x = Interval::make(Some(0), Some(4), (false, true));
-        let interval_y = Interval::make(Some(1), Some(3), (true, false));
-        graph.assign_intervals(&[
-            (mapping[0].1, interval_x.clone()),
-            (mapping[1].1, interval_y.clone()),
-        ]);
-
-        
-
         Ok(())
     }
 }

Reply via email to