wjones127 commented on code in PR #7467:
URL: https://github.com/apache/arrow-datafusion/pull/7467#discussion_r1322155765


##########
datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs:
##########
@@ -149,6 +153,76 @@ impl<S: SimplifyInfo> ExprSimplifier<S> {
 
         expr.rewrite(&mut expr_rewrite)
     }
+
+    /// Input guarantees and simplify the expression.
+    ///
+    /// The guarantees can simplify expressions. For example, if a column `x` 
is
+    /// guaranteed to be `3`, then the expression `x > 1` can be replaced by 
the
+    /// literal `true`.
+    ///
+    /// The guarantees are provided as an iterator of `(Expr, 
NullableInterval)`
+    /// pairs, where the [Expr] is a column reference and the 
[NullableInterval]
+    /// is an interval representing the known possible values of that column.
+    ///
+    /// ```rust
+    /// use arrow::datatypes::{DataType, Field, Schema};
+    /// use datafusion_expr::{col, lit, Expr};
+    /// use datafusion_common::{Result, ScalarValue, ToDFSchema};
+    /// use datafusion_physical_expr::execution_props::ExecutionProps;
+    /// use datafusion_physical_expr::intervals::{Interval, NullableInterval};
+    /// use datafusion_optimizer::simplify_expressions::{
+    ///     ExprSimplifier, SimplifyContext};
+    ///
+    /// let schema = Schema::new(vec![
+    ///   Field::new("x", DataType::Int64, false),
+    ///   Field::new("y", DataType::UInt32, false),
+    ///   Field::new("z", DataType::Int64, false),
+    ///   ])
+    ///   .to_dfschema_ref().unwrap();
+    ///
+    /// // Create the simplifier
+    /// let props = ExecutionProps::new();
+    /// let context = SimplifyContext::new(&props)
+    ///    .with_schema(schema);
+    /// let simplifier = ExprSimplifier::new(context);
+    ///
+    /// // Expression: (x >= 3) AND (y + 2 < 10) AND (z > 5)
+    /// let expr_x = col("x").gt_eq(lit(3_i64));
+    /// let expr_y = (col("y") + lit(2_u32)).lt(lit(10_u32));
+    /// let expr_z = col("z").gt(lit(5_i64));
+    /// let expr = expr_x.and(expr_y).and(expr_z.clone());
+    ///
+    /// let guarantees = vec![
+    ///    // x ∈ [3, 5]
+    ///    (
+    ///        col("x"),
+    ///        NullableInterval {
+    ///            values: Interval::make(Some(3_i64), Some(5_i64), (false, 
false)),
+    ///            is_valid: Interval::CERTAINLY_TRUE,
+    ///        }
+    ///    ),
+    ///    // y = 3
+    ///    (col("y"), NullableInterval::from(&ScalarValue::UInt32(Some(3)))),
+    /// ];
+    /// let output = simplifier.simplify_with_guarantees(expr, 
&guarantees).unwrap();
+    /// // Expression becomes: true AND true AND (z > 5), which simplifies to
+    /// // z > 5.
+    /// assert_eq!(output, expr_z);
+    /// ```
+    pub fn simplify_with_guarantees<'a>(

Review Comment:
   > The downside is that the guarantees would have to be owned (aka a Vec)
   
   That doesn't seem too bad, I think. My imagined use case is that we re-use 
the same simplifier with different guarantees but the same predicate. Something 
like:
   
   ```rust
   let mut simplifier = ExprSimplifier::new(context);
   for row_group in file {
       let guarantees = get_guarantees(row_group.statistics);
       simplifier = simplifier.with_guarantees(guarantees);
       let group_predicate = simplifier.simplify(predicate);
       // Do something with the predicate
   }   
   ```
   
   So my main concern is that it's performant if handled in a loop like that. I 
think it should be.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to