alamb commented on code in PR #17743:
URL: https://github.com/apache/datafusion/pull/17743#discussion_r2372961652


##########
datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs:
##########
@@ -1471,6 +1508,56 @@ impl<S: SimplifyInfo> TreeNodeRewriter for 
Simplifier<'_, S> {
                 // Do a first pass at simplification
                 out_expr.rewrite(self)?
             }
+            // CASE
+            //   WHEN X THEN true
+            //   WHEN Y THEN true
+            //   WHEN Z THEN false
+            //   ...
+            //   ELSE true
+            // END
+            //
+            // --->
+            //
+            // NOT(CASE
+            //   WHEN X THEN false
+            //   WHEN Y THEN false
+            //   WHEN Z THEN true
+            //   ...
+            //   ELSE false
+            // END)
+            //
+            // Note: the rationale for this rewrite is that the case can then 
be further
+            // simplified into a small number of ANDs and ORs
+            Expr::Case(Case {
+                expr: None,
+                when_then_expr,
+                else_expr,
+            }) if !when_then_expr.is_empty()
+                && when_then_expr
+                    .iter()
+                    .all(|(_, then)| is_bool_lit(then)) // all thens are 
literal bools
+                // This simplification is only helpful if we end up with a 
small number of true thens
+                && when_then_expr
+                    .iter()
+                    .filter(|(_, then)| is_false(then))
+                    .count()
+                    < 3
+                && else_expr.as_deref().is_none_or(is_bool_lit) =>
+            {
+                Transformed::yes(
+                    Expr::Case(Case {
+                        expr: None,
+                        when_then_expr: when_then_expr
+                            .iter()
+                            .map(|(when, then)| {
+                                (when.clone(), Box::new(then.clone().not()))
+                            })
+                            .collect(),
+                        else_expr: else_expr.map(|else_expr| 
Box::new(else_expr.not())),
+                    })
+                    .not(),
+                )

Review Comment:
   The same type of rewrite can be applied here to avoid all the new 
allocations / clones



##########
datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs:
##########
@@ -1399,6 +1399,39 @@ impl<S: SimplifyInfo> TreeNodeRewriter for 
Simplifier<'_, S> {
             // Rules for Case
             //
 
+            // CASE WHEN X THEN "a" WHEN Y THEN "b" ... END = "a" --> CASE 
WHEN X THEN "a" = "a" WHEN Y THEN "b" = "a" END
+            Expr::BinaryExpr(BinaryExpr {
+                left,
+                op: op @ (Eq | NotEq),
+                right,
+            }) if is_case_with_literal_outputs(&left) && is_lit(&right) => {
+                let case = as_case(&left)?;
+                Transformed::yes(Expr::Case(Case {
+                    expr: None,
+                    when_then_expr: case
+                        .when_then_expr
+                        .iter()
+                        .map(|(when, then)| {
+                            (
+                                when.clone(),
+                                Box::new(Expr::BinaryExpr(BinaryExpr {
+                                    left: then.clone(),
+                                    op,
+                                    right: right.clone(),
+                                })),
+                            )
+                        })
+                        .collect(),
+                    else_expr: case.else_expr.as_ref().map(|els| {
+                        Box::new(Expr::BinaryExpr(BinaryExpr {
+                            left: els.clone(),
+                            op,
+                            right,
+                        }))
+                    }),
+                }))

Review Comment:
   This ends up clone()'ing the expr several times which can be expensive -- I 
played around with it locally to avoid the `clone`s and I think many can be 
avoided. Here is what I came up with:
   
   ```diff
   @@ -1405,26 +1402,29 @@ impl<S: SimplifyInfo> TreeNodeRewriter for 
Simplifier<'_, S> {
                    op: op @ (Eq | NotEq),
                    right,
                }) if is_case_with_literal_outputs(&left) && is_lit(&right) => {
   -                let case = as_case(&left)?;
   +                let Case {
   +                    expr: _,
   +                    when_then_expr,
   +                    else_expr,
   +                } = into_case(*left)?;
                    Transformed::yes(Expr::Case(Case {
                        expr: None,
   -                    when_then_expr: case
   -                        .when_then_expr
   -                        .iter()
   +                    when_then_expr: when_then_expr
   +                        .into_iter()
                            .map(|(when, then)| {
                                (
   -                                when.clone(),
   +                                when,
                                    Box::new(Expr::BinaryExpr(BinaryExpr {
   -                                    left: then.clone(),
   +                                    left: then,
                                        op,
                                        right: right.clone(),
                                    })),
                                )
                            })
                            .collect(),
   -                    else_expr: case.else_expr.as_ref().map(|els| {
   +                    else_expr: else_expr.map(|els| {
                            Box::new(Expr::BinaryExpr(BinaryExpr {
   -                            left: els.clone(),
   +                            left: els,
                                op,
                                right,
                            }))
   diff --git a/datafusion/optimizer/src/simplify_expressions/utils.rs 
b/datafusion/optimizer/src/simplify_expressions/utils.rs
   index e526df206..35e256f30 100644
   --- a/datafusion/optimizer/src/simplify_expressions/utils.rs
   +++ b/datafusion/optimizer/src/simplify_expressions/utils.rs
   @@ -279,7 +279,7 @@ pub fn is_case_with_literal_outputs(expr: &Expr) -> bool 
{
        }
    }
   
   -pub fn as_case(expr: &Expr) -> Result<&Case> {
   +pub fn into_case(expr: Expr) -> Result<Case> {
        match expr {
            Expr::Case(case) => Ok(case),
            _ => internal_err!("Expected case, got {expr:?}"),
   ```



##########
datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs:
##########
@@ -1447,7 +1480,11 @@ impl<S: SimplifyInfo> TreeNodeRewriter for 
Simplifier<'_, S> {
                 when_then_expr,
                 else_expr,
             }) if !when_then_expr.is_empty()
-                && when_then_expr.len() < 3 // The rewrite is O(n²) so limit 
to small number
+                // The rewrite is O(n²) in general so limit to small number of 
when-thens that can be true
+                && (when_then_expr.len() < 3 // small number of input whens
+                    // or all thens are literal bools and a small number of 
them are true

Review Comment:
   It seems like it would be valuable to do this rewrite regardless of the 
terms as long as all the arguments are boolean literals (why still limit to 
three true cases?)



##########
datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs:
##########
@@ -1399,6 +1399,39 @@ impl<S: SimplifyInfo> TreeNodeRewriter for 
Simplifier<'_, S> {
             // Rules for Case
             //
 
+            // CASE WHEN X THEN "a" WHEN Y THEN "b" ... END = "a" --> CASE 
WHEN X THEN "a" = "a" WHEN Y THEN "b" = "a" END

Review Comment:
   I had to read this a few times and the PR's description to understand the 
*why* this rewrite is helpful
   
   ```suggestion
               // Inline a comparison to a literal with the case statement into 
the `THEN` clauses.
               // which can enable further simplifications
               // CASE WHEN X THEN "a" WHEN Y THEN "b" ... END = "a" --> CASE 
WHEN X THEN "a" = "a" WHEN Y THEN "b" = "a" END
   ```



##########
datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs:
##########
@@ -1471,6 +1508,56 @@ impl<S: SimplifyInfo> TreeNodeRewriter for 
Simplifier<'_, S> {
                 // Do a first pass at simplification
                 out_expr.rewrite(self)?
             }
+            // CASE
+            //   WHEN X THEN true
+            //   WHEN Y THEN true
+            //   WHEN Z THEN false
+            //   ...
+            //   ELSE true
+            // END
+            //
+            // --->
+            //
+            // NOT(CASE
+            //   WHEN X THEN false
+            //   WHEN Y THEN false
+            //   WHEN Z THEN true
+            //   ...
+            //   ELSE false
+            // END)
+            //
+            // Note: the rationale for this rewrite is that the case can then 
be further

Review Comment:
   I don't understand why  rewrite adding a `NOT` make it simpler / easier to 
simplify?
   
   In recent days we have done several other CASE simplifications related to 
constants such as
   - https://github.com/apache/datafusion/issues/17448
   - https://github.com/apache/datafusion/pull/17602
   
   We have one more open (perhaps you can help nudge it along) which I think 
would result in simplifying the expressions
   - https://github.com/apache/datafusion/issues/17590. 
https://github.com/apache/datafusion/pull/17628
   
   Once https://github.com/apache/datafusion/pull/17628 is complete, do you 
think this rewrite would still add value?



##########
datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs:
##########
@@ -3465,6 +3552,142 @@ mod tests {
         );
     }
 
+    #[test]
+    fn simplify_literal_case_equality() {
+        // CASE WHEN c2 != false THEN "ok" ELSE "not_ok"
+        let simple_case = Expr::Case(Case::new(
+            None,
+            vec![(
+                Box::new(col("c2_non_null").not_eq(lit(false))),
+                Box::new(lit("ok")),
+            )],
+            Some(Box::new(lit("not_ok"))),
+        ));
+
+        // CASE WHEN c2 != false THEN "ok" ELSE "not_ok" == "ok"
+        // -->
+        // CASE WHEN c2 != false THEN "ok" == "ok" ELSE "not_ok" == "ok"
+        // -->
+        // CASE WHEN c2 != false THEN true ELSE false
+        // -->
+        // c2
+        assert_eq!(
+            simplify(binary_expr(simple_case.clone(), Operator::Eq, 
lit("ok"),)),
+            col("c2_non_null"),
+        );
+
+        // CASE WHEN c2 != false THEN "ok" ELSE "not_ok" != "ok"
+        // -->
+        // NOT(CASE WHEN c2 != false THEN "ok" == "ok" ELSE "not_ok" == "ok")
+        // -->
+        // NOT(CASE WHEN c2 != false THEN true ELSE false)
+        // -->
+        // NOT(c2)
+        assert_eq!(

Review Comment:
   this is very nice



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to