alamb commented on code in PR #17743:
URL: https://github.com/apache/datafusion/pull/17743#discussion_r2372961652
##########
datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs:
##########
@@ -1471,6 +1508,56 @@ impl<S: SimplifyInfo> TreeNodeRewriter for
Simplifier<'_, S> {
// Do a first pass at simplification
out_expr.rewrite(self)?
}
+ // CASE
+ // WHEN X THEN true
+ // WHEN Y THEN true
+ // WHEN Z THEN false
+ // ...
+ // ELSE true
+ // END
+ //
+ // --->
+ //
+ // NOT(CASE
+ // WHEN X THEN false
+ // WHEN Y THEN false
+ // WHEN Z THEN true
+ // ...
+ // ELSE false
+ // END)
+ //
+ // Note: the rationale for this rewrite is that the case can then
be further
+ // simplified into a small number of ANDs and ORs
+ Expr::Case(Case {
+ expr: None,
+ when_then_expr,
+ else_expr,
+ }) if !when_then_expr.is_empty()
+ && when_then_expr
+ .iter()
+ .all(|(_, then)| is_bool_lit(then)) // all thens are
literal bools
+ // This simplification is only helpful if we end up with a
small number of true thens
+ && when_then_expr
+ .iter()
+ .filter(|(_, then)| is_false(then))
+ .count()
+ < 3
+ && else_expr.as_deref().is_none_or(is_bool_lit) =>
+ {
+ Transformed::yes(
+ Expr::Case(Case {
+ expr: None,
+ when_then_expr: when_then_expr
+ .iter()
+ .map(|(when, then)| {
+ (when.clone(), Box::new(then.clone().not()))
+ })
+ .collect(),
+ else_expr: else_expr.map(|else_expr|
Box::new(else_expr.not())),
+ })
+ .not(),
+ )
Review Comment:
The same type of rewrite can be applied here to avoid all the new
allocations / clones
##########
datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs:
##########
@@ -1399,6 +1399,39 @@ impl<S: SimplifyInfo> TreeNodeRewriter for
Simplifier<'_, S> {
// Rules for Case
//
+ // CASE WHEN X THEN "a" WHEN Y THEN "b" ... END = "a" --> CASE
WHEN X THEN "a" = "a" WHEN Y THEN "b" = "a" END
+ Expr::BinaryExpr(BinaryExpr {
+ left,
+ op: op @ (Eq | NotEq),
+ right,
+ }) if is_case_with_literal_outputs(&left) && is_lit(&right) => {
+ let case = as_case(&left)?;
+ Transformed::yes(Expr::Case(Case {
+ expr: None,
+ when_then_expr: case
+ .when_then_expr
+ .iter()
+ .map(|(when, then)| {
+ (
+ when.clone(),
+ Box::new(Expr::BinaryExpr(BinaryExpr {
+ left: then.clone(),
+ op,
+ right: right.clone(),
+ })),
+ )
+ })
+ .collect(),
+ else_expr: case.else_expr.as_ref().map(|els| {
+ Box::new(Expr::BinaryExpr(BinaryExpr {
+ left: els.clone(),
+ op,
+ right,
+ }))
+ }),
+ }))
Review Comment:
This ends up clone()'ing the expr several times which can be expensive -- I
played around with it locally to avoid the `clone`s and I think many can be
avoided. Here is what I came up with:
```diff
@@ -1405,26 +1402,29 @@ impl<S: SimplifyInfo> TreeNodeRewriter for
Simplifier<'_, S> {
op: op @ (Eq | NotEq),
right,
}) if is_case_with_literal_outputs(&left) && is_lit(&right) => {
- let case = as_case(&left)?;
+ let Case {
+ expr: _,
+ when_then_expr,
+ else_expr,
+ } = into_case(*left)?;
Transformed::yes(Expr::Case(Case {
expr: None,
- when_then_expr: case
- .when_then_expr
- .iter()
+ when_then_expr: when_then_expr
+ .into_iter()
.map(|(when, then)| {
(
- when.clone(),
+ when,
Box::new(Expr::BinaryExpr(BinaryExpr {
- left: then.clone(),
+ left: then,
op,
right: right.clone(),
})),
)
})
.collect(),
- else_expr: case.else_expr.as_ref().map(|els| {
+ else_expr: else_expr.map(|els| {
Box::new(Expr::BinaryExpr(BinaryExpr {
- left: els.clone(),
+ left: els,
op,
right,
}))
diff --git a/datafusion/optimizer/src/simplify_expressions/utils.rs
b/datafusion/optimizer/src/simplify_expressions/utils.rs
index e526df206..35e256f30 100644
--- a/datafusion/optimizer/src/simplify_expressions/utils.rs
+++ b/datafusion/optimizer/src/simplify_expressions/utils.rs
@@ -279,7 +279,7 @@ pub fn is_case_with_literal_outputs(expr: &Expr) -> bool
{
}
}
-pub fn as_case(expr: &Expr) -> Result<&Case> {
+pub fn into_case(expr: Expr) -> Result<Case> {
match expr {
Expr::Case(case) => Ok(case),
_ => internal_err!("Expected case, got {expr:?}"),
```
##########
datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs:
##########
@@ -1447,7 +1480,11 @@ impl<S: SimplifyInfo> TreeNodeRewriter for
Simplifier<'_, S> {
when_then_expr,
else_expr,
}) if !when_then_expr.is_empty()
- && when_then_expr.len() < 3 // The rewrite is O(n²) so limit
to small number
+ // The rewrite is O(n²) in general so limit to small number of
when-thens that can be true
+ && (when_then_expr.len() < 3 // small number of input whens
+ // or all thens are literal bools and a small number of
them are true
Review Comment:
It seems like it would be valuable to do this rewrite regardless of the
terms as long as all the arguments are boolean literals (why still limit to
three true cases?)
##########
datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs:
##########
@@ -1399,6 +1399,39 @@ impl<S: SimplifyInfo> TreeNodeRewriter for
Simplifier<'_, S> {
// Rules for Case
//
+ // CASE WHEN X THEN "a" WHEN Y THEN "b" ... END = "a" --> CASE
WHEN X THEN "a" = "a" WHEN Y THEN "b" = "a" END
Review Comment:
I had to read this a few times and the PR's description to understand the
*why* this rewrite is helpful
```suggestion
// Inline a comparison to a literal with the case statement into
the `THEN` clauses.
// which can enable further simplifications
// CASE WHEN X THEN "a" WHEN Y THEN "b" ... END = "a" --> CASE
WHEN X THEN "a" = "a" WHEN Y THEN "b" = "a" END
```
##########
datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs:
##########
@@ -1471,6 +1508,56 @@ impl<S: SimplifyInfo> TreeNodeRewriter for
Simplifier<'_, S> {
// Do a first pass at simplification
out_expr.rewrite(self)?
}
+ // CASE
+ // WHEN X THEN true
+ // WHEN Y THEN true
+ // WHEN Z THEN false
+ // ...
+ // ELSE true
+ // END
+ //
+ // --->
+ //
+ // NOT(CASE
+ // WHEN X THEN false
+ // WHEN Y THEN false
+ // WHEN Z THEN true
+ // ...
+ // ELSE false
+ // END)
+ //
+ // Note: the rationale for this rewrite is that the case can then
be further
Review Comment:
I don't understand why rewrite adding a `NOT` make it simpler / easier to
simplify?
In recent days we have done several other CASE simplifications related to
constants such as
- https://github.com/apache/datafusion/issues/17448
- https://github.com/apache/datafusion/pull/17602
We have one more open (perhaps you can help nudge it along) which I think
would result in simplifying the expressions
- https://github.com/apache/datafusion/issues/17590.
https://github.com/apache/datafusion/pull/17628
Once https://github.com/apache/datafusion/pull/17628 is complete, do you
think this rewrite would still add value?
##########
datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs:
##########
@@ -3465,6 +3552,142 @@ mod tests {
);
}
+ #[test]
+ fn simplify_literal_case_equality() {
+ // CASE WHEN c2 != false THEN "ok" ELSE "not_ok"
+ let simple_case = Expr::Case(Case::new(
+ None,
+ vec![(
+ Box::new(col("c2_non_null").not_eq(lit(false))),
+ Box::new(lit("ok")),
+ )],
+ Some(Box::new(lit("not_ok"))),
+ ));
+
+ // CASE WHEN c2 != false THEN "ok" ELSE "not_ok" == "ok"
+ // -->
+ // CASE WHEN c2 != false THEN "ok" == "ok" ELSE "not_ok" == "ok"
+ // -->
+ // CASE WHEN c2 != false THEN true ELSE false
+ // -->
+ // c2
+ assert_eq!(
+ simplify(binary_expr(simple_case.clone(), Operator::Eq,
lit("ok"),)),
+ col("c2_non_null"),
+ );
+
+ // CASE WHEN c2 != false THEN "ok" ELSE "not_ok" != "ok"
+ // -->
+ // NOT(CASE WHEN c2 != false THEN "ok" == "ok" ELSE "not_ok" == "ok")
+ // -->
+ // NOT(CASE WHEN c2 != false THEN true ELSE false)
+ // -->
+ // NOT(c2)
+ assert_eq!(
Review Comment:
this is very nice
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]