jayzhan211 commented on code in PR #9692:
URL: https://github.com/apache/arrow-datafusion/pull/9692#discussion_r1530268526
##########
datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs:
##########
@@ -1473,7 +1470,123 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for
Simplifier<'a, S> {
negated: false,
};
- return Ok(Transformed::yes(Expr::InList(merged_inlist)));
+ Transformed::yes(Expr::InList(merged_inlist))
+ }
+
+ // Simplify expressions that are guaranteed to be true or false to
a literal boolean expression
+ //
+ // Rules:
+ // If both expressions are `IN` or `NOT IN`, then we can apply
intersection or union on both lists
+ // Intersection:
+ // 1. `a in (1,2,3) AND a in (4,5) -> a in (), which is false`
+ // 2. `a in (1,2,3) AND a in (2,3,4) -> a in (2,3)`
+ // 3. `a not in (1,2,3) OR a not in (3,4,5,6) -> a not in (3)`
+ // Union:
+ // 4. `a not in (1,2,3) AND a not in (4,5,6) -> a not in
(1,2,3,4,5,6)`
+ // # This rule is handled by `or_in_list_simplifier.rs`
+ // 5. `a in (1,2,3) OR a in (4,5,6) -> a in (1,2,3,4,5,6)`
+ // If one of the expressions is `IN` and another one is `NOT IN`,
then we apply exception on `In` expression
+ // 6. `a in (1,2,3,4) AND a not in (1,2,3,4,5) -> a in (),
which is false`
+ // 7. `a not in (1,2,3,4) AND a in (1,2,3,4,5) -> a = 5`
+ // 8. `a in (1,2,3,4) AND a not in (5,6,7,8) -> a in (1,2,3,4)`
+ Expr::BinaryExpr(BinaryExpr {
+ left,
+ op: Operator::And,
+ right,
+ }) if are_inlist_and_eq_and_match_neg(
+ left.as_ref(),
+ right.as_ref(),
+ false,
+ false,
+ ) =>
+ {
+ match (*left, *right) {
+ (Expr::InList(l1), Expr::InList(l2)) => {
+ return inlist_intersection(l1, l2,
false).map(Transformed::yes);
+ }
+ // Matched previously once
+ _ => unreachable!(),
+ }
+ }
+
+ Expr::BinaryExpr(BinaryExpr {
+ left,
+ op: Operator::And,
+ right,
+ }) if are_inlist_and_eq_and_match_neg(
+ left.as_ref(),
+ right.as_ref(),
+ true,
+ true,
+ ) =>
+ {
+ match (*left, *right) {
+ (Expr::InList(l1), Expr::InList(l2)) => {
+ return inlist_union(l1, l2,
true).map(Transformed::yes);
+ }
+ // Matched previously once
+ _ => unreachable!(),
+ }
+ }
+
+ Expr::BinaryExpr(BinaryExpr {
+ left,
+ op: Operator::And,
+ right,
+ }) if are_inlist_and_eq_and_match_neg(
+ left.as_ref(),
+ right.as_ref(),
+ false,
+ true,
+ ) =>
+ {
+ match (*left, *right) {
+ (Expr::InList(l1), Expr::InList(l2)) => {
+ return inlist_except(l1, l2).map(Transformed::yes);
+ }
+ // Matched previously once
+ _ => unreachable!(),
+ }
+ }
+
+ Expr::BinaryExpr(BinaryExpr {
+ left,
+ op: Operator::And,
+ right,
+ }) if are_inlist_and_eq_and_match_neg(
+ left.as_ref(),
+ right.as_ref(),
+ true,
+ false,
+ ) =>
+ {
+ match (*left, *right) {
+ (Expr::InList(l1), Expr::InList(l2)) => {
+ return inlist_except(l2, l1).map(Transformed::yes);
+ }
+ // Matched previously once
+ _ => unreachable!(),
+ }
+ }
+
+ Expr::BinaryExpr(BinaryExpr {
+ left,
+ op: Operator::Or,
+ right,
+ }) if are_inlist_and_eq_and_match_neg(
+ left.as_ref(),
+ right.as_ref(),
+ true,
+ true,
+ ) =>
+ {
+ match (*left, *right) {
+ (Expr::InList(l1), Expr::InList(l2)) => {
Review Comment:
Currently, those inlist set rules do not support columns, so we don't
need `as_list`.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]