alamb commented on code in PR #9628:
URL: https://github.com/apache/arrow-datafusion/pull/9628#discussion_r1527156663


##########
datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs:
##########
@@ -1405,12 +1409,116 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for 
Simplifier<'a, S> {
                 Transformed::yes(lit(false))
             }
 
+            // expr IN () --> false
+            // expr NOT IN () --> true
+            Expr::InList(InList {
+                expr,
+                list,
+                negated,
+            }) if list.is_empty() && *expr != Expr::Literal(ScalarValue::Null) 
=> {
+                Transformed::yes(lit(negated))
+            }
+
+            // null in (x, y, z) --> null
+            // null not in (x, y, z) --> null
+            Expr::InList(InList {
+                expr,
+                list: _,
+                negated: _,
+            }) if is_null(expr.as_ref()) => Transformed::yes(lit_bool_null()),
+
+            // expr IN ((subquery)) -> expr IN (subquery), see ##5529
+            Expr::InList(InList {
+                expr,
+                mut list,
+                negated,
+            }) if list.len() == 1
+                && matches!(list.first(), Some(Expr::ScalarSubquery { .. })) =>
+            {
+                let Expr::ScalarSubquery(subquery) = list.remove(0) else {
+                    unreachable!()
+                };
+
+                Transformed::yes(Expr::InSubquery(InSubquery::new(
+                    expr, subquery, negated,
+                )))
+            }
+
+            // Combine multiple OR expressions into a single IN list 
expression if possible
+            //
+            // i.e. `a = 1 OR a = 2 OR a = 3` -> `a IN (1, 2, 3)`
+            Expr::BinaryExpr(BinaryExpr {
+                left,
+                op: Operator::Or,
+                right,
+            }) if are_inlist_and_eq(left.as_ref(), right.as_ref()) => {
+                let left = as_inlist(left.as_ref());
+                let right = as_inlist(right.as_ref());
+
+                let lhs = left.unwrap();
+                let rhs = right.unwrap();
+                let lhs = lhs.into_owned();
+                let rhs = rhs.into_owned();
+                let mut seen: HashSet<Expr> = HashSet::new();
+                let list = lhs
+                    .list
+                    .into_iter()
+                    .chain(rhs.list)
+                    .filter(|e| seen.insert(e.to_owned()))
+                    .collect::<Vec<_>>();
+
+                let merged_inlist = InList {
+                    expr: lhs.expr,
+                    list,
+                    negated: false,
+                };
+
+                return Ok(Transformed::yes(Expr::InList(merged_inlist)));
+            }
+
             // no additional rewrites possible
             expr => Transformed::no(expr),
         })
     }
 }
 
+fn are_inlist_and_eq(left: &Expr, right: &Expr) -> bool {
+    let left = as_inlist(left);
+    let right = as_inlist(right);
+    if let (Some(lhs), Some(rhs)) = (left, right) {
+        lhs.expr.try_into_col().is_ok()

Review Comment:
   `try_into_col().is_ok()` I think returns an error/string which is then 
ignored
   
   Maybe we could do something like `matches!(lhs.expr, Expr:Column(..)` 🤔 



##########
datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs:
##########
@@ -1405,12 +1409,116 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for 
Simplifier<'a, S> {
                 Transformed::yes(lit(false))
             }
 
+            // expr IN () --> false
+            // expr NOT IN () --> true
+            Expr::InList(InList {
+                expr,
+                list,
+                negated,
+            }) if list.is_empty() && *expr != Expr::Literal(ScalarValue::Null) 
=> {
+                Transformed::yes(lit(negated))
+            }
+
+            // null in (x, y, z) --> null
+            // null not in (x, y, z) --> null
+            Expr::InList(InList {
+                expr,
+                list: _,
+                negated: _,
+            }) if is_null(expr.as_ref()) => Transformed::yes(lit_bool_null()),
+
+            // expr IN ((subquery)) -> expr IN (subquery), see ##5529
+            Expr::InList(InList {
+                expr,
+                mut list,
+                negated,
+            }) if list.len() == 1
+                && matches!(list.first(), Some(Expr::ScalarSubquery { .. })) =>
+            {
+                let Expr::ScalarSubquery(subquery) = list.remove(0) else {
+                    unreachable!()
+                };
+
+                Transformed::yes(Expr::InSubquery(InSubquery::new(
+                    expr, subquery, negated,
+                )))
+            }
+
+            // Combine multiple OR expressions into a single IN list 
expression if possible
+            //
+            // i.e. `a = 1 OR a = 2 OR a = 3` -> `a IN (1, 2, 3)`
+            Expr::BinaryExpr(BinaryExpr {
+                left,
+                op: Operator::Or,
+                right,
+            }) if are_inlist_and_eq(left.as_ref(), right.as_ref()) => {
+                let left = as_inlist(left.as_ref());
+                let right = as_inlist(right.as_ref());
+
+                let lhs = left.unwrap();
+                let rhs = right.unwrap();
+                let lhs = lhs.into_owned();
+                let rhs = rhs.into_owned();
+                let mut seen: HashSet<Expr> = HashSet::new();
+                let list = lhs
+                    .list
+                    .into_iter()
+                    .chain(rhs.list)
+                    .filter(|e| seen.insert(e.to_owned()))
+                    .collect::<Vec<_>>();
+
+                let merged_inlist = InList {
+                    expr: lhs.expr,
+                    list,
+                    negated: false,
+                };
+
+                return Ok(Transformed::yes(Expr::InList(merged_inlist)));
+            }
+
             // no additional rewrites possible
             expr => Transformed::no(expr),
         })
     }
 }
 
+fn are_inlist_and_eq(left: &Expr, right: &Expr) -> bool {
+    let left = as_inlist(left);
+    let right = as_inlist(right);
+    if let (Some(lhs), Some(rhs)) = (left, right) {
+        lhs.expr.try_into_col().is_ok()
+            && rhs.expr.try_into_col().is_ok()
+            && lhs.expr == rhs.expr
+            && !lhs.negated
+            && !rhs.negated
+    } else {
+        false
+    }
+}
+
+/// Try to convert an expression to an in-list expression
+fn as_inlist(expr: &Expr) -> Option<Cow<InList>> {
+    match expr {
+        Expr::InList(inlist) => Some(Cow::Borrowed(inlist)),
+        Expr::BinaryExpr(BinaryExpr { left, op, right }) if *op == 
Operator::Eq => {
+            match (left.as_ref(), right.as_ref()) {
+                (Expr::Column(_), Expr::Literal(_)) => Some(Cow::Owned(InList {
+                    expr: left.clone(),

Review Comment:
   it would be great to avoid these `clone`s but since this PR just moves the 
code we can do it as a follow on PR I think



##########
datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs:
##########
@@ -189,14 +192,15 @@ impl<S: SimplifyInfo> ExprSimplifier<S> {
             .data()?
             .rewrite(&mut inlist_simplifier)
             .data()?
-            .rewrite(&mut shorten_in_list_simplifier)
-            .data()?
             .rewrite(&mut guarantee_rewriter)
             .data()?
             // run both passes twice to try an minimize simplifications that 
we missed
             .rewrite(&mut const_evaluator)
             .data()?
             .rewrite(&mut simplifier)
+            .data()?
+            // shorten inlist should be started after other inlist rules are 
applied
+            .rewrite(&mut shorten_in_list_simplifier)

Review Comment:
   👍 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to