alamb commented on code in PR #9628:
URL: https://github.com/apache/arrow-datafusion/pull/9628#discussion_r1527349308


##########
datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs:
##########
@@ -1405,12 +1409,116 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for 
Simplifier<'a, S> {
                 Transformed::yes(lit(false))
             }
 
+            // expr IN () --> false
+            // expr NOT IN () --> true
+            Expr::InList(InList {
+                expr,
+                list,
+                negated,
+            }) if list.is_empty() && *expr != Expr::Literal(ScalarValue::Null) 
=> {
+                Transformed::yes(lit(negated))
+            }
+
+            // null in (x, y, z) --> null
+            // null not in (x, y, z) --> null
+            Expr::InList(InList {
+                expr,
+                list: _,
+                negated: _,
+            }) if is_null(expr.as_ref()) => Transformed::yes(lit_bool_null()),
+
+            // expr IN ((subquery)) -> expr IN (subquery), see ##5529
+            Expr::InList(InList {
+                expr,
+                mut list,
+                negated,
+            }) if list.len() == 1
+                && matches!(list.first(), Some(Expr::ScalarSubquery { .. })) =>
+            {
+                let Expr::ScalarSubquery(subquery) = list.remove(0) else {
+                    unreachable!()
+                };
+
+                Transformed::yes(Expr::InSubquery(InSubquery::new(
+                    expr, subquery, negated,
+                )))
+            }
+
+            // Combine multiple OR expressions into a single IN list 
expression if possible
+            //
+            // i.e. `a = 1 OR a = 2 OR a = 3` -> `a IN (1, 2, 3)`
+            Expr::BinaryExpr(BinaryExpr {
+                left,
+                op: Operator::Or,
+                right,
+            }) if are_inlist_and_eq(left.as_ref(), right.as_ref()) => {
+                let left = as_inlist(left.as_ref());
+                let right = as_inlist(right.as_ref());
+
+                let lhs = left.unwrap();
+                let rhs = right.unwrap();
+                let lhs = lhs.into_owned();
+                let rhs = rhs.into_owned();
+                let mut seen: HashSet<Expr> = HashSet::new();
+                let list = lhs
+                    .list
+                    .into_iter()
+                    .chain(rhs.list)
+                    .filter(|e| seen.insert(e.to_owned()))
+                    .collect::<Vec<_>>();
+
+                let merged_inlist = InList {
+                    expr: lhs.expr,
+                    list,
+                    negated: false,
+                };
+
+                return Ok(Transformed::yes(Expr::InList(merged_inlist)));
+            }
+
             // no additional rewrites possible
             expr => Transformed::no(expr),
         })
     }
 }
 
+fn are_inlist_and_eq(left: &Expr, right: &Expr) -> bool {
+    let left = as_inlist(left);
+    let right = as_inlist(right);
+    if let (Some(lhs), Some(rhs)) = (left, right) {
+        lhs.expr.try_into_col().is_ok()
+            && rhs.expr.try_into_col().is_ok()
+            && lhs.expr == rhs.expr
+            && !lhs.negated
+            && !rhs.negated
+    } else {
+        false
+    }
+}
+
+/// Try to convert an expression to an in-list expression
+fn as_inlist(expr: &Expr) -> Option<Cow<InList>> {
+    match expr {
+        Expr::InList(inlist) => Some(Cow::Borrowed(inlist)),
+        Expr::BinaryExpr(BinaryExpr { left, op, right }) if *op == 
Operator::Eq => {
+            match (left.as_ref(), right.as_ref()) {
+                (Expr::Column(_), Expr::Literal(_)) => Some(Cow::Owned(InList {
+                    expr: left.clone(),

Review Comment:
   I agree -- thank you @jayzhan211 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to