jayzhan211 commented on code in PR #9628:
URL: https://github.com/apache/arrow-datafusion/pull/9628#discussion_r1527168866
##########
datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs:
##########
@@ -1405,12 +1409,116 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> {
Transformed::yes(lit(false))
}
+ // expr IN () --> false
+ // expr NOT IN () --> true
+ Expr::InList(InList {
+ expr,
+ list,
+ negated,
+ }) if list.is_empty() && *expr != Expr::Literal(ScalarValue::Null)
=> {
+ Transformed::yes(lit(negated))
+ }
+
+ // null in (x, y, z) --> null
+ // null not in (x, y, z) --> null
+ Expr::InList(InList {
+ expr,
+ list: _,
+ negated: _,
+ }) if is_null(expr.as_ref()) => Transformed::yes(lit_bool_null()),
+
+ // expr IN ((subquery)) -> expr IN (subquery), see ##5529
+ Expr::InList(InList {
+ expr,
+ mut list,
+ negated,
+ }) if list.len() == 1
+ && matches!(list.first(), Some(Expr::ScalarSubquery { .. })) =>
+ {
+ let Expr::ScalarSubquery(subquery) = list.remove(0) else {
+ unreachable!()
+ };
+
+ Transformed::yes(Expr::InSubquery(InSubquery::new(
+ expr, subquery, negated,
+ )))
+ }
+
+ // Combine multiple OR expressions into a single IN list expression if possible
+ //
+ // i.e. `a = 1 OR a = 2 OR a = 3` -> `a IN (1, 2, 3)`
+ Expr::BinaryExpr(BinaryExpr {
+ left,
+ op: Operator::Or,
+ right,
+ }) if are_inlist_and_eq(left.as_ref(), right.as_ref()) => {
+ let left = as_inlist(left.as_ref());
+ let right = as_inlist(right.as_ref());
+
+ let lhs = left.unwrap();
+ let rhs = right.unwrap();
+ let lhs = lhs.into_owned();
+ let rhs = rhs.into_owned();
+ let mut seen: HashSet<Expr> = HashSet::new();
+ let list = lhs
+ .list
+ .into_iter()
+ .chain(rhs.list)
+ .filter(|e| seen.insert(e.to_owned()))
+ .collect::<Vec<_>>();
+
+ let merged_inlist = InList {
+ expr: lhs.expr,
+ list,
+ negated: false,
+ };
+
+ return Ok(Transformed::yes(Expr::InList(merged_inlist)));
+ }
+
// no additional rewrites possible
expr => Transformed::no(expr),
})
}
}
+fn are_inlist_and_eq(left: &Expr, right: &Expr) -> bool {
+ let left = as_inlist(left);
+ let right = as_inlist(right);
+ if let (Some(lhs), Some(rhs)) = (left, right) {
+ lhs.expr.try_into_col().is_ok()
+ && rhs.expr.try_into_col().is_ok()
+ && lhs.expr == rhs.expr
+ && !lhs.negated
+ && !rhs.negated
+ } else {
+ false
+ }
+}
+
+/// Try to convert an expression to an in-list expression
+fn as_inlist(expr: &Expr) -> Option<Cow<InList>> {
+ match expr {
+ Expr::InList(inlist) => Some(Cow::Borrowed(inlist)),
+ Expr::BinaryExpr(BinaryExpr { left, op, right }) if *op ==
Operator::Eq => {
+ match (left.as_ref(), right.as_ref()) {
+ (Expr::Column(_), Expr::Literal(_)) => Some(Cow::Owned(InList {
+ expr: left.clone(),
Review Comment:
Of course, removing the clone is my main goal.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]