alamb commented on code in PR #9628:
URL: https://github.com/apache/arrow-datafusion/pull/9628#discussion_r1527156663
##########
datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs:
##########
@@ -1405,12 +1409,116 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for
Simplifier<'a, S> {
Transformed::yes(lit(false))
}
+ // expr IN () --> false
+ // expr NOT IN () --> true
+ Expr::InList(InList {
+ expr,
+ list,
+ negated,
+ }) if list.is_empty() && *expr != Expr::Literal(ScalarValue::Null)
=> {
+ Transformed::yes(lit(negated))
+ }
+
+ // null in (x, y, z) --> null
+ // null not in (x, y, z) --> null
+ Expr::InList(InList {
+ expr,
+ list: _,
+ negated: _,
+ }) if is_null(expr.as_ref()) => Transformed::yes(lit_bool_null()),
+
+ // expr IN ((subquery)) -> expr IN (subquery), see ##5529
+ Expr::InList(InList {
+ expr,
+ mut list,
+ negated,
+ }) if list.len() == 1
+ && matches!(list.first(), Some(Expr::ScalarSubquery { .. })) =>
+ {
+ let Expr::ScalarSubquery(subquery) = list.remove(0) else {
+ unreachable!()
+ };
+
+ Transformed::yes(Expr::InSubquery(InSubquery::new(
+ expr, subquery, negated,
+ )))
+ }
+
+ // Combine multiple OR expressions into a single IN list
expression if possible
+ //
+ // i.e. `a = 1 OR a = 2 OR a = 3` -> `a IN (1, 2, 3)`
+ Expr::BinaryExpr(BinaryExpr {
+ left,
+ op: Operator::Or,
+ right,
+ }) if are_inlist_and_eq(left.as_ref(), right.as_ref()) => {
+ let left = as_inlist(left.as_ref());
+ let right = as_inlist(right.as_ref());
+
+ let lhs = left.unwrap();
+ let rhs = right.unwrap();
+ let lhs = lhs.into_owned();
+ let rhs = rhs.into_owned();
+ let mut seen: HashSet<Expr> = HashSet::new();
+ let list = lhs
+ .list
+ .into_iter()
+ .chain(rhs.list)
+ .filter(|e| seen.insert(e.to_owned()))
+ .collect::<Vec<_>>();
+
+ let merged_inlist = InList {
+ expr: lhs.expr,
+ list,
+ negated: false,
+ };
+
+ return Ok(Transformed::yes(Expr::InList(merged_inlist)));
+ }
+
// no additional rewrites possible
expr => Transformed::no(expr),
})
}
}
+fn are_inlist_and_eq(left: &Expr, right: &Expr) -> bool {
+ let left = as_inlist(left);
+ let right = as_inlist(right);
+ if let (Some(lhs), Some(rhs)) = (left, right) {
+ lhs.expr.try_into_col().is_ok()
Review Comment:
`try_into_col().is_ok()` I think returns an error/string which is then
ignored
Maybe we could do something like `matches!(lhs.expr, Expr:Column(..)` 🤔
##########
datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs:
##########
@@ -1405,12 +1409,116 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for
Simplifier<'a, S> {
Transformed::yes(lit(false))
}
+ // expr IN () --> false
+ // expr NOT IN () --> true
+ Expr::InList(InList {
+ expr,
+ list,
+ negated,
+ }) if list.is_empty() && *expr != Expr::Literal(ScalarValue::Null)
=> {
+ Transformed::yes(lit(negated))
+ }
+
+ // null in (x, y, z) --> null
+ // null not in (x, y, z) --> null
+ Expr::InList(InList {
+ expr,
+ list: _,
+ negated: _,
+ }) if is_null(expr.as_ref()) => Transformed::yes(lit_bool_null()),
+
+ // expr IN ((subquery)) -> expr IN (subquery), see ##5529
+ Expr::InList(InList {
+ expr,
+ mut list,
+ negated,
+ }) if list.len() == 1
+ && matches!(list.first(), Some(Expr::ScalarSubquery { .. })) =>
+ {
+ let Expr::ScalarSubquery(subquery) = list.remove(0) else {
+ unreachable!()
+ };
+
+ Transformed::yes(Expr::InSubquery(InSubquery::new(
+ expr, subquery, negated,
+ )))
+ }
+
+ // Combine multiple OR expressions into a single IN list
expression if possible
+ //
+ // i.e. `a = 1 OR a = 2 OR a = 3` -> `a IN (1, 2, 3)`
+ Expr::BinaryExpr(BinaryExpr {
+ left,
+ op: Operator::Or,
+ right,
+ }) if are_inlist_and_eq(left.as_ref(), right.as_ref()) => {
+ let left = as_inlist(left.as_ref());
+ let right = as_inlist(right.as_ref());
+
+ let lhs = left.unwrap();
+ let rhs = right.unwrap();
+ let lhs = lhs.into_owned();
+ let rhs = rhs.into_owned();
+ let mut seen: HashSet<Expr> = HashSet::new();
+ let list = lhs
+ .list
+ .into_iter()
+ .chain(rhs.list)
+ .filter(|e| seen.insert(e.to_owned()))
+ .collect::<Vec<_>>();
+
+ let merged_inlist = InList {
+ expr: lhs.expr,
+ list,
+ negated: false,
+ };
+
+ return Ok(Transformed::yes(Expr::InList(merged_inlist)));
+ }
+
// no additional rewrites possible
expr => Transformed::no(expr),
})
}
}
+fn are_inlist_and_eq(left: &Expr, right: &Expr) -> bool {
+ let left = as_inlist(left);
+ let right = as_inlist(right);
+ if let (Some(lhs), Some(rhs)) = (left, right) {
+ lhs.expr.try_into_col().is_ok()
+ && rhs.expr.try_into_col().is_ok()
+ && lhs.expr == rhs.expr
+ && !lhs.negated
+ && !rhs.negated
+ } else {
+ false
+ }
+}
+
+/// Try to convert an expression to an in-list expression
+fn as_inlist(expr: &Expr) -> Option<Cow<InList>> {
+ match expr {
+ Expr::InList(inlist) => Some(Cow::Borrowed(inlist)),
+ Expr::BinaryExpr(BinaryExpr { left, op, right }) if *op ==
Operator::Eq => {
+ match (left.as_ref(), right.as_ref()) {
+ (Expr::Column(_), Expr::Literal(_)) => Some(Cow::Owned(InList {
+ expr: left.clone(),
Review Comment:
it would be great to avoid these `clone`s but since this PR just moves the
code we can do it as a follow on PR I think
##########
datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs:
##########
@@ -189,14 +192,15 @@ impl<S: SimplifyInfo> ExprSimplifier<S> {
.data()?
.rewrite(&mut inlist_simplifier)
.data()?
- .rewrite(&mut shorten_in_list_simplifier)
- .data()?
.rewrite(&mut guarantee_rewriter)
.data()?
// run both passes twice to try an minimize simplifications that
we missed
.rewrite(&mut const_evaluator)
.data()?
.rewrite(&mut simplifier)
+ .data()?
+ // shorten inlist should be started after other inlist rules are
applied
+ .rewrite(&mut shorten_in_list_simplifier)
Review Comment:
👍
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]