alamb commented on code in PR #3903:
URL: https://github.com/apache/arrow-datafusion/pull/3903#discussion_r1000966996
##########
datafusion/optimizer/src/filter_push_down.rs:
##########
@@ -80,6 +83,11 @@ impl State {
.zip(predicates.1)
.for_each(|(expr, cols)| self.filters.push((expr.clone(),
cols.clone())))
}
+
+ fn with_cnf_rewrite(mut self) -> Self {
Review Comment:
I think this should have some doccomments about what it is and when one it
should/should not be set
##########
datafusion/optimizer/src/utils.rs:
##########
@@ -96,30 +96,173 @@ fn split_conjunction_impl<'a>(expr: &'a Expr, mut exprs:
Vec<&'a Expr>) -> Vec<&
/// ];
///
/// // use split_conjunction_owned to split them
-/// assert_eq!(split_conjunction_owned(expr), split);
+/// assert_eq!(split_conjunction_owned(expr, Operator::And), split);
/// ```
-pub fn split_conjunction_owned(expr: Expr) -> Vec<Expr> {
- split_conjunction_owned_impl(expr, vec![])
+pub fn split_conjunction_owned(expr: Expr, op: Operator) -> Vec<Expr> {
+ split_conjunction_owned_impl(expr, op, vec![])
}
-fn split_conjunction_owned_impl(expr: Expr, mut exprs: Vec<Expr>) -> Vec<Expr>
{
+fn split_conjunction_owned_impl(
+ expr: Expr,
+ operator: Operator,
+ mut exprs: Vec<Expr>,
+) -> Vec<Expr> {
match expr {
- Expr::BinaryExpr(BinaryExpr {
- right,
- op: Operator::And,
- left,
- }) => {
- let exprs = split_conjunction_owned_impl(*left, exprs);
- split_conjunction_owned_impl(*right, exprs)
+ Expr::BinaryExpr(BinaryExpr { right, op, left }) if op == operator => {
+ let exprs = split_conjunction_owned_impl(*left, Operator::And,
exprs);
+ split_conjunction_owned_impl(*right, Operator::And, exprs)
}
- Expr::Alias(expr, _) => split_conjunction_owned_impl(*expr, exprs),
+ Expr::Alias(expr, _) => split_conjunction_owned_impl(*expr,
Operator::And, exprs),
other => {
exprs.push(other);
exprs
}
}
}
+/// Converts an expression to conjunctive normal form (CNF).
+///
+/// The following expression is in CNF:
+/// `(a OR b) AND (c OR d)`
+/// The following is not in CNF:
+/// `(a AND b) OR c`.
+/// But could be rewrite to a CNF expression:
+/// `(a OR c) AND (b OR c)`.
+///
+/// # Example
+/// ```
+/// # use datafusion_expr::{col, lit};
+/// # use datafusion_expr::expr_rewriter::ExprRewritable;
+/// # use datafusion_optimizer::utils::CnfHelper;
+/// // (a=1 AND b=2)OR c = 3
+/// let expr1 = col("a").eq(lit(1)).and(col("b").eq(lit(2)));
+/// let expr2 = col("c").eq(lit(3));
+/// let expr = expr1.or(expr2);
+///
+/// //(a=1 or c=3)AND(b=2 or c=3)
+/// let expr1 = col("a").eq(lit(1)).or(col("c").eq(lit(3)));
+/// let expr2 = col("b").eq(lit(2)).or(col("c").eq(lit(3)));
+/// let expect = expr1.and(expr2);
+/// // use split_conjunction_owned to split them
+/// assert_eq!(expr.rewrite(& mut CnfHelper::new()).unwrap(), expect);
+/// ```
+///
+pub struct CnfHelper {
+ max_count: usize,
+ current_count: usize,
+ exprs: Vec<Expr>,
+ original_expr: Option<Expr>,
+}
+
+impl CnfHelper {
+ pub fn new() -> Self {
+ CnfHelper {
+ max_count: 50,
+ current_count: 0,
+ exprs: vec![],
+ original_expr: None,
+ }
+ }
+
+ pub fn new_with_max_count(max_count: usize) -> Self {
+ CnfHelper {
+ max_count,
+ current_count: 0,
+ exprs: vec![],
+ original_expr: None,
+ }
+ }
+
+ fn increment_and_check_overload(&mut self) -> bool {
+ self.current_count += 1;
+ self.current_count >= self.max_count
+ }
+}
+
+impl ExprRewriter for CnfHelper {
Review Comment:
I feel like this code is a little confusing -- it isn't really doing a
rewrite the same way other `ExprRewriters` do. Instead it is doing an analysis
in one pass through (via `pre_visit`) and then replacing it in one go
I also feel like number of `clone`s is significant -- it both copies the
original expr (in case the rewrite doesn't work) but also copies the exprs in
the BinaryExprs
I feel like there must be a way to avoid at least one of those copies 🤔 But
I am not sure without trying it myself
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]