alamb commented on code in PR #2958:
URL: https://github.com/apache/arrow-datafusion/pull/2958#discussion_r928169456
##########
datafusion/core/tests/sql/joins.rs:
##########
@@ -1567,7 +1567,7 @@ async fn reduce_right_join_2() -> Result<()> {
let expected = vec![
"Explain [plan_type:Utf8, plan:Utf8]",
" Projection: #t1.t1_id, #t1.t1_name, #t1.t1_int, #t2.t2_id,
#t2.t2_name, #t2.t2_int [t1_id:UInt32;N, t1_name:Utf8;N, t1_int:UInt32;N,
t2_id:UInt32;N, t2_name:Utf8;N, t2_int:UInt32;N]",
- " Filter: NOT #t1.t1_int = #t2.t2_int [t1_id:UInt32;N,
t1_name:Utf8;N, t1_int:UInt32;N, t2_id:UInt32;N, t2_name:Utf8;N,
t2_int:UInt32;N]",
+ " Filter: #t1.t1_int != #t2.t2_int [t1_id:UInt32;N, t1_name:Utf8;N,
t1_int:UInt32;N, t2_id:UInt32;N, t2_name:Utf8;N, t2_int:UInt32;N]",
Review Comment:
👍
##########
datafusion/optimizer/src/simplify_expressions.rs:
##########
@@ -185,6 +181,97 @@ fn as_bool_lit(expr: Expr) -> Option<bool> {
}
}
+/// negate a Not clause
+/// input is the clause to be negated.(args of Not clause)
+/// For BinaryExpr, use the negator of op instead.
+/// not ( A > B) ===> (A <= B)
+/// For BoolExpr, not (A and B) ===> (not A) or (not B)
+/// not (A or B) ===> (not A) and (not B)
+/// not (not A) ===> A
+/// For NullExpr, not (A is not null) ===> A is null
+/// not (A is null) ===> A is not null
+/// For InList, not (A not in (..)) ===> A in (..)
+/// not (A in (..)) ===> A not in (..)
+/// For Between, not (A between B and C) ===> (A not between B and C)
+/// not (A not between B and C) ===> (A between B and C)
+/// For others, use Not clause
+fn negate_clause(expr: Expr) -> Expr {
+ match expr {
+ Expr::BinaryExpr { left, op, right } => {
+ match op {
+ // not (A = B) ===> (A <> B)
+ Operator::Eq => binary_expr(*left, Operator::NotEq, *right),
+ // not (A <> B) ===> (A = B)
+ Operator::NotEq => binary_expr(*left, Operator::Eq, *right),
+ // not (A < B) ===> (A >= B)
+ Operator::Lt => binary_expr(*left, Operator::GtEq, *right),
+ // not (A <= B) ===> (A > B)
+ Operator::LtEq => binary_expr(*left, Operator::Gt, *right),
+ // not (A > B) ===> (A <= B)
+ Operator::Gt => binary_expr(*left, Operator::LtEq, *right),
+ // not (A >= B) ===> (A < B)
+ Operator::GtEq => binary_expr(*left, Operator::Lt, *right),
+ // not (A like 'B') ===> (A not like 'B')
+ Operator::Like => binary_expr(*left, Operator::NotLike,
*right),
+ // not (A not like 'B') ===> (A like 'B')
+ Operator::NotLike => binary_expr(*left, Operator::Like,
*right),
+ // not (A is distinct from B) ===> (A is not distinct from B)
+ Operator::IsDistinctFrom => {
+ binary_expr(*left, Operator::IsNotDistinctFrom, *right)
+ }
+ // not (A is not distinct from B) ===> (A is distinct from B)
+ Operator::IsNotDistinctFrom => {
+ binary_expr(*left, Operator::IsDistinctFrom, *right)
+ }
+ // not (A and B) ===> (not A) or (not B)
+ Operator::And => {
+ let left = negate_clause(*left);
+ let right = negate_clause(*right);
+
+ or(left, right)
+ }
+ // not (A or B) ===> (not A) and (not B)
+ Operator::Or => {
+ let left = negate_clause(*left);
+ let right = negate_clause(*right);
+
+ and(left, right)
+ }
+ // use not clause
+ _ => Expr::Not(Box::new(binary_expr(*left, op, *right))),
+ }
+ }
+ // not (not A) ===> A
+ Expr::Not(expr) => *expr,
+ // not (A is not null) ===> A is null
+ Expr::IsNotNull(expr) => expr.is_null(),
+ // not (A is null) ===> A is not null
+ Expr::IsNull(expr) => expr.is_not_null(),
+ // not (A not in (..)) ===> A in (..)
+ // not (A in (..)) ===> A not in (..)
+ Expr::InList {
+ expr,
+ list,
+ negated,
+ } => expr.in_list(list, !negated),
+ // not (A between B and C) ===> (A not between B and C)
+ // not (A not between B and C) ===> (A between B and C)
+ Expr::Between {
+ expr,
+ negated,
+ low,
+ high,
+ } => Expr::Between {
+ expr,
+ negated: !negated,
+ low,
+ high,
+ },
+ // use not clause
Review Comment:
👍 I reviewed this logic carefully
##########
datafusion/optimizer/src/simplify_expressions.rs:
##########
@@ -185,6 +181,97 @@ fn as_bool_lit(expr: Expr) -> Option<bool> {
}
}
+/// negate a Not clause
+/// input is the clause to be negated.(args of Not clause)
+/// For BinaryExpr, use the negator of op instead.
+/// not ( A > B) ===> (A <= B)
+/// For BoolExpr, not (A and B) ===> (not A) or (not B)
+/// not (A or B) ===> (not A) and (not B)
+/// not (not A) ===> A
+/// For NullExpr, not (A is not null) ===> A is null
+/// not (A is null) ===> A is not null
+/// For InList, not (A not in (..)) ===> A in (..)
+/// not (A in (..)) ===> A not in (..)
+/// For Between, not (A between B and C) ===> (A not between B and C)
+/// not (A not between B and C) ===> (A between B and C)
+/// For others, use Not clause
+fn negate_clause(expr: Expr) -> Expr {
+ match expr {
+ Expr::BinaryExpr { left, op, right } => {
+ match op {
+ // not (A = B) ===> (A <> B)
+ Operator::Eq => binary_expr(*left, Operator::NotEq, *right),
Review Comment:
I don't know if it matters, but `binary_expr` may cause a new allocation via
`Box` -- I wonder if this code could be written to just pick a new op and
reassmble the expr, like
```rust
Expr::BinaryExpr { left, op, right } => Expr::BinaryExpr { left, op:
negate_op(op), right }
```
not necessary, just something that I thought of
##########
datafusion/optimizer/src/simplify_expressions.rs:
##########
@@ -905,7 +987,7 @@ mod tests {
Operator::And,
Expr::not(col("c2").gt(lit(5))),
);
- let expected = expr.clone();
+ let expected = col("c2").gt(lit(5)).and(col("c2").lt_eq(lit(5)));
Review Comment:
Maybe can we update the comment above as well
from
```rust
// (c > 5) AND !(c > 5) -- can't remove
```
to
```rust
// (c > 5) AND !(c > 5) -- > (c > 5) AND (c <= 5)
```
##########
datafusion/optimizer/src/simplify_expressions.rs:
##########
@@ -185,6 +181,97 @@ fn as_bool_lit(expr: Expr) -> Option<bool> {
}
}
+/// negate a Not clause
+/// input is the clause to be negated.(args of Not clause)
+/// For BinaryExpr, use the negator of op instead.
+/// not ( A > B) ===> (A <= B)
+/// For BoolExpr, not (A and B) ===> (not A) or (not B)
+/// not (A or B) ===> (not A) and (not B)
+/// not (not A) ===> A
+/// For NullExpr, not (A is not null) ===> A is null
+/// not (A is null) ===> A is not null
+/// For InList, not (A not in (..)) ===> A in (..)
+/// not (A in (..)) ===> A not in (..)
+/// For Between, not (A between B and C) ===> (A not between B and C)
+/// not (A not between B and C) ===> (A between B and C)
+/// For others, use Not clause
+fn negate_clause(expr: Expr) -> Expr {
+ match expr {
+ Expr::BinaryExpr { left, op, right } => {
+ match op {
+ // not (A = B) ===> (A <> B)
+ Operator::Eq => binary_expr(*left, Operator::NotEq, *right),
+ // not (A <> B) ===> (A = B)
+ Operator::NotEq => binary_expr(*left, Operator::Eq, *right),
+ // not (A < B) ===> (A >= B)
+ Operator::Lt => binary_expr(*left, Operator::GtEq, *right),
+ // not (A <= B) ===> (A > B)
+ Operator::LtEq => binary_expr(*left, Operator::Gt, *right),
+ // not (A > B) ===> (A <= B)
+ Operator::Gt => binary_expr(*left, Operator::LtEq, *right),
+ // not (A >= B) ===> (A < B)
+ Operator::GtEq => binary_expr(*left, Operator::Lt, *right),
+ // not (A like 'B') ===> (A not like 'B')
+ Operator::Like => binary_expr(*left, Operator::NotLike,
*right),
+ // not (A not like 'B') ===> (A like 'B')
+ Operator::NotLike => binary_expr(*left, Operator::Like,
*right),
+ // not (A is distinct from B) ===> (A is not distinct from B)
+ Operator::IsDistinctFrom => {
+ binary_expr(*left, Operator::IsNotDistinctFrom, *right)
+ }
+ // not (A is not distinct from B) ===> (A is distinct from B)
+ Operator::IsNotDistinctFrom => {
+ binary_expr(*left, Operator::IsDistinctFrom, *right)
+ }
+ // not (A and B) ===> (not A) or (not B)
+ Operator::And => {
+ let left = negate_clause(*left);
+ let right = negate_clause(*right);
+
+ or(left, right)
+ }
+ // not (A or B) ===> (not A) and (not B)
Review Comment:
nice
##########
datafusion/optimizer/src/simplify_expressions.rs:
##########
@@ -1453,7 +1535,7 @@ mod tests {
],
else_expr: Some(Box::new(lit(true))),
})),
- col("c1").or(col("c1").or(col("c2")).not())
+ col("c1").or(col("c1").not().and(col("c2").not()))
Review Comment:
And one more place to update the comments?
##########
datafusion/optimizer/src/simplify_expressions.rs:
##########
@@ -1434,7 +1516,7 @@ mod tests {
],
else_expr: Some(Box::new(lit(true))),
})),
- col("c1").or(col("c1").or(col("c2")).not())
+ col("c1").or(col("c1").not().and(col("c2").not()))
Review Comment:
Can you please update the comments here as well?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]