Jefffrey commented on code in PR #8780:
URL: https://github.com/apache/arrow-datafusion/pull/8780#discussion_r1444264987


##########
datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs:
##########
@@ -224,6 +226,63 @@ impl<S: SimplifyInfo> ExprSimplifier<S> {
     }
 }
 
+#[allow(rustdoc::private_intra_doc_links)]
+/// Canonicalize any BinaryExprs that are not in canonical form
+/// <literal> <op> <col> is rewritten to <col> <op> <literal> (remember to 
switch the operator)
+/// <col> <op> <literal> is canonical
+/// <col1> <op> <col2> is rewritten so that the name of col1 sorts higher than 
col2 (b > a would be canonicalized to a < b)
+struct Canonicalizer {}
+
+impl Canonicalizer {
+    fn new() -> Self {
+        Self {}
+    }
+}
+
+impl TreeNodeRewriter for Canonicalizer {
+    type N = Expr;
+
+    fn mutate(&mut self, expr: Expr) -> Result<Expr> {
+        if let Expr::BinaryExpr(BinaryExpr { left, op, right }) = expr {
+            // Case 1, <col1> <op> <col2>
+            let mut new_expr = BinaryExpr {
+                left: left.clone(),
+                op: op.clone(),
+                right: right.clone(),
+            };
+            let mut switch_op: Operator = op.clone();
+            if left.try_into_col().is_ok() && right.try_into_col().is_ok() {

Review Comment:
   I think this check is better off done via a match, e.g.
   
   ```rust
   match (left.as_ref(), right.as_ref()) {
       (Expr::Column(a), Expr::Column(b)) => todo!(),
       _ => todo!(),
   }
   ```



##########
datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs:
##########
@@ -224,6 +226,63 @@ impl<S: SimplifyInfo> ExprSimplifier<S> {
     }
 }
 
+#[allow(rustdoc::private_intra_doc_links)]
+/// Canonicalize any BinaryExprs that are not in canonical form
+/// <literal> <op> <col> is rewritten to <col> <op> <literal> (remember to 
switch the operator)
+/// <col> <op> <literal> is canonical
+/// <col1> <op> <col2> is rewritten so that the name of col1 sorts higher than 
col2 (b > a would be canonicalized to a < b)
+struct Canonicalizer {}
+
+impl Canonicalizer {
+    fn new() -> Self {
+        Self {}
+    }
+}
+
+impl TreeNodeRewriter for Canonicalizer {
+    type N = Expr;
+
+    fn mutate(&mut self, expr: Expr) -> Result<Expr> {
+        if let Expr::BinaryExpr(BinaryExpr { left, op, right }) = expr {
+            // Case 1, <col1> <op> <col2>
+            let mut new_expr = BinaryExpr {
+                left: left.clone(),
+                op: op.clone(),
+                right: right.clone(),
+            };
+            let mut switch_op: Operator = op.clone();
+            if left.try_into_col().is_ok() && right.try_into_col().is_ok() {
+                let left_name = left.canonical_name();
+                let right_name = right.canonical_name();
+                if left_name < right_name {
+                    if let Some(swap_op) = op.swap() {
+                        switch_op = swap_op;
+                    }

Review Comment:
   You're on the right track here with swapping the Operator, but one very 
important note is that, for this functionality, we should only swap the 
operators for which swapping makes sense.
   
   For example, we cannot swap `A - B` to `B - A`, as that changes the 
expression's meaning.



##########
datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs:
##########
@@ -224,6 +226,63 @@ impl<S: SimplifyInfo> ExprSimplifier<S> {
     }
 }
 
+#[allow(rustdoc::private_intra_doc_links)]
+/// Canonicalize any BinaryExprs that are not in canonical form
+/// <literal> <op> <col> is rewritten to <col> <op> <literal> (remember to 
switch the operator)
+/// <col> <op> <literal> is canonical
+/// <col1> <op> <col2> is rewritten so that the name of col1 sorts higher than 
col2 (b > a would be canonicalized to a < b)
+struct Canonicalizer {}
+
+impl Canonicalizer {
+    fn new() -> Self {
+        Self {}
+    }
+}
+
+impl TreeNodeRewriter for Canonicalizer {
+    type N = Expr;
+
+    fn mutate(&mut self, expr: Expr) -> Result<Expr> {
+        if let Expr::BinaryExpr(BinaryExpr { left, op, right }) = expr {
+            // Case 1, <col1> <op> <col2>
+            let mut new_expr = BinaryExpr {
+                left: left.clone(),
+                op: op.clone(),
+                right: right.clone(),
+            };
+            let mut switch_op: Operator = op.clone();
+            if left.try_into_col().is_ok() && right.try_into_col().is_ok() {
+                let left_name = left.canonical_name();
+                let right_name = right.canonical_name();
+                if left_name < right_name {
+                    if let Some(swap_op) = op.swap() {
+                        switch_op = swap_op;
+                    }
+                    new_expr = BinaryExpr {
+                        left: right,
+                        op: switch_op,
+                        right: left,
+                    };
+                }
+            }
+            // Case 2, <literal> <op> <col>
+            else if left.try_into_col().is_err() && 
right.try_into_col().is_ok() {

Review Comment:
   One advantage of using `match` to destructure the `left`/`right` `Expr`s is 
that you would avoid cases like this. The pitfall here is that just because an 
`Expr` is not an `Expr::Column` variant doesn't mean it is an `Expr::Literal` 
variant, so failing to cast `left` to a `Column` could mean it's some variant 
other than `Expr::Literal`.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to