mingmwang commented on code in PR #4193:
URL: https://github.com/apache/arrow-datafusion/pull/4193#discussion_r1023890252


##########
datafusion/sql/src/planner.rs:
##########
@@ -5509,6 +5599,99 @@ mod tests {
         assert!(logical_plan("SELECT \"1\"").is_err());
     }
 
+    #[test]
+    fn test_constant_expr_eq_join() {
+        let sql = "SELECT id, order_id \
+            FROM person \
+            INNER JOIN orders \
+            ON person.id = 10";
+
+        let expected = "Projection: person.id, orders.order_id\
+        \n  Filter: person.id = Int64(10)\
+        \n    CrossJoin:\
+        \n      TableScan: person\
+        \n      TableScan: orders";
+        quick_test(sql, expected);
+    }
+
+    #[test]
+    fn test_right_left_expr_eq_join() {
+        let sql = "SELECT id, order_id \
+            FROM person \
+            INNER JOIN orders \
+            ON orders.customer_id * 2 = person.id + 10";
+
+        let expected = "Projection: person.id, orders.order_id\
+        \n  Inner Join: person.id + Int64(10) = orders.customer_id * Int64(2)\
+        \n    Projection: person.id, person.first_name, person.last_name, 
person.age, person.state, person.salary, person.birth_date, person.😀, person.id 
+ Int64(10)\
+        \n      TableScan: person\
+        \n    Projection: orders.order_id, orders.customer_id, 
orders.o_item_id, orders.qty, orders.price, orders.delivered, 
orders.customer_id * Int64(2)\
+        \n      TableScan: orders";
+        quick_test(sql, expected);
+    }
+
+    #[test]
+    fn test_single_column_expr_eq_join() {
+        let sql = "SELECT id, order_id \
+            FROM person \
+            INNER JOIN orders \
+            ON person.id + 10 = orders.customer_id * 2";
+
+        let expected = "Projection: person.id, orders.order_id\
+        \n  Inner Join: person.id + Int64(10) = orders.customer_id * Int64(2)\
+        \n    Projection: person.id, person.first_name, person.last_name, 
person.age, person.state, person.salary, person.birth_date, person.😀, person.id 
+ Int64(10)\
+        \n      TableScan: person\
+        \n    Projection: orders.order_id, orders.customer_id, 
orders.o_item_id, orders.qty, orders.price, orders.delivered, 
orders.customer_id * Int64(2)\
+        \n      TableScan: orders";
+        quick_test(sql, expected);
+    }
+
+    #[test]
+    fn test_multiple_column_expr_eq_join() {
+        let sql = "SELECT id, order_id \
+            FROM person \
+            INNER JOIN orders \
+            ON person.id + person.age + 10 = orders.customer_id * 2 - 
orders.price";
+
+        let expected = "Projection: person.id, orders.order_id\
+        \n  Inner Join: person.id + person.age + Int64(10) = 
orders.customer_id * Int64(2) - orders.price\
+        \n    Projection: person.id, person.first_name, person.last_name, 
person.age, person.state, person.salary, person.birth_date, person.😀, person.id 
+ person.age + Int64(10)\
+        \n      TableScan: person\
+        \n    Projection: orders.order_id, orders.customer_id, 
orders.o_item_id, orders.qty, orders.price, orders.delivered, 
orders.customer_id * Int64(2) - orders.price\
+        \n      TableScan: orders";
+        quick_test(sql, expected);
+    }
+
+    #[test]
+    fn test_left_projection_expr_eq_join() {
+        let sql = "SELECT id, order_id \
+            FROM person \
+            INNER JOIN orders \
+            ON person.id + person.age + 10 = orders.customer_id";
+
+        let expected = "Projection: person.id, orders.order_id\
+        \n  Inner Join: person.id + person.age + Int64(10) = 
orders.customer_id\
+        \n    Projection: person.id, person.first_name, person.last_name, 
person.age, person.state, person.salary, person.birth_date, person.😀, person.id 
+ person.age + Int64(10)\
+        \n      TableScan: person\
+        \n    TableScan: orders";
+        quick_test(sql, expected);
+    }
+
+    #[test]
+    fn test_right_projection_expr_eq_join() {
+        let sql = "SELECT id, order_id \
+            FROM person \
+            INNER JOIN orders \
+            ON person.id = orders.customer_id * 2 - orders.price";
+
+        let expected = "Projection: person.id, orders.order_id\
+        \n  Inner Join: person.id = orders.customer_id * Int64(2) - 
orders.price\
+        \n    TableScan: person\
+        \n    Projection: orders.order_id, orders.customer_id, 
orders.o_item_id, orders.qty, orders.price, orders.delivered, 
orders.customer_id * Int64(2) - orders.price\
+        \n      TableScan: orders";
+        quick_test(sql, expected);
+    }
+

Review Comment:
   > For example, I wonder if it is correct to always pull expressions below 
the join for FULL OUTER JOINs 🤔
   > 
   > Can you add a test for something like
   > 
   > ```
   >  let sql = "SELECT id, order_id \
   >             FROM person \
   >             FULL OUTER JOIN orders \
   >             ON person.id = 10";
   > ```
   > 
   > if it doesn't already exist?
   > 
   > In that case the predicates need to be evaluated within the join (if it is 
done as a filter afterwards it may filter rows that should not be filtered)
   
   I think it is OK to projection such kind expressions, the type of the new 
projected column type is bool, the bool value is 
   evaluated again during the join.
   
   
   One thing that I am not clear now and whether we should cover in this PR and 
can do in the following PR is
   Should we allow the following cases ?
   
   // Join conditions include suspicious Exprs
   SELECT id, order_id 
                  FROM person 
                  FULL OUTER JOIN orders 
                  ON person.id = person.name = order.id
   
    
   // Join conditions include non-deterministic Exprs
   SELECT id, order_id 
                  FROM person 
                  FULL OUTER JOIN orders 
                  ON person.id = person.name || random(xxx)



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to