mingmwang commented on code in PR #4193:
URL: https://github.com/apache/arrow-datafusion/pull/4193#discussion_r1023890252
##########
datafusion/sql/src/planner.rs:
##########
@@ -5509,6 +5599,99 @@ mod tests {
assert!(logical_plan("SELECT \"1\"").is_err());
}
+ #[test]
+ fn test_constant_expr_eq_join() {
+ let sql = "SELECT id, order_id \
+ FROM person \
+ INNER JOIN orders \
+ ON person.id = 10";
+
+ let expected = "Projection: person.id, orders.order_id\
+ \n Filter: person.id = Int64(10)\
+ \n CrossJoin:\
+ \n TableScan: person\
+ \n TableScan: orders";
+ quick_test(sql, expected);
+ }
+
+ #[test]
+ fn test_right_left_expr_eq_join() {
+ let sql = "SELECT id, order_id \
+ FROM person \
+ INNER JOIN orders \
+ ON orders.customer_id * 2 = person.id + 10";
+
+ let expected = "Projection: person.id, orders.order_id\
+ \n Inner Join: person.id + Int64(10) = orders.customer_id * Int64(2)\
+ \n Projection: person.id, person.first_name, person.last_name,
person.age, person.state, person.salary, person.birth_date, person.😀, person.id
+ Int64(10)\
+ \n TableScan: person\
+ \n Projection: orders.order_id, orders.customer_id,
orders.o_item_id, orders.qty, orders.price, orders.delivered,
orders.customer_id * Int64(2)\
+ \n TableScan: orders";
+ quick_test(sql, expected);
+ }
+
+ #[test]
+ fn test_single_column_expr_eq_join() {
+ let sql = "SELECT id, order_id \
+ FROM person \
+ INNER JOIN orders \
+ ON person.id + 10 = orders.customer_id * 2";
+
+ let expected = "Projection: person.id, orders.order_id\
+ \n Inner Join: person.id + Int64(10) = orders.customer_id * Int64(2)\
+ \n Projection: person.id, person.first_name, person.last_name,
person.age, person.state, person.salary, person.birth_date, person.😀, person.id
+ Int64(10)\
+ \n TableScan: person\
+ \n Projection: orders.order_id, orders.customer_id,
orders.o_item_id, orders.qty, orders.price, orders.delivered,
orders.customer_id * Int64(2)\
+ \n TableScan: orders";
+ quick_test(sql, expected);
+ }
+
+ #[test]
+ fn test_multiple_column_expr_eq_join() {
+ let sql = "SELECT id, order_id \
+ FROM person \
+ INNER JOIN orders \
+ ON person.id + person.age + 10 = orders.customer_id * 2 -
orders.price";
+
+ let expected = "Projection: person.id, orders.order_id\
+ \n Inner Join: person.id + person.age + Int64(10) =
orders.customer_id * Int64(2) - orders.price\
+ \n Projection: person.id, person.first_name, person.last_name,
person.age, person.state, person.salary, person.birth_date, person.😀, person.id
+ person.age + Int64(10)\
+ \n TableScan: person\
+ \n Projection: orders.order_id, orders.customer_id,
orders.o_item_id, orders.qty, orders.price, orders.delivered,
orders.customer_id * Int64(2) - orders.price\
+ \n TableScan: orders";
+ quick_test(sql, expected);
+ }
+
+ #[test]
+ fn test_left_projection_expr_eq_join() {
+ let sql = "SELECT id, order_id \
+ FROM person \
+ INNER JOIN orders \
+ ON person.id + person.age + 10 = orders.customer_id";
+
+ let expected = "Projection: person.id, orders.order_id\
+ \n Inner Join: person.id + person.age + Int64(10) =
orders.customer_id\
+ \n Projection: person.id, person.first_name, person.last_name,
person.age, person.state, person.salary, person.birth_date, person.😀, person.id
+ person.age + Int64(10)\
+ \n TableScan: person\
+ \n TableScan: orders";
+ quick_test(sql, expected);
+ }
+
+ #[test]
+ fn test_right_projection_expr_eq_join() {
+ let sql = "SELECT id, order_id \
+ FROM person \
+ INNER JOIN orders \
+ ON person.id = orders.customer_id * 2 - orders.price";
+
+ let expected = "Projection: person.id, orders.order_id\
+ \n Inner Join: person.id = orders.customer_id * Int64(2) -
orders.price\
+ \n TableScan: person\
+ \n Projection: orders.order_id, orders.customer_id,
orders.o_item_id, orders.qty, orders.price, orders.delivered,
orders.customer_id * Int64(2) - orders.price\
+ \n TableScan: orders";
+ quick_test(sql, expected);
+ }
+
Review Comment:
> For example, I wonder if it is correct to always pull expressions below
the join for FULL OUTER JOINs 🤔
>
> Can you add a test for something like
>
> ```
> let sql = "SELECT id, order_id \
> FROM person \
> FULL OUTER JOIN orders \
> ON person.id = 10";
> ```
>
> if it doesn't already exist?
>
> In that case the predicates need to be evaluated within the join (if it is
done as a filter afterwards it may filter rows that should not be filtered)
I think it is OK to projection such kind expressions, the type of the new
projected column type is bool, the bool value is
evaluated again during the join.
One thing that I am not clear now and whether we should cover in this PR and
can do in the following PR is
Should we allow the following cases ?
// Join conditions include suspicious Exprs
SELECT id, order_id
FROM person
FULL OUTER JOIN orders
ON person.id = person.name = order.id
// Join conditions include non-deterministic Exprs
SELECT id, order_id
FROM person
FULL OUTER JOIN orders
ON person.id = person.name || random(xxx)
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]