alamb commented on code in PR #4193:
URL: https://github.com/apache/arrow-datafusion/pull/4193#discussion_r1023183597
##########
datafusion/sql/src/planner.rs:
##########
@@ -2837,36 +2861,75 @@ fn remove_join_expressions(
/// foo = bar => accum=[(foo, bar)] accum_filter=[]
/// foo = bar AND bar = baz => accum=[(foo, bar), (bar, baz)] accum_filter=[]
/// foo = bar AND baz > 1 => accum=[(foo, bar)] accum_filter=[baz > 1]
+///
+/// For normal expression join key, assume we have tables -- a(c0, c1 c2) and
b(c0, c1, c2):
Review Comment:
I think this kind of join key is typically called an "equijoin" rather than
"normal expression join key"
Thank you for these comments
##########
datafusion/sql/src/planner.rs:
##########
@@ -2934,6 +2999,31 @@ fn parse_sql_number(n: &str) -> Result<Expr> {
})
}
+/// Wrap projection for a plan, if the join keys contains normal expression.
+fn wrap_projection_for_join_if_necessary(
+ join_keys: &[Expr],
+ input: LogicalPlan,
+) -> Result<LogicalPlan> {
+ let expr_join_keys = join_keys
+ .iter()
+ .flat_map(|expr| expr.try_into_col().is_err().then_some(expr))
+ .cloned()
+ .collect::<Vec<_>>();
+
+ let plan = if expr_join_keys.is_empty().not() {
Review Comment:
I found the use of `not()` confusing here -- I think it would be more
consistent to use
```suggestion
let plan = if !expr_join_keys.is_empty() {
```
##########
datafusion/sql/src/planner.rs:
##########
@@ -5509,6 +5599,99 @@ mod tests {
assert!(logical_plan("SELECT \"1\"").is_err());
}
+ #[test]
+ fn test_constant_expr_eq_join() {
+ let sql = "SELECT id, order_id \
+ FROM person \
+ INNER JOIN orders \
+ ON person.id = 10";
+
+ let expected = "Projection: person.id, orders.order_id\
+ \n Filter: person.id = Int64(10)\
+ \n CrossJoin:\
+ \n TableScan: person\
+ \n TableScan: orders";
+ quick_test(sql, expected);
+ }
+
+ #[test]
+ fn test_right_left_expr_eq_join() {
+ let sql = "SELECT id, order_id \
+ FROM person \
+ INNER JOIN orders \
+ ON orders.customer_id * 2 = person.id + 10";
+
+ let expected = "Projection: person.id, orders.order_id\
+ \n Inner Join: person.id + Int64(10) = orders.customer_id * Int64(2)\
+ \n Projection: person.id, person.first_name, person.last_name,
person.age, person.state, person.salary, person.birth_date, person.😀, person.id
+ Int64(10)\
Review Comment:
👍
##########
datafusion/sql/src/planner.rs:
##########
@@ -5509,6 +5599,99 @@ mod tests {
assert!(logical_plan("SELECT \"1\"").is_err());
}
+ #[test]
+ fn test_constant_expr_eq_join() {
+ let sql = "SELECT id, order_id \
+ FROM person \
+ INNER JOIN orders \
+ ON person.id = 10";
+
+ let expected = "Projection: person.id, orders.order_id\
+ \n Filter: person.id = Int64(10)\
+ \n CrossJoin:\
+ \n TableScan: person\
+ \n TableScan: orders";
+ quick_test(sql, expected);
+ }
+
+ #[test]
+ fn test_right_left_expr_eq_join() {
+ let sql = "SELECT id, order_id \
+ FROM person \
+ INNER JOIN orders \
+ ON orders.customer_id * 2 = person.id + 10";
+
+ let expected = "Projection: person.id, orders.order_id\
+ \n Inner Join: person.id + Int64(10) = orders.customer_id * Int64(2)\
+ \n Projection: person.id, person.first_name, person.last_name,
person.age, person.state, person.salary, person.birth_date, person.😀, person.id
+ Int64(10)\
+ \n TableScan: person\
+ \n Projection: orders.order_id, orders.customer_id,
orders.o_item_id, orders.qty, orders.price, orders.delivered,
orders.customer_id * Int64(2)\
+ \n TableScan: orders";
+ quick_test(sql, expected);
+ }
+
+ #[test]
+ fn test_single_column_expr_eq_join() {
+ let sql = "SELECT id, order_id \
+ FROM person \
+ INNER JOIN orders \
+ ON person.id + 10 = orders.customer_id * 2";
+
+ let expected = "Projection: person.id, orders.order_id\
+ \n Inner Join: person.id + Int64(10) = orders.customer_id * Int64(2)\
+ \n Projection: person.id, person.first_name, person.last_name,
person.age, person.state, person.salary, person.birth_date, person.😀, person.id
+ Int64(10)\
+ \n TableScan: person\
+ \n Projection: orders.order_id, orders.customer_id,
orders.o_item_id, orders.qty, orders.price, orders.delivered,
orders.customer_id * Int64(2)\
+ \n TableScan: orders";
+ quick_test(sql, expected);
+ }
+
+ #[test]
+ fn test_multiple_column_expr_eq_join() {
+ let sql = "SELECT id, order_id \
+ FROM person \
+ INNER JOIN orders \
+ ON person.id + person.age + 10 = orders.customer_id * 2 -
orders.price";
+
+ let expected = "Projection: person.id, orders.order_id\
+ \n Inner Join: person.id + person.age + Int64(10) =
orders.customer_id * Int64(2) - orders.price\
+ \n Projection: person.id, person.first_name, person.last_name,
person.age, person.state, person.salary, person.birth_date, person.😀, person.id
+ person.age + Int64(10)\
+ \n TableScan: person\
+ \n Projection: orders.order_id, orders.customer_id,
orders.o_item_id, orders.qty, orders.price, orders.delivered,
orders.customer_id * Int64(2) - orders.price\
+ \n TableScan: orders";
+ quick_test(sql, expected);
+ }
+
+ #[test]
+ fn test_left_projection_expr_eq_join() {
+ let sql = "SELECT id, order_id \
+ FROM person \
+ INNER JOIN orders \
+ ON person.id + person.age + 10 = orders.customer_id";
+
+ let expected = "Projection: person.id, orders.order_id\
+ \n Inner Join: person.id + person.age + Int64(10) =
orders.customer_id\
+ \n Projection: person.id, person.first_name, person.last_name,
person.age, person.state, person.salary, person.birth_date, person.😀, person.id
+ person.age + Int64(10)\
+ \n TableScan: person\
+ \n TableScan: orders";
+ quick_test(sql, expected);
+ }
+
+ #[test]
+ fn test_right_projection_expr_eq_join() {
+ let sql = "SELECT id, order_id \
+ FROM person \
+ INNER JOIN orders \
+ ON person.id = orders.customer_id * 2 - orders.price";
+
+ let expected = "Projection: person.id, orders.order_id\
+ \n Inner Join: person.id = orders.customer_id * Int64(2) -
orders.price\
+ \n TableScan: person\
+ \n Projection: orders.order_id, orders.customer_id,
orders.o_item_id, orders.qty, orders.price, orders.delivered,
orders.customer_id * Int64(2) - orders.price\
+ \n TableScan: orders";
+ quick_test(sql, expected);
+ }
+
Review Comment:
Very nice test !
As I recall this type of transformation can not always be done for all types
of joins (though perhaps it is ok for equijoins)
For example, I wonder if it is correct to always pull expressions below the
join for FULL OUTER JOINs 🤔
Can you add a test for something like
```
let sql = "SELECT id, order_id \
FROM person \
FULL OUTER JOIN orders \
ON person.id = 10";
```
if it doesn't already exist?
In that case the predicates need to be evaluated within the join (if it is
done as a filter afterwards it may filter rows that should not be filtered)
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]