alamb commented on code in PR #7612:
URL: https://github.com/apache/arrow-datafusion/pull/7612#discussion_r1349506258


##########
datafusion/substrait/src/logical_plan/consumer.rs:
##########
@@ -341,65 +393,32 @@ pub async fn from_substrait_rel(
             // The join condition expression needs full input schema and not 
the output schema from join since we lose columns from
             // certain join types such as semi and anti joins
             let in_join_schema = left.schema().join(right.schema())?;
-            // Parse post join filter if exists
-            let join_filter = match &join.post_join_filter {
-                Some(filter) => {
-                    let parsed_filter =
-                        from_substrait_rex(filter, &in_join_schema, 
extensions).await?;
-                    Some(parsed_filter.as_ref().clone())
-                }
-                None => None,
-            };
+
             // If join expression exists, parse the `on` condition expression, 
build join and return
-            // Otherwise, build join with koin filter, without join keys
+            // Otherwise, build join with only the filter, without join keys
             match &join.expression.as_ref() {
                 Some(expr) => {
                     let on =
                         from_substrait_rex(expr, &in_join_schema, 
extensions).await?;
-                    let predicates = split_conjunction(&on);
-                    // TODO: collect only one null_eq_null
-                    let join_exprs: Vec<(Column, Column, bool)> = predicates
-                        .iter()
-                        .map(|p| match p {
-                            Expr::BinaryExpr(BinaryExpr { left, op, right }) 
=> {
-                                match (left.as_ref(), right.as_ref()) {
-                                    (Expr::Column(l), Expr::Column(r)) => 
match op {
-                                        Operator::Eq => Ok((l.clone(), 
r.clone(), false)),
-                                        Operator::IsNotDistinctFrom => {
-                                            Ok((l.clone(), r.clone(), true))
-                                        }
-                                        _ => plan_err!("invalid join condition 
op"),
-                                    },
-                                    _ => plan_err!("invalid join condition 
expression"),
-                                }
-                            }
-                            _ => plan_err!(
-                                "Non-binary expression is not supported in 
join condition"
-                            ),
-                        })
-                        .collect::<Result<Vec<_>>>()?;
-                    let (left_cols, right_cols, null_eq_nulls): (Vec<_>, 
Vec<_>, Vec<_>) =
-                        itertools::multiunzip(join_exprs);
+                    // The join expression can contain both equal and 
non-equal ops.

Review Comment:
   Given that the 
[`ExtractEquijoinPredicate`](https://github.com/apache/arrow-datafusion/blob/093b775adc3593e9e5cb7343e28406ed458551ad/datafusion/optimizer/src/extract_equijoin_predicate.rs#L31-L41)
 optimizer pass already  splits up join predicates into equijoin predicates and 
"other" predicates, I wonder if simply create the `LogicalPlan::Join` using 
`join.expression` (and let the subsequent optimizer pass sort it out)? 
   
   Something like
   
   ```rust
   left.join(
     right.build()?,
     join_type,
     (vec![], vec![]),
     on, // <-- use the filter directly here, let optimizer pass extract the 
equijoin columns
     nulls_equal_nulls,
   )?
   ```
   
   It makes me realize when looking at the API for 
[`LogicalPlanBuilder::join`](https://docs.rs/datafusion/latest/datafusion/logical_expr/struct.LogicalPlanBuilder.html#method.join)
 that the API is super confusing. It would be nice to improve that API to make 
it clear that a `join` can just take a single `Expr` and DataFusion will sort 
out figuring out the join columns, etc.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to