alamb commented on code in PR #7612:
URL: https://github.com/apache/arrow-datafusion/pull/7612#discussion_r1349507545


##########
datafusion/substrait/src/logical_plan/consumer.rs:
##########
@@ -341,65 +393,32 @@ pub async fn from_substrait_rel(
             // The join condition expression needs full input schema and not the output schema from join since we lose columns from
             // certain join types such as semi and anti joins
             let in_join_schema = left.schema().join(right.schema())?;
-            // Parse post join filter if exists
-            let join_filter = match &join.post_join_filter {
-                Some(filter) => {
-                    let parsed_filter =
-                        from_substrait_rex(filter, &in_join_schema, 
extensions).await?;
-                    Some(parsed_filter.as_ref().clone())
-                }
-                None => None,
-            };
+
             // If join expression exists, parse the `on` condition expression, build join and return
-            // Otherwise, build join with koin filter, without join keys
+            // Otherwise, build join with only the filter, without join keys
             match &join.expression.as_ref() {
                 Some(expr) => {
                     let on =
                         from_substrait_rex(expr, &in_join_schema, 
extensions).await?;
-                    let predicates = split_conjunction(&on);
-                    // TODO: collect only one null_eq_null
-                    let join_exprs: Vec<(Column, Column, bool)> = predicates
-                        .iter()
-                        .map(|p| match p {
-                            Expr::BinaryExpr(BinaryExpr { left, op, right }) 
=> {
-                                match (left.as_ref(), right.as_ref()) {
-                                    (Expr::Column(l), Expr::Column(r)) => 
match op {
-                                        Operator::Eq => Ok((l.clone(), 
r.clone(), false)),
-                                        Operator::IsNotDistinctFrom => {
-                                            Ok((l.clone(), r.clone(), true))
-                                        }
-                                        _ => plan_err!("invalid join condition 
op"),
-                                    },
-                                    _ => plan_err!("invalid join condition 
expression"),
-                                }
-                            }
-                            _ => plan_err!(
-                                "Non-binary expression is not supported in 
join condition"
-                            ),
-                        })
-                        .collect::<Result<Vec<_>>>()?;
-                    let (left_cols, right_cols, null_eq_nulls): (Vec<_>, 
Vec<_>, Vec<_>) =
-                        itertools::multiunzip(join_exprs);
+                    // The join expression can contain both equal and non-equal ops.

Review Comment:
   It turns out this is exactly what `DataFrame::join_on` does -- I have filed 
a ticket with a way to make this clearer: 
https://github.com/apache/arrow-datafusion/issues/7766#issue-1931312146



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to