james727 commented on a change in pull request #1618:
URL: https://github.com/apache/arrow-datafusion/pull/1618#discussion_r789922499
##########
File path: datafusion/src/optimizer/filter_push_down.rs
##########
@@ -253,33 +176,107 @@ fn split_members<'a>(predicate: &'a Expr, predicates: &mut Vec<&'a Expr>) {
}
}
+// For a given JOIN logical plan, determine whether each side of the join is preserved.
+// We say a join side is preserved if the join returns all or a subset of the rows from
+// the relevant side - i.e. the side of the join cannot provide nulls. Returns a tuple
+// of booleans - (left_preserved, right_preserved).
+fn lr_is_preserved(plan: &LogicalPlan) -> (bool, bool) {
+ match plan {
+ LogicalPlan::Join(Join { join_type, .. }) => match join_type {
+ JoinType::Inner => (true, true),
+ JoinType::Left => (true, false),
+ JoinType::Right => (false, true),
+ JoinType::Full => (false, false),
+ // No columns from the right side of the join can be referenced in output
+ // predicates for semi/anti joins, so whether we specify t/f doesn't matter.
+ JoinType::Semi | JoinType::Anti => (true, false),
+ },
+ LogicalPlan::CrossJoin(_) => (true, true),
+ _ => unreachable!("lr_is_preserved only valid for JOIN nodes"),
+ }
+}
+
+// Determine which predicates in state can be pushed down to a given side of a join.
+// To determine this, we need to know the schema of the relevant join side and whether
+// or not the side's rows are preserved when joining. If the side is not preserved, we
+// do not push down anything. Otherwise we can push down predicates where all of the
+// relevant columns are contained on the relevant join side's schema.
+fn get_pushable_join_predicates<'a>(
+ state: &'a State,
+ schema: &DFSchema,
+ preserved: bool,
+) -> Predicates<'a> {
+ if !preserved {
Review comment:
Oh jeez - facepalm
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]