xudong963 commented on a change in pull request #1566:
URL: https://github.com/apache/arrow-datafusion/pull/1566#discussion_r787756794



##########
File path: datafusion/src/sql/planner.rs
##########
@@ -714,33 +713,79 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
                 extract_possible_join_keys(&filter_expr, &mut 
possible_join_keys)?;
 
                 let mut all_join_keys = HashSet::new();
-                let mut left = plans[0].clone();
-                for right in plans.iter().skip(1) {
-                    let left_schema = left.schema();
-                    let right_schema = right.schema();
+
+                let mut plans = plans.into_iter();
+                let mut left = plans.next().unwrap(); // have at least one plan
+
+                // List of the plans that have not yet been joined
+                let mut remaining_plans: Vec<Option<LogicalPlan>> =
+                    plans.into_iter().map(Some).collect();
+
+                // Take from the list of remaining plans,
+                loop {
                     let mut join_keys = vec![];
-                    for (l, r) in &possible_join_keys {
-                        if left_schema.field_from_column(l).is_ok()
-                            && right_schema.field_from_column(r).is_ok()
-                        {
-                            join_keys.push((l.clone(), r.clone()));
-                        } else if left_schema.field_from_column(r).is_ok()
-                            && right_schema.field_from_column(l).is_ok()
-                        {
-                            join_keys.push((r.clone(), l.clone()));
+
+                    // Search all remaining plans for the next to
+                    // join. Prefer the first one that has a join
+                    // predicate in the predicate lists
+                    let plan_with_idx = 
remaining_plans.iter().enumerate().find(|(_idx, plan)| {
+                        // skip plans that have been joined already
+                        let plan = if let Some(plan) = plan {
+                            plan
+                        } else {
+                            return false;
+                        };
+
+                        // can we find a match?
+                        let left_schema = left.schema();
+                        let right_schema = plan.schema();
+                        for (l, r) in &possible_join_keys {
+                            if left_schema.field_from_column(l).is_ok()
+                                && right_schema.field_from_column(r).is_ok()
+                            {
+                                join_keys.push((l.clone(), r.clone()));
+                            } else if left_schema.field_from_column(r).is_ok()
+                                && right_schema.field_from_column(l).is_ok()
+                            {
+                                join_keys.push((r.clone(), l.clone()));
+                            }
                         }
-                    }
+                        // stop if we found join keys
+                        !join_keys.is_empty()
+                    });
+
+                    // If we did not find join keys, either there are
+                    // no more plans, or we can't find any plans that
+                    // can be joined with predicates
                     if join_keys.is_empty() {
-                        left =
-                            
LogicalPlanBuilder::from(left).cross_join(right)?.build()?;
+                        assert!(plan_with_idx.is_none());

Review comment:
       Very tight!

##########
File path: datafusion/src/sql/planner.rs
##########
@@ -714,33 +713,79 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
                 extract_possible_join_keys(&filter_expr, &mut 
possible_join_keys)?;
 
                 let mut all_join_keys = HashSet::new();
-                let mut left = plans[0].clone();
-                for right in plans.iter().skip(1) {
-                    let left_schema = left.schema();
-                    let right_schema = right.schema();
+
+                let mut plans = plans.into_iter();
+                let mut left = plans.next().unwrap(); // have at least one plan
+
+                // List of the plans that have not yet been joined
+                let mut remaining_plans: Vec<Option<LogicalPlan>> =
+                    plans.into_iter().map(Some).collect();
+
+                // Take from the list of remaining plans,
+                loop {
                     let mut join_keys = vec![];
-                    for (l, r) in &possible_join_keys {
-                        if left_schema.field_from_column(l).is_ok()
-                            && right_schema.field_from_column(r).is_ok()
-                        {
-                            join_keys.push((l.clone(), r.clone()));
-                        } else if left_schema.field_from_column(r).is_ok()
-                            && right_schema.field_from_column(l).is_ok()
-                        {
-                            join_keys.push((r.clone(), l.clone()));
+
+                    // Search all remaining plans for the next to
+                    // join. Prefer the first one that has a join
+                    // predicate in the predicate lists
+                    let plan_with_idx = 
remaining_plans.iter().enumerate().find(|(_idx, plan)| {

Review comment:
       Here, I use `plan_with_idx` to replace `idx`




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to