mingmwang commented on code in PR #4219: URL: https://github.com/apache/arrow-datafusion/pull/4219#discussion_r1023840547
########## datafusion/core/src/physical_optimizer/join_selection.rs: ########## @@ -150,66 +207,139 @@ fn swap_join_filter(filter: &Option<JoinFilter>) -> Option<JoinFilter> { } } -impl PhysicalOptimizerRule for HashBuildProbeOrder { +impl PhysicalOptimizerRule for JoinSelection { fn optimize( &self, plan: Arc<dyn ExecutionPlan>, session_config: &SessionConfig, ) -> Result<Arc<dyn ExecutionPlan>> { - let plan = optimize_children(self, plan, session_config)?; - if let Some(hash_join) = plan.as_any().downcast_ref::<HashJoinExec>() { - let left = hash_join.left(); - let right = hash_join.right(); + let collect_left_threshold = session_config.hash_join_collect_left_threshold; + plan.transform_up(&|plan| { + if let Some(hash_join) = plan.as_any().downcast_ref::<HashJoinExec>() { + if matches!(hash_join.partition_mode(), PartitionMode::Auto) { + try_collect_left(hash_join, collect_left_threshold) + .unwrap() + .or_else(|| Some(partitioned_hash_join(hash_join).unwrap())) + } else { + let left = hash_join.left(); + let right = hash_join.right(); + if should_swap_join_order(&**left, &**right) + && supports_swap(*hash_join.join_type()) + { + Some( + swap_hash_join( + hash_join, + *hash_join.partition_mode(), + left, + right, + ) + .unwrap(), + ) + } else { + None + } + } + } else if let Some(cross_join) = plan.as_any().downcast_ref::<CrossJoinExec>() + { + let left = cross_join.left(); + let right = cross_join.right(); + if should_swap_join_order(&**left, &**right) { + let new_join = + CrossJoinExec::try_new(Arc::clone(right), Arc::clone(left)) + .unwrap(); + // TODO avoid adding ProjectionExec again and again, only adding Final Projection + let proj = ProjectionExec::try_new( + swap_reverting_projection(&left.schema(), &right.schema()), + Arc::new(new_join), + ) + .unwrap(); + Some(Arc::new(proj)) + } else { + None + } + } else { + None + } + }) + } + + fn name(&self) -> &str { + "[CBO]join_selection" + } +} + +fn try_collect_left( Review Comment: Sure, I will add more 
comments.

--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at: us...@infra.apache.org