This is an automated email from the ASF dual-hosted git repository.

ozankabak pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 901a094d4b Update join_selection.rs (#13893)
901a094d4b is described below

commit 901a094d4b2b800889570d5e23ba18bb2d57ee50
Author: Berkay Şahin <[email protected]>
AuthorDate: Tue Dec 24 20:11:03 2024 +0300

    Update join_selection.rs (#13893)
---
 .../core/src/physical_optimizer/join_selection.rs     | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/datafusion/core/src/physical_optimizer/join_selection.rs b/datafusion/core/src/physical_optimizer/join_selection.rs
index 009757f3a9..7b977899b0 100644
--- a/datafusion/core/src/physical_optimizer/join_selection.rs
+++ b/datafusion/core/src/physical_optimizer/join_selection.rs
@@ -61,7 +61,7 @@ impl JoinSelection {
 // TODO: We need some performance test for Right Semi/Right Join swap to Left Semi/Left Join in case that the right side is smaller but not much smaller.
 // TODO: In PrestoSQL, the optimizer flips join sides only if one side is much smaller than the other by more than SIZE_DIFFERENCE_THRESHOLD times, by default is 8 times.
 /// Checks statistics for join swap.
-fn should_swap_join_order(
+pub(crate) fn should_swap_join_order(
     left: &dyn ExecutionPlan,
     right: &dyn ExecutionPlan,
 ) -> Result<bool> {
@@ -108,7 +108,7 @@ fn supports_collect_by_thresholds(
 }
 
 /// Predicate that checks whether the given join type supports input swapping.
-fn supports_swap(join_type: JoinType) -> bool {
+pub(crate) fn supports_swap(join_type: JoinType) -> bool {
     matches!(
         join_type,
         JoinType::Inner
@@ -176,7 +176,7 @@ fn swap_join_projection(
 /// This function swaps the inputs of the given join operator.
 /// This function is public so other downstream projects can use it
 /// to construct `HashJoinExec` with right side as the build side.
-pub fn swap_hash_join(
+pub(crate) fn swap_hash_join(
     hash_join: &HashJoinExec,
     partition_mode: PartitionMode,
 ) -> Result<Arc<dyn ExecutionPlan>> {
@@ -222,7 +222,7 @@ pub fn swap_hash_join(
 }
 
 /// Swaps inputs of `NestedLoopJoinExec` and wraps it into `ProjectionExec` is required
-fn swap_nl_join(join: &NestedLoopJoinExec) -> Result<Arc<dyn ExecutionPlan>> {
+pub(crate) fn swap_nl_join(join: &NestedLoopJoinExec) -> Result<Arc<dyn ExecutionPlan>> {
     let new_filter = swap_join_filter(join.filter());
     let new_join_type = &swap_join_type(*join.join_type());
 
@@ -359,7 +359,7 @@ impl PhysicalOptimizerRule for JoinSelection {
 /// `CollectLeft` mode is applicable. Otherwise, it will try to swap the join sides.
 /// When the `ignore_threshold` is false, this function will also check left
 /// and right sizes in bytes or rows.
-fn try_collect_left(
+pub(crate) fn try_collect_left(
     hash_join: &HashJoinExec,
     ignore_threshold: bool,
     threshold_byte_size: usize,
@@ -421,7 +421,14 @@ fn try_collect_left(
     }
 }
 
-fn partitioned_hash_join(hash_join: &HashJoinExec) -> Result<Arc<dyn ExecutionPlan>> {
+/// Creates a partitioned hash join execution plan, swapping inputs if beneficial.
+///
+/// Checks if the join order should be swapped based on the join type and input statistics.
+/// If swapping is optimal and supported, creates a swapped partitioned hash join; otherwise,
+/// creates a standard partitioned hash join.
+pub(crate) fn partitioned_hash_join(
+    hash_join: &HashJoinExec,
+) -> Result<Arc<dyn ExecutionPlan>> {
     let left = hash_join.left();
     let right = hash_join.right();
     if supports_swap(*hash_join.join_type()) && should_swap_join_order(&**left, &**right)?


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to