imback82 commented on a change in pull request #29074:
URL: https://github.com/apache/spark/pull/29074#discussion_r453261237



##########
File path: 
sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala
##########
@@ -171,19 +176,50 @@ case class EnsureRequirements(conf: SQLConf) extends 
Rule[SparkPlan] {
       leftPartitioning: Partitioning,
       rightPartitioning: Partitioning): (Seq[Expression], Seq[Expression]) = {
     if (leftKeys.forall(_.deterministic) && rightKeys.forall(_.deterministic)) 
{
-      (leftPartitioning, rightPartitioning) match {
-        case (HashPartitioning(leftExpressions, _), _) =>
-          reorder(leftKeys.toIndexedSeq, rightKeys.toIndexedSeq, 
leftExpressions, leftKeys)
-        case (_, HashPartitioning(rightExpressions, _)) =>
-          reorder(leftKeys.toIndexedSeq, rightKeys.toIndexedSeq, 
rightExpressions, rightKeys)
-        case _ =>
-          (leftKeys, rightKeys)
-      }
+      reorderJoinKeysRecursively(leftKeys, rightKeys, leftPartitioning, 
rightPartitioning)
+        .getOrElse((leftKeys, rightKeys))
     } else {
       (leftKeys, rightKeys)
     }
   }
 
+  /**
+   * Recursively reorders the join keys based on partitioning. It starts 
reordering the
+   * join keys to match HashPartitioning on either side, followed by 
PartitioningCollection.
+   */
+  private def reorderJoinKeysRecursively(
+      leftKeys: Seq[Expression],
+      rightKeys: Seq[Expression],
+      leftPartitioning: Partitioning,
+      rightPartitioning: Partitioning): Option[(Seq[Expression], 
Seq[Expression])] = {
+    (leftPartitioning, rightPartitioning) match {
+      case (HashPartitioning(leftExpressions, _), _) =>
+        reorder(leftKeys.toIndexedSeq, rightKeys.toIndexedSeq, 
leftExpressions, leftKeys)
+          .orElse(reorderJoinKeysRecursively(
+            leftKeys, rightKeys, UnknownPartitioning(0), rightPartitioning))
+      case (_, HashPartitioning(rightExpressions, _)) =>
+        reorder(leftKeys.toIndexedSeq, rightKeys.toIndexedSeq, 
rightExpressions, rightKeys)
+          .orElse(reorderJoinKeysRecursively(

Review comment:
       This can also be implemented by looking at the left partitioning first and 
then moving on to the right partitioning:
   ```scala
       (leftPartitioning, rightPartitioning) match {
         case (HashPartitioning(leftExpressions, _), _) =>
           reorder(leftKeys.toIndexedSeq, rightKeys.toIndexedSeq, 
leftExpressions, leftKeys)
             .orElse(reorderJoinKeysRecursively(
               leftKeys, rightKeys, UnknownPartitioning(0), rightPartitioning))
         case (PartitioningCollection(partitionings), _) =>
           partitionings.foreach { p =>
             reorderJoinKeysRecursively(leftKeys, rightKeys, p, 
rightPartitioning).map { k =>
               return Some(k)
             }
           }
           reorderJoinKeysRecursively(leftKeys, rightKeys, 
UnknownPartitioning(0), rightPartitioning)
         case (_, HashPartitioning(rightExpressions, _)) =>
           reorder(leftKeys.toIndexedSeq, rightKeys.toIndexedSeq, 
rightExpressions, rightKeys)
         case (_, PartitioningCollection(partitionings)) =>
           partitionings.foreach { p =>
             reorderJoinKeysRecursively(leftKeys, rightKeys, leftPartitioning, 
p).map { k =>
               return Some(k)
             }
           }
           None
         case _ =>
           None
       }
   ```
   However, I chose the approach currently in the PR (rather than the 
alternative above) so that the behavior remains the same. If you have 
`leftPartitioning = PartitioningCollection` and `rightPartitioning = 
HashPartitioning`, the PR's code will match the `rightPartitioning` first, 
which is the existing behavior.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to