ulysses-you commented on a change in pull request #34069:
URL: https://github.com/apache/spark/pull/34069#discussion_r715274411



##########
File path: 
sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/LogicalQueryStageStrategy.scala
##########
@@ -35,21 +35,27 @@ import 
org.apache.spark.sql.execution.joins.{BroadcastHashJoinExec, BroadcastNes
  *    stage in case of the larger join child relation finishes before the 
smaller relation. Note
  *    that this rule needs to be applied before regular join strategies.
  */
-object LogicalQueryStageStrategy extends Strategy with PredicateHelper {
+object LogicalQueryStageStrategy extends Strategy with PredicateHelper with 
JoinSelectionHelper {
 
   private def isBroadcastStage(plan: LogicalPlan): Boolean = plan match {
     case LogicalQueryStage(_, _: BroadcastQueryStageExec) => true
     case _ => false
   }
 
   def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match {
-    case ExtractEquiJoinKeys(joinType, leftKeys, rightKeys, otherCondition, _,
+    case j @ ExtractEquiJoinKeys(joinType, leftKeys, rightKeys, 
otherCondition, _,
           left, right, hint)
         if isBroadcastStage(left) || isBroadcastStage(right) =>
       val buildSide = if (isBroadcastStage(left)) BuildLeft else BuildRight
-      Seq(BroadcastHashJoinExec(
-        leftKeys, rightKeys, joinType, buildSide, otherCondition, 
planLater(left),
-        planLater(right)))
+      if ((hintToBroadcastNLLeft(hint) && isBroadcastStage(left)) ||
+        hintToBroadcastNLRight(hint) && isBroadcastStage(right)) {

Review comment:
       Not sure it will be clearer. The `LogicalQueryStageStrategy` is used to 
retain the original broadcast join, so it should know how broadcast joins are 
generated (including both BHJ and BNLJ).
   
   If we move this into `JoinSelection`, it would look like:
   ```scala
   // LogicalQueryStageStrategy
   case j @ ExtractEquiJoinKeys(joinType, leftKeys, rightKeys, otherCondition, 
_,
             left, right, hint) =>
     q.physicalPlan :: Nil
   
   ...
   ```
   
   ```scala
   // JoinSelection
   case j @ ExtractEquiJoinKeys(joinType, leftKeys, rightKeys, otherCondition, 
_,
         left, right, hint)
       if isBroadcastStage(left) || isBroadcastStage(right) =>
     val buildSide = if (isBroadcastStage(left)) BuildLeft else BuildRight
     if ((hintToBroadcastNLLeft(hint) && isBroadcastStage(left)) ||
       hintToBroadcastNLRight(hint) && isBroadcastStage(right)) {
       Seq(BroadcastNestedLoopJoinExec(
         planLater(left), planLater(right), buildSide, joinType, j.condition))
     } else {
       Seq(BroadcastHashJoinExec(
         leftKeys, rightKeys, joinType, buildSide, otherCondition, 
planLater(left),
         planLater(right)))
     }
   
   // the old equi-join code
   case j @ ExtractEquiJoinKeys(joinType, leftKeys, rightKeys, nonEquiCond,
       _, left, right, hint) =>
   ...
   ```
   
   Or, we can even move all of the `LogicalQueryStageStrategy` code into 
`JoinSelection`. But I prefer the current approach, which makes the 
broadcast stage handling easy to maintain.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to