Github user cloud-fan commented on a diff in the pull request:

    https://github.com/apache/spark/pull/19714#discussion_r153771297
  
    --- Diff: sql/core/src/test/scala/org/apache/spark/sql/execution/joins/BroadcastJoinSuite.scala ---
    @@ -223,4 +223,69 @@ class BroadcastJoinSuite extends QueryTest with 
SQLTestUtils {
         assert(HashJoin.rewriteKeyExpr(l :: ss :: Nil) === l :: ss :: Nil)
         assert(HashJoin.rewriteKeyExpr(i :: ss :: Nil) === i :: ss :: Nil)
       }
    +
    +  test("Shouldn't change broadcast join buildSide if user clearly 
specified") {
    +    def assertJoinBuildSide(pair: (String, String, BuildSide)): Any = {
    +      val (sqlString, joinMethod, buildSide) = pair
    +      val executedPlan = sql(sqlString).queryExecution.executedPlan
    +      executedPlan match {
    +        case b: BroadcastNestedLoopJoinExec =>
    +          assert(b.getClass.getSimpleName === joinMethod)
    +          assert(b.buildSide === buildSide)
    +        case w: WholeStageCodegenExec =>
    +          assert(w.children.head.getClass.getSimpleName === joinMethod)
    +          
assert(w.children.head.asInstanceOf[BroadcastHashJoinExec].buildSide === 
buildSide)
    +      }
    +    }
    +
    +    withTempView("t1", "t2") {
    +      spark.createDataFrame(Seq((1, "4"), (2, "2"))).toDF("key", 
"value").createTempView("t1")
    +      spark.createDataFrame(Seq((1, "1"), (2, "12.3"), (2, 
"123"))).toDF("key", "value")
    +        .createTempView("t2")
    +
    +      val t1Size = 
spark.table("t1").queryExecution.analyzed.children.head.stats.sizeInBytes
    +      val t2Size = 
spark.table("t2").queryExecution.analyzed.children.head.stats.sizeInBytes
    +      assert(t1Size < t2Size)
    +
    +      val bh = BroadcastHashJoinExec.toString
    +      val bl = BroadcastNestedLoopJoinExec.toString
    +
    +      Seq(
    +        // INNER JOIN && t1Size < t2Size => BuildLeft
    +        ("SELECT /*+ MAPJOIN(t1, t2) */ * FROM t1 JOIN t2 ON t1.key = 
t2.key", bh, BuildLeft),
    +        // LEFT JOIN => BuildRight
    +        ("SELECT /*+ MAPJOIN(t1, t2) */ * FROM t1 LEFT JOIN t2 ON t1.key = 
t2.key", bh, BuildRight),
    +        // RIGHT JOIN => BuildLeft
    +        ("SELECT /*+ MAPJOIN(t1, t2) */ * FROM t1 RIGHT JOIN t2 ON t1.key 
= t2.key", bh, BuildLeft),
    +        // INNER JOIN && broadcast(t1) => BuildLeft
    +        ("SELECT /*+ MAPJOIN(t1) */ * FROM t1 JOIN t2 ON t1.key = t2.key", 
bh, BuildLeft),
    +        // INNER JOIN && broadcast(t2) => BuildRight
    +        ("SELECT /*+ MAPJOIN(t2) */ * FROM t1 JOIN t2 ON t1.key = t2.key", 
bh, BuildRight)
    +      ).foreach(assertJoinBuildSide)
    --- End diff --
    
    I think it's more readable to write
    ```
    assertJoinBuildSide(...)
    assertJoinBuildSide(...)
    ...
    ```
    than
    ```
    Seq(
      ...
    ).foreach(assertJoinBuildSide)
    ```


---

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to