Github user cloud-fan commented on a diff in the pull request:
https://github.com/apache/spark/pull/19714#discussion_r153771297
--- Diff:
sql/core/src/test/scala/org/apache/spark/sql/execution/joins/BroadcastJoinSuite.scala
---
@@ -223,4 +223,69 @@ class BroadcastJoinSuite extends QueryTest with SQLTestUtils {
assert(HashJoin.rewriteKeyExpr(l :: ss :: Nil) === l :: ss :: Nil)
assert(HashJoin.rewriteKeyExpr(i :: ss :: Nil) === i :: ss :: Nil)
}
+
+ test("Shouldn't change broadcast join buildSide if user clearly specified") {
+ def assertJoinBuildSide(pair: (String, String, BuildSide)): Any = {
+ val (sqlString, joinMethod, buildSide) = pair
+ val executedPlan = sql(sqlString).queryExecution.executedPlan
+ executedPlan match {
+ case b: BroadcastNestedLoopJoinExec =>
+ assert(b.getClass.getSimpleName === joinMethod)
+ assert(b.buildSide === buildSide)
+ case w: WholeStageCodegenExec =>
+ assert(w.children.head.getClass.getSimpleName === joinMethod)
+ assert(w.children.head.asInstanceOf[BroadcastHashJoinExec].buildSide === buildSide)
+ }
+ }
+
+ withTempView("t1", "t2") {
+ spark.createDataFrame(Seq((1, "4"), (2, "2"))).toDF("key", "value").createTempView("t1")
+ spark.createDataFrame(Seq((1, "1"), (2, "12.3"), (2, "123"))).toDF("key", "value")
+ .createTempView("t2")
+
+ val t1Size = spark.table("t1").queryExecution.analyzed.children.head.stats.sizeInBytes
+ val t2Size = spark.table("t2").queryExecution.analyzed.children.head.stats.sizeInBytes
+ assert(t1Size < t2Size)
+
+ val bh = BroadcastHashJoinExec.toString
+ val bl = BroadcastNestedLoopJoinExec.toString
+
+ Seq(
+ // INNER JOIN && t1Size < t2Size => BuildLeft
+ ("SELECT /*+ MAPJOIN(t1, t2) */ * FROM t1 JOIN t2 ON t1.key = t2.key", bh, BuildLeft),
+ // LEFT JOIN => BuildRight
+ ("SELECT /*+ MAPJOIN(t1, t2) */ * FROM t1 LEFT JOIN t2 ON t1.key = t2.key", bh, BuildRight),
+ // RIGHT JOIN => BuildLeft
+ ("SELECT /*+ MAPJOIN(t1, t2) */ * FROM t1 RIGHT JOIN t2 ON t1.key = t2.key", bh, BuildLeft),
+ // INNER JOIN && broadcast(t1) => BuildLeft
+ ("SELECT /*+ MAPJOIN(t1) */ * FROM t1 JOIN t2 ON t1.key = t2.key", bh, BuildLeft),
+ // INNER JOIN && broadcast(t2) => BuildRight
+ ("SELECT /*+ MAPJOIN(t2) */ * FROM t1 JOIN t2 ON t1.key = t2.key", bh, BuildRight)
+ ).foreach(assertJoinBuildSide)
--- End diff --
I think it's more readable to write
```
assertJoinBuildSide(...)
assertJoinBuildSide(...)
...
```
than
```
Seq(
...
).foreach(assertJoinBuildSide)
```
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]