imback82 commented on a change in pull request #26943: [SPARK-30298][SQL] 
Bucket join should work for self-join with views
URL: https://github.com/apache/spark/pull/26943#discussion_r370304219
 
 

 ##########
 File path: 
sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala
 ##########
 @@ -937,6 +938,93 @@ class PlannerSuite extends SharedSparkSession with 
AdaptiveSparkPlanHelper {
       }
     }
   }
+
+  test("aliases in the project should not introduce extra shuffle") {
+    withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") {
+      withTempView("df1", "df2") {
+        spark.range(10).selectExpr("id AS key", 
"0").repartition($"key").createTempView("df1")
+        spark.range(10).selectExpr("id AS key", 
"0").repartition($"key").createTempView("df2")
+        val planned = sql(
+          """
+            |SELECT * FROM
+            |  (SELECT key AS k from df1) t1
+            |INNER JOIN
+            |  (SELECT key AS k from df2) t2
+            |ON t1.k = t2.k
+          """.stripMargin).queryExecution.executedPlan
+        val exchanges = planned.collect { case s: ShuffleExchangeExec => s }
 
 Review comment:
   Thanks for pointing that out. I updated it, and it now generates two 
`ShuffleExchangeExec` nodes instead of four.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to