This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch branch-3.5
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.5 by this push:
     new 2ad11b632e07 [SPARK-46037][SQL] Correctness fix for Shuffled Hash Join build left without codegen
2ad11b632e07 is described below
commit 2ad11b632e072f47b84793a6cbaeb06c984b0e35
Author: Wenchen Fan <[email protected]>
AuthorDate: Thu Aug 29 12:27:41 2024 +0800
    [SPARK-46037][SQL] Correctness fix for Shuffled Hash Join build left without codegen
### What changes were proposed in this pull request?

This is a re-submission of https://github.com/apache/spark/pull/43938 to fix a
join correctness bug caused by https://github.com/apache/spark/pull/41398.
Credits go to mcdull-zhang.

### Why are the changes needed?

Correctness fix.

### Does this PR introduce any user-facing change?

Yes, the query result will be corrected.

### How was this patch tested?

New test.

### Was this patch authored or co-authored using generative AI tooling?

No.
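
For context: with whole-stage codegen disabled, the shuffled hash join's
build-left outer path emits each joined row with the build (left) side's
columns first, while `HashJoin.boundCondition` still bound the extra join
condition against the streamed-first column order, so the condition was
evaluated against the wrong ordinals. A minimal spark-shell style
reproduction, modeled on the new test below (table names and contents are
illustrative, not part of this patch):

    // Illustrative repro: before this fix, the two hinted queries below could
    // disagree when whole-stage codegen is off.
    spark.conf.set("spark.sql.codegen.wholeStage", "false")

    spark.range(1, 4).selectExpr("id AS key", "cast(id AS string) AS value")
      .createOrReplaceTempView("t1")
    spark.range(1, 4).selectExpr("id AS a", "cast(id + 1 AS string) AS b")
      .createOrReplaceTempView("t2")

    def run(hint: String) = spark.sql(
      s"""SELECT /*+ $hint */ *
         |FROM t1 LEFT OUTER JOIN t2
         |ON key = a AND concat(value, b) = '12'
         |""".stripMargin)

    // SHUFFLE_HASH(t1) builds the left side (the affected path);
    // SHUFFLE_MERGE(t1) takes the unaffected sort-merge path as a reference.
    run("SHUFFLE_HASH(t1)").show()
    run("SHUFFLE_MERGE(t1)").show()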
Closes #47905 from cloud-fan/join.
Authored-by: Wenchen Fan <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
(cherry picked from commit af5e0a267e5a37adc25bdf9c78b6fe207ef7bfb5)
Signed-off-by: Wenchen Fan <[email protected]>
---
.../spark/sql/execution/joins/HashJoin.scala | 5 ++---
.../spark/sql/execution/joins/OuterJoinSuite.scala | 22 ++++++++++++++++++++--
2 files changed, 22 insertions(+), 5 deletions(-)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashJoin.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashJoin.scala
index 7c48baf99ef8..07f7915416c1 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashJoin.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashJoin.scala
@@ -138,9 +138,8 @@ trait HashJoin extends JoinCodegenSupport {
     UnsafeProjection.create(streamedBoundKeys)
 
   @transient protected[this] lazy val boundCondition = if (condition.isDefined) {
-    if (joinType == FullOuter && buildSide == BuildLeft) {
-      // Put join left side before right side. This is to be consistent with
-      // `ShuffledHashJoinExec.fullOuterJoin`.
+    if ((joinType == FullOuter || joinType == LeftOuter) && buildSide == BuildLeft) {
+      // Put join left side before right side.
       Predicate.create(condition.get, buildPlan.output ++ streamedPlan.output).eval _
     } else {
       Predicate.create(condition.get, streamedPlan.output ++ buildPlan.output).eval _
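
Why the binding order matters, as a self-contained sketch (hypothetical
two-element rows standing in for buildPlan.output and streamedPlan.output,
not Spark source): the same ordinal points at different columns depending on
which side is laid out first, so a mismatched binding reads the wrong values
without raising any error.

    val build    = Seq(1, "build-value")
    val streamed = Seq(1, "streamed-value")

    val emittedRow = build ++ streamed  // layout produced by the build-left path
    val assumedRow = streamed ++ build  // layout the condition was bound to pre-fix

    // A predicate that wants the streamed row's second column at ordinal 1:
    println(assumedRow(1)) // "streamed-value" -- what the condition meant
    println(emittedRow(1)) // "build-value"    -- what it actually read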
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/OuterJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/OuterJoinSuite.scala
index 4f78833abdb9..a4a3d76db313 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/OuterJoinSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/OuterJoinSuite.scala
@@ -26,10 +26,11 @@ import org.apache.spark.sql.catalyst.plans.logical.{Join, JoinHint}
 import org.apache.spark.sql.execution.{SparkPlan, SparkPlanTest}
 import org.apache.spark.sql.execution.exchange.EnsureRequirements
 import org.apache.spark.sql.internal.SQLConf
-import org.apache.spark.sql.test.SharedSparkSession
+import org.apache.spark.sql.test.{SharedSparkSession, SQLTestData}
 import org.apache.spark.sql.types.{DoubleType, IntegerType, StructType}
 
-class OuterJoinSuite extends SparkPlanTest with SharedSparkSession {
+class OuterJoinSuite extends SparkPlanTest with SharedSparkSession with SQLTestData {
+  setupTestData()
 
   private val EnsureRequirements = new EnsureRequirements()
 
@@ -325,4 +326,21 @@ class OuterJoinSuite extends SparkPlanTest with SharedSparkSession {
       (null, null, 7, 7.0)
     )
   )
+
+  testWithWholeStageCodegenOnAndOff(
+    "SPARK-46037: ShuffledHashJoin build left with left outer join, codegen off") { _ =>
+    def join(hint: String): DataFrame = {
+      sql(
+        s"""
+           |SELECT /*+ $hint */ *
+           |FROM testData t1
+           |LEFT OUTER JOIN
+           |testData2 t2
+           |ON key = a AND concat(value, b) = '12'
+           |""".stripMargin)
+    }
+    val df1 = join("SHUFFLE_HASH(t1)")
+    val df2 = join("SHUFFLE_MERGE(t1)")
+    checkAnswer(df1, identity, df2.collect().toSeq)
+  }
 }
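
The new test uses sort-merge join as the reference answer: the same query runs
with a SHUFFLE_HASH(t1) hint (the affected build-left hash join) and a
SHUFFLE_MERGE(t1) hint (an unaffected path), and the two results must match.
testWithWholeStageCodegenOnAndOff runs the body under both codegen settings,
which matters because the bug only affects the non-codegen path; conceptually
the helper behaves like this sketch (its exact internals are an assumption):

    // Sketch: run the same test body with whole-stage codegen on and off.
    Seq("true", "false").foreach { enabled =>
      withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> enabled) {
        // test body goes here
      }
    }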
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]