This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new 4e33454 [SPARK-32220][SQL][3.0][FOLLOW-UP] SHUFFLE_REPLICATE_NL Hint
should not change Non-Cartesian Product join result
4e33454 is described below
commit 4e33454add5528e1214ffa2a1f09500ede11f1ab
Author: angerszhu <[email protected]>
AuthorDate: Tue Jul 14 00:56:47 2020 -0700
[SPARK-32220][SQL][3.0][FOLLOW-UP] SHUFFLE_REPLICATE_NL Hint should not
change Non-Cartesian Product join result
### What changes were proposed in this pull request?
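Address the review comment at
https://github.com/apache/spark/pull/29035#discussion_r453468999
by renaming the extracted non-equi condition to `nonEquiCond` and adding a
code comment explaining why `CartesianProductExec` is given the full join
condition.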
### Why are the changes needed?
To add a code comment that makes the choice of join condition explicit.
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
Not needed; the change only renames variables and adds a code comment.
Closes #29093 from AngersZhuuuu/SPARK-32220-3.0-FOLLOWUP.
Authored-by: angerszhu <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
---
.../org/apache/spark/sql/execution/SparkStrategies.scala | 14 ++++++++------
1 file changed, 8 insertions(+), 6 deletions(-)
diff --git
a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala
b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala
index e041c54..28d1ccb 100644
---
a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala
+++
b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala
@@ -244,7 +244,7 @@ abstract class SparkStrategies extends
QueryPlanner[SparkPlan] {
// 4. Pick cartesian product if join type is inner like.
// 5. Pick broadcast nested loop join as the final solution. It may
OOM but we don't have
// other choice.
- case p @ ExtractEquiJoinKeys(joinType, leftKeys, rightKeys, condition,
left, right, hint) =>
+ case j @ ExtractEquiJoinKeys(joinType, leftKeys, rightKeys, nonEquiCond,
left, right, hint) =>
def createBroadcastHashJoin(buildLeft: Boolean, buildRight: Boolean) =
{
val wantToBuildLeft = canBuildLeft(joinType) && buildLeft
val wantToBuildRight = canBuildRight(joinType) && buildRight
@@ -254,7 +254,7 @@ abstract class SparkStrategies extends
QueryPlanner[SparkPlan] {
rightKeys,
joinType,
buildSide,
- condition,
+ nonEquiCond,
planLater(left),
planLater(right)))
}
@@ -269,7 +269,7 @@ abstract class SparkStrategies extends
QueryPlanner[SparkPlan] {
rightKeys,
joinType,
buildSide,
- condition,
+ nonEquiCond,
planLater(left),
planLater(right)))
}
@@ -278,7 +278,7 @@ abstract class SparkStrategies extends
QueryPlanner[SparkPlan] {
def createSortMergeJoin() = {
if (RowOrdering.isOrderable(leftKeys)) {
Some(Seq(joins.SortMergeJoinExec(
- leftKeys, rightKeys, joinType, condition, planLater(left),
planLater(right))))
+ leftKeys, rightKeys, joinType, nonEquiCond, planLater(left),
planLater(right))))
} else {
None
}
@@ -286,7 +286,9 @@ abstract class SparkStrategies extends
QueryPlanner[SparkPlan] {
def createCartesianProduct() = {
if (joinType.isInstanceOf[InnerLike]) {
- Some(Seq(joins.CartesianProductExec(planLater(left),
planLater(right), p.condition)))
+ // `CartesianProductExec` can't implicitly evaluate equal join
condition, here we should
+ // pass the original condition which includes both equal and
non-equal conditions.
+ Some(Seq(joins.CartesianProductExec(planLater(left),
planLater(right), j.condition)))
} else {
None
}
@@ -311,7 +313,7 @@ abstract class SparkStrategies extends
QueryPlanner[SparkPlan] {
// This join could be very slow or OOM
val buildSide = getSmallerSide(left, right)
Seq(joins.BroadcastNestedLoopJoinExec(
- planLater(left), planLater(right), buildSide, joinType,
condition))
+ planLater(left), planLater(right), buildSide, joinType,
nonEquiCond))
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]