Github user cloud-fan commented on a diff in the pull request:
https://github.com/apache/spark/pull/21529#discussion_r195099853
--- Diff:
sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala ---
@@ -679,6 +679,23 @@ class PlannerSuite extends SharedSQLContext {
}
assert(rangeExecInZeroPartition.head.outputPartitioning ==
UnknownPartitioning(0))
}
+
+ test("SPARK-24495: EnsureRequirements can return wrong plan when reusing
the same key in join") {
+ val plan1 = DummySparkPlan(outputOrdering = Seq(orderingA),
+ outputPartitioning = HashPartitioning(exprA :: exprA :: Nil, 5))
+ val plan2 = DummySparkPlan(outputOrdering = Seq(orderingB),
+ outputPartitioning = HashPartitioning(exprB :: Nil, 5))
+ val smjExec = SortMergeJoinExec(
+ exprA :: exprA :: Nil, exprB :: exprC :: Nil, Inner, None, plan1,
plan2)
+
+ val outputPlan =
EnsureRequirements(spark.sessionState.conf).apply(smjExec)
+ outputPlan match {
+ case SortMergeJoinExec(leftKeys, rightKeys, _, _, _, _) =>
+ assert(leftKeys == Seq(exprA, exprA))
+ assert(rightKeys.contains(exprB) && rightKeys.contains(exprC))
--- End diff --
Is it better to check `rightKeys == Seq(exprB, exprC)`?
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]