Github user cloud-fan commented on a diff in the pull request:

    https://github.com/apache/spark/pull/21529#discussion_r195099853
  
    --- Diff: sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala ---
    @@ -679,6 +679,23 @@ class PlannerSuite extends SharedSQLContext {
         }
         assert(rangeExecInZeroPartition.head.outputPartitioning == 
UnknownPartitioning(0))
       }
    +
    +  test("SPARK-24495: EnsureRequirements can return wrong plan when reusing 
the same key in join") {
    +    val plan1 = DummySparkPlan(outputOrdering = Seq(orderingA),
    +      outputPartitioning = HashPartitioning(exprA :: exprA :: Nil, 5))
    +    val plan2 = DummySparkPlan(outputOrdering = Seq(orderingB),
    +      outputPartitioning = HashPartitioning(exprB :: Nil, 5))
    +    val smjExec = SortMergeJoinExec(
    +      exprA :: exprA :: Nil, exprB :: exprC :: Nil, Inner, None, plan1, 
plan2)
    +
    +    val outputPlan = 
EnsureRequirements(spark.sessionState.conf).apply(smjExec)
    +    outputPlan match {
    +      case SortMergeJoinExec(leftKeys, rightKeys, _, _, _, _) =>
    +        assert(leftKeys == Seq(exprA, exprA))
    +        assert(rightKeys.contains(exprB) && rightKeys.contains(exprC))
    --- End diff --
    
    Is it better to check `rightKeys == Seq(exprB, exprC)`?


---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to