Github user cloud-fan commented on a diff in the pull request:
https://github.com/apache/spark/pull/21529#discussion_r194830452
--- Diff: sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala ---
@@ -679,6 +679,17 @@ class PlannerSuite extends SharedSQLContext {
     }
     assert(rangeExecInZeroPartition.head.outputPartitioning == UnknownPartitioning(0))
   }
+
+  test("SPARK-24495: EnsureRequirements can return wrong plan when reusing the same key in join") {
+    withSQLConf(("spark.sql.shuffle.partitions", "1"),
+      ("spark.sql.constraintPropagation.enabled", "false"),
+      ("spark.sql.autoBroadcastJoinThreshold", "-1")) {
+      val df1 = spark.range(100).repartition(2, $"id", $"id")
+      val df2 = spark.range(100).select(($"id" * 2).as("b1"), (- $"id").as("b2"))
--- End diff ---
`($"id" * 2).as("b1")` -> `$"id".as("b1")`, to minimize the test.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]