Github user yucai commented on a diff in the pull request:
https://github.com/apache/spark/pull/21564#discussion_r196305164
--- Diff:
sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala ---
@@ -686,6 +686,67 @@ class PlannerSuite extends SharedSQLContext {
Range(1, 2, 1, 1)))
df.queryExecution.executedPlan.execute()
}
+
+ test("SPARK-24556: always rewrite output partitioning in
ReusedExchangeExec " +
+ "and InMemoryTableScanExec") {
+ def checkOutputPartitioningRewrite(
+ plans: Seq[SparkPlan],
--- End diff --
What do you think about merging the `check*OutputPartitioningRewrite` helpers into a single method?
```
/**
 * Checks that the executed plan of `df` contains exactly one
 * `ReusedExchangeExec` or `InMemoryTableScanExec` node, that the node has the
 * expected class, and that its output partitioning has the expected class and
 * only references attributes found in the node's own output set.
 *
 * @param df the DataFrame whose executed plan is inspected
 * @param expectedPlanClass the class the single collected node must have
 * @param expectedPartitioningClass the class its output partitioning must have
 */
def checkPlanAndOutputPartitioningRewrite(
    df: DataFrame,
    expectedPlanClass: Class[_],
    expectedPartitioningClass: Class[_]): Unit = {
  // Gather only the two node kinds this check is about.
  val candidates = df.queryExecution.executedPlan.collect {
    case reused: ReusedExchangeExec => reused
    case cached: InMemoryTableScanExec => cached
  }
  assert(candidates.size == 1)

  val target = candidates.head
  assert(target.getClass == expectedPlanClass)

  val outputPartitioning = target.outputPartitioning
  assert(outputPartitioning.getClass == expectedPartitioningClass)

  // The partitioning is viewed as an Expression so its referenced attributes
  // can be compared against what the node actually outputs.
  val referencedAttrs = outputPartitioning.asInstanceOf[Expression].references
  assert(referencedAttrs.subsetOf(target.outputSet))
}
```
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]