Github user mridulm commented on a diff in the pull request:
https://github.com/apache/spark/pull/22010#discussion_r218946996
--- Diff: core/src/main/scala/org/apache/spark/rdd/MapPartitionsRDD.scala
---
@@ -35,16 +35,24 @@ import org.apache.spark.{Partition, TaskContext}
* @param isOrderSensitive whether or not the function is order-sensitive. If it's order
* sensitive, it may return totally different result when the input order
* is changed. Mostly stateful functions are order-sensitive.
+ * @param knownPartitioner If the result has a known partitioner.
*/
private[spark] class MapPartitionsRDD[U: ClassTag, T: ClassTag](
var prev: RDD[T],
f: (TaskContext, Int, Iterator[T]) => Iterator[U], // (TaskContext, partition index, iterator)
preservesPartitioning: Boolean = false,
isFromBarrier: Boolean = false,
- isOrderSensitive: Boolean = false)
+ isOrderSensitive: Boolean = false,
+ knownPartitioner: Option[Partitioner] = None)
extends RDD[U](prev) {
- override val partitioner = if (preservesPartitioning) firstParent[T].partitioner else None
+ override val partitioner = {
+ if (preservesPartitioning) {
+ firstParent[T].partitioner
+ } else {
+ knownPartitioner
+ }
+ }
--- End diff --
yes
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]