Github user mridulm commented on a diff in the pull request: https://github.com/apache/spark/pull/22010#discussion_r218946996 --- Diff: core/src/main/scala/org/apache/spark/rdd/MapPartitionsRDD.scala --- @@ -35,16 +35,24 @@ import org.apache.spark.{Partition, TaskContext} * @param isOrderSensitive whether or not the function is order-sensitive. If it's order * sensitive, it may return totally different result when the input order * is changed. Mostly stateful functions are order-sensitive. + * @param knownPartitioner If the result has a known partitioner. */ private[spark] class MapPartitionsRDD[U: ClassTag, T: ClassTag]( var prev: RDD[T], f: (TaskContext, Int, Iterator[T]) => Iterator[U], // (TaskContext, partition index, iterator) preservesPartitioning: Boolean = false, isFromBarrier: Boolean = false, - isOrderSensitive: Boolean = false) + isOrderSensitive: Boolean = false, + knownPartitioner: Option[Partitioner] = None) extends RDD[U](prev) { - override val partitioner = if (preservesPartitioning) firstParent[T].partitioner else None + override val partitioner = { + if (preservesPartitioning) { + firstParent[T].partitioner + } else { + knownPartitioner + } + } --- End diff -- yes
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org