Github user gatorsmile commented on a diff in the pull request:
https://github.com/apache/spark/pull/19828#discussion_r153273279
--- Diff: sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala ---
@@ -2747,9 +2755,41 @@ class Dataset[T] private[sql](
    * @since 2.0.0
    */
   @scala.annotation.varargs
-  def repartition(partitionExprs: Column*): Dataset[T] = withTypedPlan {
-    RepartitionByExpression(
-      partitionExprs.map(_.expr), logicalPlan, sparkSession.sessionState.conf.numShufflePartitions)
+  def repartition(partitionExprs: Column*): Dataset[T] = {
+    repartition(sparkSession.sessionState.conf.numShufflePartitions, partitionExprs: _*)
+  }
+
+  /**
+   * Returns a new Dataset partitioned by the given partitioning expressions into
+   * `numPartitions`. The resulting Dataset is range partitioned.
+   *
+   * @group typedrel
+   * @since 2.3.0
+   */
+  @scala.annotation.varargs
+  def repartitionByRange(numPartitions: Int, partitionExprs: Column*): Dataset[T] = withTypedPlan {
--- End diff ---
Open a JIRA for adding the corresponding API in PySpark?
---
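For context, a minimal usage sketch of the repartitionByRange API introduced in this diff; the Dataset `df` and its "age" column are hypothetical and not part of the patch:

    import org.apache.spark.sql.functions.col

    // Range-partition `df` into 10 partitions on "age": boundaries are chosen by
    // sampling, so each output partition holds a contiguous, non-overlapping range
    // of the sort key, unlike the hash-based repartition(numPartitions, cols*).
    val byRange = df.repartitionByRange(10, col("age"))

---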
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]