Github user gatorsmile commented on a diff in the pull request:
https://github.com/apache/spark/pull/19828#discussion_r153857985
--- Diff: sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala ---
@@ -2733,23 +2733,63 @@ class Dataset[T] private[sql](
*/
@scala.annotation.varargs
def repartition(numPartitions: Int, partitionExprs: Column*): Dataset[T] = withTypedPlan {
+ // The underlying `LogicalPlan` operator special-cases all-`SortOrder` arguments.
+ // However, we don't want to complicate the semantics of this API method. Instead, let's
+ // give users a friendly error message, pointing them to the new method.
+ val sortOrders = partitionExprs.filter(_.expr.isInstanceOf[SortOrder])
+ if (sortOrders.nonEmpty) throw new IllegalArgumentException(
+ s"""Invalid partitionExprs specified: $sortOrders
+ |For range partitioning use repartitionByRange(...) instead.
+ """.stripMargin)
RepartitionByExpression(partitionExprs.map(_.expr), logicalPlan, numPartitions)
}
/**
- * Returns a new Dataset partitioned by the given partitioning expressions, using
- * `spark.sql.shuffle.partitions` as number of partitions.
- * The resulting Dataset is hash partitioned.
+ * Returns a new Dataset that is hash partitioned by the given expressions, using
+ * `spark.sql.shuffle.partitions` as the number of partitions. If no expressions are specified,
+ * round robin partitioning is used.
*
* This is the same operation as "DISTRIBUTE BY" in SQL (Hive QL).
*
* @group typedrel
* @since 2.0.0
*/
@scala.annotation.varargs
- def repartition(partitionExprs: Column*): Dataset[T] = withTypedPlan {
- RepartitionByExpression(
- partitionExprs.map(_.expr), logicalPlan, sparkSession.sessionState.conf.numShufflePartitions)
+ def repartition(partitionExprs: Column*): Dataset[T] = {
+ repartition(sparkSession.sessionState.conf.numShufflePartitions, partitionExprs: _*)
+ }
+
+ /**
+ * Returns a new Dataset that is hash partitioned by the given expressions into `numPartitions`.
--- End diff --
`hash` -> `range`
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]