Github user mridulm commented on a diff in the pull request:
https://github.com/apache/spark/pull/20091#discussion_r162778240
--- Diff: core/src/test/scala/org/apache/spark/PartitioningSuite.scala ---
@@ -284,7 +284,38 @@ class PartitioningSuite extends SparkFunSuite with
SharedSparkContext with Priva
assert(partitioner3.numPartitions == rdd3.getNumPartitions)
assert(partitioner4.numPartitions == rdd3.getNumPartitions)
assert(partitioner5.numPartitions == rdd4.getNumPartitions)
+ }
+ test("defaultPartitioner when defaultParallelism is set") {
+ assert(!sc.conf.contains("spark.default.parallelism"))
+ try {
+ sc.conf.set("spark.default.parallelism", "4")
+
+ val rdd1 = sc.parallelize((1 to 1000).map(x => (x, x)), 150)
+ val rdd2 = sc.parallelize(Array((1, 2), (2, 3), (2, 4), (3, 4)))
+ .partitionBy(new HashPartitioner(10))
+ val rdd3 = sc.parallelize(Array((1, 6), (7, 8), (3, 10), (5, 12),
(13, 14)))
+ .partitionBy(new HashPartitioner(100))
+ val rdd4 = sc.parallelize(Array((1, 2), (2, 3), (2, 4), (3, 4)))
+ .partitionBy(new HashPartitioner(9))
+ val rdd5 = sc.parallelize((1 to 10).map(x => (x, x)), 11)
--- End diff --
Can we add a case where the partitioner is not used and the default (from
spark.default.parallelism) gets used?
For example, something like the following pseudo-code:
```
val rdd6 = sc.parallelize(Array((1, 2), (2, 3), (2, 4), (3,
4))).partitionBy(new HashPartitioner(3))
...
Partitioner.defaultPartitioner(rdd1, rdd6).numPartitions ==
sc.conf.get("spark.default.parallelism").toInt
```
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]