Github user srowen commented on a diff in the pull request:
https://github.com/apache/spark/pull/22356#discussion_r215737907
--- Diff: core/src/test/scala/org/apache/spark/FileSuite.scala ---
@@ -299,6 +301,25 @@ class FileSuite extends SparkFunSuite with
LocalSparkContext {
}
}
+ test("SPARK-22357 test binaryFiles minPartitions") {
+ sc = new SparkContext(new
SparkConf().setAppName("test").setMaster("local")
+ .set("spark.files.openCostInBytes", "0")
--- End diff --
Setting `spark.files.openCostInBytes` to 0 removes its effect in the section of code we're really trying to test:
```
def setMinPartitions(sc: SparkContext, context: JobContext, minPartitions:
Int) {
val defaultMaxSplitBytes =
sc.getConf.get(config.FILES_MAX_PARTITION_BYTES)
val openCostInBytes = sc.getConf.get(config.FILES_OPEN_COST_IN_BYTES)
val defaultParallelism = Math.max(sc.defaultParallelism, minPartitions)
val files = listStatus(context).asScala
val totalBytes = files.filterNot(_.isDirectory).map(_.getLen +
openCostInBytes).sum
val bytesPerCore = totalBytes / defaultParallelism
val maxSplitSize = Math.min(defaultMaxSplitBytes,
Math.max(openCostInBytes, bytesPerCore))
super.setMaxSplitSize(maxSplitSize)
}
```
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]