spark git commit: [SPARK-15796][CORE] Reduce spark.memory.fraction default to avoid overrunning old gen in JVM default config
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 579268426 -> 095ddb4c9


[SPARK-15796][CORE] Reduce spark.memory.fraction default to avoid overrunning old gen in JVM default config

## What changes were proposed in this pull request?

Reduce `spark.memory.fraction` default to 0.6 in order to make it fit within default JVM old generation size (2/3 heap). See JIRA discussion. This means a full cache doesn't spill into the new gen. CC andrewor14

## How was this patch tested?

Jenkins tests.

Author: Sean Owen

Closes #13618 from srowen/SPARK-15796.

(cherry picked from commit 457126e420e66228cc68def4bc3d87e7a282069a)
Signed-off-by: Sean Owen


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/095ddb4c
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/095ddb4c
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/095ddb4c

Branch: refs/heads/branch-2.0
Commit: 095ddb4c9e7ab9193c15c69eb057a9bb2dbdaed1
Parents: 5792684
Author: Sean Owen
Authored: Thu Jun 16 23:04:10 2016 +0200
Committer: Sean Owen
Committed: Thu Jun 16 23:04:19 2016 +0200

----------------------------------------------------------------------
 .../spark/memory/UnifiedMemoryManager.scala        |  8 ++++----
 .../scala/org/apache/spark/DistributedSuite.scala  |  2 +-
 docs/configuration.md                              |  7 ++++---
 docs/tuning.md                                     | 18 +++++++++++++++++-
 4 files changed, 26 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/095ddb4c/core/src/main/scala/org/apache/spark/memory/UnifiedMemoryManager.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/memory/UnifiedMemoryManager.scala b/core/src/main/scala/org/apache/spark/memory/UnifiedMemoryManager.scala
index ae747c1..c7b36be 100644
--- a/core/src/main/scala/org/apache/spark/memory/UnifiedMemoryManager.scala
+++ b/core/src/main/scala/org/apache/spark/memory/UnifiedMemoryManager.scala
@@ -25,9 +25,9 @@ import org.apache.spark.storage.BlockId
  * either side can borrow memory from the other.
  *
  * The region shared between execution and storage is a fraction of (the total heap space - 300MB)
- * configurable through `spark.memory.fraction` (default 0.75). The position of the boundary
+ * configurable through `spark.memory.fraction` (default 0.6). The position of the boundary
  * within this space is further determined by `spark.memory.storageFraction` (default 0.5).
- * This means the size of the storage region is 0.75 * 0.5 = 0.375 of the heap space by default.
+ * This means the size of the storage region is 0.6 * 0.5 = 0.3 of the heap space by default.
  *
  * Storage can borrow as much execution memory as is free until execution reclaims its space.
  * When this happens, cached blocks will be evicted from memory until sufficient borrowed
@@ -187,7 +187,7 @@ object UnifiedMemoryManager {
   // Set aside a fixed amount of memory for non-storage, non-execution purposes.
   // This serves a function similar to `spark.memory.fraction`, but guarantees that we reserve
   // sufficient memory for the system even for small heaps. E.g. if we have a 1GB JVM, then
-  // the memory used for execution and storage will be (1024 - 300) * 0.75 = 543MB by default.
+  // the memory used for execution and storage will be (1024 - 300) * 0.6 = 434MB by default.
   private val RESERVED_SYSTEM_MEMORY_BYTES = 300 * 1024 * 1024

   def apply(conf: SparkConf, numCores: Int): UnifiedMemoryManager = {
@@ -223,7 +223,7 @@ object UnifiedMemoryManager {
       }
     }
     val usableMemory = systemMemory - reservedMemory
-    val memoryFraction = conf.getDouble("spark.memory.fraction", 0.75)
+    val memoryFraction = conf.getDouble("spark.memory.fraction", 0.6)
     (usableMemory * memoryFraction).toLong
   }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/095ddb4c/core/src/test/scala/org/apache/spark/DistributedSuite.scala
----------------------------------------------------------------------
diff --git a/core/src/test/scala/org/apache/spark/DistributedSuite.scala b/core/src/test/scala/org/apache/spark/DistributedSuite.scala
index 6e69fc4..0515e6e 100644
--- a/core/src/test/scala/org/apache/spark/DistributedSuite.scala
+++ b/core/src/test/scala/org/apache/spark/DistributedSuite.scala
@@ -223,7 +223,7 @@ class DistributedSuite extends SparkFunSuite with Matchers with LocalSparkContex

   test("compute when only some partitions fit in memory") {
     val size = 1
-    val numPartitions = 10
+    val numPartitions = 20
     val conf = new SparkConf()
       .set("spark.storage.unrollMemoryThreshold", "1024")
       .set("spark.testing.memory", size.toString)

http://git-wip-us.apache.org/repos/asf/spark/blob/095ddb4c/docs/configuration.md
----------------------------------------------------------------------
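The sizing arithmetic in the updated comment is easy to check in isolation. Below is a minimal, self-contained Scala sketch of the formula described above; the object and value names are illustrative and are not Spark internals:

```scala
// Minimal sketch of the default sizing formula from the patch above.
// Names are illustrative; this is not the actual UnifiedMemoryManager code.
object MemoryFractionSketch {
  private val ReservedSystemMemoryMb = 300 // fixed reservation for non-storage, non-execution use
  private val MemoryFraction = 0.6         // new spark.memory.fraction default
  private val StorageFraction = 0.5        // spark.memory.storageFraction default (unchanged)

  def main(args: Array[String]): Unit = {
    val heapMb = 1024L                                   // example: a 1GB JVM
    val usableMb = heapMb - ReservedSystemMemoryMb       // 1024 - 300 = 724
    val unifiedMb = (usableMb * MemoryFraction).toLong   // 724 * 0.6 = 434 (execution + storage)
    val storageMb = (unifiedMb * StorageFraction).toLong // 434 * 0.5 = 217 (storage region)
    println(s"unified: ${unifiedMb}MB, storage: ${storageMb}MB")
  }
}
```

This also shows why 0.6 was chosen: 0.6 * (heap - 300MB) always stays below the JVM's default old generation size of 2/3 of the heap, whereas 0.75 * (heap - 300MB) exceeds it once the heap grows beyond roughly 2.7GB.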
spark git commit: [SPARK-15796][CORE] Reduce spark.memory.fraction default to avoid overrunning old gen in JVM default config
Repository: spark
Updated Branches:
  refs/heads/master 36110a830 -> 457126e42


[SPARK-15796][CORE] Reduce spark.memory.fraction default to avoid overrunning old gen in JVM default config

## What changes were proposed in this pull request?

Reduce `spark.memory.fraction` default to 0.6 in order to make it fit within default JVM old generation size (2/3 heap). See JIRA discussion. This means a full cache doesn't spill into the new gen. CC andrewor14

## How was this patch tested?

Jenkins tests.

Author: Sean Owen

Closes #13618 from srowen/SPARK-15796.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/457126e4
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/457126e4
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/457126e4

Branch: refs/heads/master
Commit: 457126e420e66228cc68def4bc3d87e7a282069a
Parents: 36110a8
Author: Sean Owen
Authored: Thu Jun 16 23:04:10 2016 +0200
Committer: Sean Owen
Committed: Thu Jun 16 23:04:10 2016 +0200

----------------------------------------------------------------------
 .../spark/memory/UnifiedMemoryManager.scala        |  8 ++++----
 .../scala/org/apache/spark/DistributedSuite.scala  |  2 +-
 docs/configuration.md                              |  7 ++++---
 docs/tuning.md                                     | 18 +++++++++++++++++-
 4 files changed, 26 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/457126e4/core/src/main/scala/org/apache/spark/memory/UnifiedMemoryManager.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/memory/UnifiedMemoryManager.scala b/core/src/main/scala/org/apache/spark/memory/UnifiedMemoryManager.scala
index ae747c1..c7b36be 100644
--- a/core/src/main/scala/org/apache/spark/memory/UnifiedMemoryManager.scala
+++ b/core/src/main/scala/org/apache/spark/memory/UnifiedMemoryManager.scala
@@ -25,9 +25,9 @@ import org.apache.spark.storage.BlockId
  * either side can borrow memory from the other.
  *
  * The region shared between execution and storage is a fraction of (the total heap space - 300MB)
- * configurable through `spark.memory.fraction` (default 0.75). The position of the boundary
+ * configurable through `spark.memory.fraction` (default 0.6). The position of the boundary
  * within this space is further determined by `spark.memory.storageFraction` (default 0.5).
- * This means the size of the storage region is 0.75 * 0.5 = 0.375 of the heap space by default.
+ * This means the size of the storage region is 0.6 * 0.5 = 0.3 of the heap space by default.
  *
  * Storage can borrow as much execution memory as is free until execution reclaims its space.
  * When this happens, cached blocks will be evicted from memory until sufficient borrowed
@@ -187,7 +187,7 @@ object UnifiedMemoryManager {
   // Set aside a fixed amount of memory for non-storage, non-execution purposes.
   // This serves a function similar to `spark.memory.fraction`, but guarantees that we reserve
   // sufficient memory for the system even for small heaps. E.g. if we have a 1GB JVM, then
-  // the memory used for execution and storage will be (1024 - 300) * 0.75 = 543MB by default.
+  // the memory used for execution and storage will be (1024 - 300) * 0.6 = 434MB by default.
   private val RESERVED_SYSTEM_MEMORY_BYTES = 300 * 1024 * 1024

   def apply(conf: SparkConf, numCores: Int): UnifiedMemoryManager = {
@@ -223,7 +223,7 @@ object UnifiedMemoryManager {
       }
     }
     val usableMemory = systemMemory - reservedMemory
-    val memoryFraction = conf.getDouble("spark.memory.fraction", 0.75)
+    val memoryFraction = conf.getDouble("spark.memory.fraction", 0.6)
     (usableMemory * memoryFraction).toLong
   }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/457126e4/core/src/test/scala/org/apache/spark/DistributedSuite.scala
----------------------------------------------------------------------
diff --git a/core/src/test/scala/org/apache/spark/DistributedSuite.scala b/core/src/test/scala/org/apache/spark/DistributedSuite.scala
index 6e69fc4..0515e6e 100644
--- a/core/src/test/scala/org/apache/spark/DistributedSuite.scala
+++ b/core/src/test/scala/org/apache/spark/DistributedSuite.scala
@@ -223,7 +223,7 @@ class DistributedSuite extends SparkFunSuite with Matchers with LocalSparkContex

   test("compute when only some partitions fit in memory") {
     val size = 1
-    val numPartitions = 10
+    val numPartitions = 20
     val conf = new SparkConf()
       .set("spark.storage.unrollMemoryThreshold", "1024")
       .set("spark.testing.memory", size.toString)

http://git-wip-us.apache.org/repos/asf/spark/blob/457126e4/docs/configuration.md
----------------------------------------------------------------------
diff --git a/docs/configuration.md b/docs/configuration.md
index 32c3a92..f
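Since `spark.memory.fraction` remains configurable, applications that were tuned around the old 0.75 default can restore it explicitly. A small sketch of such an override, assuming a hypothetical driver (the object name, app name, and master URL are placeholders, not part of the patch):

```scala
import org.apache.spark.{SparkConf, SparkContext}

// Hypothetical driver restoring the pre-2.0 fraction for a cache-heavy job.
// The object name, app name, and master URL are placeholders.
object MemoryFractionOverride {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setAppName("memory-fraction-override")
      .setMaster("local[*]")
      .set("spark.memory.fraction", "0.75")       // default is 0.6 as of this commit
      .set("spark.memory.storageFraction", "0.5") // unchanged default
    val sc = new SparkContext(conf)
    try {
      println(sc.getConf.get("spark.memory.fraction")) // prints 0.75
    } finally {
      sc.stop()
    }
  }
}
```

The same override can be passed to spark-submit as `--conf spark.memory.fraction=0.75`; per the rationale above, raising it back means the unified region may again outgrow the JVM's default old generation sizing.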