Repository: spark Updated Branches: refs/heads/master 9ccc53c72 -> 73fedf5a6
[Spark-4484] Treat maxResultSize as unlimited when set to 0; improve error message The check for maxResultSize > 0 is missing, results in failures. Also, error message needs to be improved so the developers know that there is a new parameter to be configured Author: Nishkam Ravi <nr...@cloudera.com> Author: nravi <nr...@c1704.halxg.cloudera.com> Author: nishkamravi2 <nishkamr...@gmail.com> Closes #3360 from nishkamravi2/master_nravi and squashes the following commits: 5c9a4cb [nishkamravi2] Update TaskSetManagerSuite.scala 535295a [nishkamravi2] Update TaskSetManager.scala 3e1b616 [Nishkam Ravi] Modify test for maxResultSize 9f6583e [Nishkam Ravi] Changes to maxResultSize code (improve error message and add condition to check if maxResultSize > 0) 5f8f9ed [Nishkam Ravi] Merge branch 'master' of https://github.com/apache/spark into master_nravi 636a9ff [nishkamravi2] Update YarnAllocator.scala 8f76c8b [Nishkam Ravi] Doc change for yarn memory overhead 35daa64 [Nishkam Ravi] Slight change in the doc for yarn memory overhead 5ac2ec1 [Nishkam Ravi] Remove out dac1047 [Nishkam Ravi] Additional documentation for yarn memory overhead issue 42c2c3d [Nishkam Ravi] Additional changes for yarn memory overhead issue 362da5e [Nishkam Ravi] Additional changes for yarn memory overhead c726bd9 [Nishkam Ravi] Merge branch 'master' of https://github.com/apache/spark into master_nravi f00fa31 [Nishkam Ravi] Improving logging for AM memoryOverhead 1cf2d1e [nishkamravi2] Update YarnAllocator.scala ebcde10 [Nishkam Ravi] Modify default YARN memory_overhead-- from an additive constant to a multiplier (redone to resolve merge conflicts) 2e69f11 [Nishkam Ravi] Merge branch 'master' of https://github.com/apache/spark into master_nravi efd688a [Nishkam Ravi] Merge branch 'master' of https://github.com/apache/spark 2b630f9 [nravi] Accept memory input as "30g", "512M" instead of an int value, to be consistent with rest of Spark 3bf8fad [nravi] Merge branch 'master' of https://github.com/apache/spark 5423a03 [nravi] Merge branch 'master' of https://github.com/apache/spark eb663ca [nravi] Merge branch 'master' of https://github.com/apache/spark df2aeb1 [nravi] Improved fix for ConcurrentModificationIssue (Spark-1097, Hadoop-10456) 6b840f0 [nravi] Undo the fix for SPARK-1758 (the problem is fixed) 5108700 [nravi] Fix in Spark for the Concurrent thread modification issue (SPARK-1097, HADOOP-10456) 681b36f [nravi] Fix for SPARK-1758: failing test org.apache.spark.JavaAPISuite.wholeTextFiles Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/73fedf5a Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/73fedf5a Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/73fedf5a Branch: refs/heads/master Commit: 73fedf5a6e662b640dfe29936753721988bff6ea Parents: 9ccc53c Author: Nishkam Ravi <nr...@cloudera.com> Authored: Wed Nov 19 17:23:42 2014 -0800 Committer: Andrew Or <and...@databricks.com> Committed: Wed Nov 19 17:23:42 2014 -0800 ---------------------------------------------------------------------- core/src/main/scala/org/apache/spark/executor/Executor.scala | 2 +- .../main/scala/org/apache/spark/scheduler/TaskSetManager.scala | 2 +- .../scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/73fedf5a/core/src/main/scala/org/apache/spark/executor/Executor.scala ---------------------------------------------------------------------- diff --git a/core/src/main/scala/org/apache/spark/executor/Executor.scala b/core/src/main/scala/org/apache/spark/executor/Executor.scala index 4c378a2..5fa5845 100644 --- a/core/src/main/scala/org/apache/spark/executor/Executor.scala +++ b/core/src/main/scala/org/apache/spark/executor/Executor.scala @@ -221,7 +221,7 @@ private[spark] class Executor( // directSend = sending directly back to the driver val serializedResult = { - if (resultSize > maxResultSize) { + if (maxResultSize > 0 && resultSize > maxResultSize) { logWarning(s"Finished $taskName (TID $taskId). Result is larger than maxResultSize " + s"(${Utils.bytesToString(resultSize)} > ${Utils.bytesToString(maxResultSize)}), " + s"dropping it.") http://git-wip-us.apache.org/repos/asf/spark/blob/73fedf5a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala ---------------------------------------------------------------------- diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala index d8fb640..cabdc65 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala @@ -536,7 +536,7 @@ private[spark] class TaskSetManager( calculatedTasks += 1 if (maxResultSize > 0 && totalResultSize > maxResultSize) { val msg = s"Total size of serialized results of ${calculatedTasks} tasks " + - s"(${Utils.bytesToString(totalResultSize)}) is bigger than maxResultSize " + + s"(${Utils.bytesToString(totalResultSize)}) is bigger than spark.driver.maxResultSize " + s"(${Utils.bytesToString(maxResultSize)})" logError(msg) abort(msg) http://git-wip-us.apache.org/repos/asf/spark/blob/73fedf5a/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala ---------------------------------------------------------------------- diff --git a/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala index 1809b53..4721915 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala @@ -579,13 +579,13 @@ class TaskSetManagerSuite extends FunSuite with LocalSparkContext with Logging { // single 10M result val thrown = intercept[SparkException] {sc.makeRDD(genBytes(10 << 20)(0), 1).collect()} - assert(thrown.getMessage().contains("bigger than maxResultSize")) + assert(thrown.getMessage().contains("bigger than spark.driver.maxResultSize")) // multiple 1M results val thrown2 = intercept[SparkException] { sc.makeRDD(0 until 10, 10).map(genBytes(1 << 20)).collect() } - assert(thrown2.getMessage().contains("bigger than maxResultSize")) + assert(thrown2.getMessage().contains("bigger than spark.driver.maxResultSize")) } test("speculative and noPref task should be scheduled after node-local") { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org