spark git commit: [SPARK-10543] [CORE] Peak Execution Memory Quantile should be Per-task Basis
Repository: spark
Updated Branches:
  refs/heads/master ffbbc2c58 -> fd1e8cddf


[SPARK-10543] [CORE] Peak Execution Memory Quantile should be Per-task Basis

Read `PEAK_EXECUTION_MEMORY` using `update` to get the per-task partial value instead of the cumulative value.

I tested with this workload:

```scala
val size = 1000
val repetitions = 10
val data = sc.parallelize(1 to size, 5).map(x => (util.Random.nextInt(size / repetitions), util.Random.nextDouble)).toDF("key", "value")
val res = data.toDF.groupBy("key").agg(sum("value")).count
```

Before:
![image](https://cloud.githubusercontent.com/assets/4317392/9828197/07dd6874-58b8-11e5-9bd9-6ba927c38b26.png)

After:
![image](https://cloud.githubusercontent.com/assets/4317392/9828151/a5ddff30-58b7-11e5-8d31-eda5dc4eae79.png)

Tasks view:
![image](https://cloud.githubusercontent.com/assets/4317392/9828199/17dc2b84-58b8-11e5-92a8-be89ce4d29d1.png)

cc andrewor14, I would appreciate your feedback on this since I think you introduced the display of this metric.

Author: Forest Fang

Closes #8726 from saurfang/stagepage.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fd1e8cdd
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fd1e8cdd
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fd1e8cdd

Branch: refs/heads/master
Commit: fd1e8cddf2635c55fec2ac6e1f1c221c9685af0f
Parents: ffbbc2c
Author: Forest Fang
Authored: Mon Sep 14 15:07:13 2015 -0700
Committer: Andrew Or
Committed: Mon Sep 14 15:07:13 2015 -0700

----------------------------------------------------------------------
 .../org/apache/spark/ui/jobs/StagePage.scala    |  2 +-
 .../org/apache/spark/ui/StagePageSuite.scala    | 29 +++-
 2 files changed, 23 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/fd1e8cdd/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala
index 4adc659..2b71f55 100644
--- a/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala
+++ b/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala
@@ -368,7 +368,7 @@ private[ui] class StagePage(parent: StagesTab) extends WebUIPage("stage") {
         val peakExecutionMemory = validTasks.map { case TaskUIData(info, _, _) =>
           info.accumulables
             .find { acc => acc.name == InternalAccumulator.PEAK_EXECUTION_MEMORY }
-            .map { acc => acc.value.toLong }
+            .map { acc => acc.update.getOrElse("0").toLong }
             .getOrElse(0L)
             .toDouble
         }

http://git-wip-us.apache.org/repos/asf/spark/blob/fd1e8cdd/core/src/test/scala/org/apache/spark/ui/StagePageSuite.scala
----------------------------------------------------------------------
diff --git a/core/src/test/scala/org/apache/spark/ui/StagePageSuite.scala b/core/src/test/scala/org/apache/spark/ui/StagePageSuite.scala
index 3388c6d..86699e7 100644
--- a/core/src/test/scala/org/apache/spark/ui/StagePageSuite.scala
+++ b/core/src/test/scala/org/apache/spark/ui/StagePageSuite.scala
@@ -23,7 +23,7 @@ import scala.xml.Node
 
 import org.mockito.Mockito.{mock, when, RETURNS_SMART_NULLS}
 
-import org.apache.spark.{LocalSparkContext, SparkConf, SparkFunSuite, Success}
+import org.apache.spark._
 import org.apache.spark.executor.TaskMetrics
 import org.apache.spark.scheduler._
 import org.apache.spark.ui.jobs.{JobProgressListener, StagePage, StagesTab}
@@ -47,6 +47,14 @@ class StagePageSuite extends SparkFunSuite with LocalSparkContext {
     assert(html3.contains(targetString))
   }
 
+  test("SPARK-10543: peak execution memory should be per-task rather than cumulative") {
+    val unsafeConf = "spark.sql.unsafe.enabled"
+    val conf = new SparkConf(false).set(unsafeConf, "true")
+    val html = renderStagePage(conf).toString().toLowerCase
+    // verify min/25/50/75/max show task value not cumulative values
+    assert(html.contains("10.0 b" * 5))
+  }
+
   /**
    * Render a stage page started with the given conf and return the HTML.
    * This also runs a dummy stage to populate the page with useful content.
@@ -67,12 +75,19 @@ class StagePageSuite extends SparkFunSuite with LocalSparkContext {
     // Simulate a stage in job progress listener
     val stageInfo = new StageInfo(0, 0, "dummy", 1, Seq.empty, Seq.empty, "details")
-    val taskInfo = new TaskInfo(0, 0, 0, 0, "0", "localhost", TaskLocality.ANY, false)
-    jobListener.onStageSubmitted(SparkListenerStageSubmitted(stageInfo))
-    jobListener.onTaskStart(SparkListenerTaskStart(0, 0, taskInfo))
-    taskInfo.markSuccessful()
-    jobListener.onTaskEnd(
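The gist of the fix, restated outside the diff: the stage page summarizes one number per task into min/25th/50th/75th/max columns, so that number has to be the task's own peak, not the accumulator's running total at the moment the task finished. The following self-contained Scala sketch (hypothetical values and a simplified nearest-rank quantile helper, not Spark internals) shows how quantiles over the cumulative `value` inflate the summary while quantiles over the per-task `update` report the same figure for every column:

```scala
// Hypothetical data and a simplified quantile helper, not Spark code.
object PeakMemoryQuantileSketch {
  def main(args: Array[String]): Unit = {
    // Five tasks, each peaking at 10 bytes of execution memory.
    val perTaskPeaks = Seq(10L, 10L, 10L, 10L, 10L)

    // The accumulator's `value` is a running total across completed tasks,
    // so reading it per task yields 10, 20, 30, 40, 50 instead.
    val cumulativeValues = perTaskPeaks.scanLeft(0L)(_ + _).tail

    // Nearest-rank quantiles at 0/25/50/75/100%, roughly how the stage page
    // summarizes a metric column.
    def quantiles(xs: Seq[Long]): Seq[Long] = {
      val sorted = xs.sorted
      Seq(0.0, 0.25, 0.5, 0.75, 1.0).map { q =>
        sorted(((sorted.length - 1) * q).round.toInt)
      }
    }

    println(s"per-task quantiles:   ${quantiles(perTaskPeaks).mkString(", ")}")     // 10, 10, 10, 10, 10
    println(s"cumulative quantiles: ${quantiles(cumulativeValues).mkString(", ")}") // 10, 20, 30, 40, 50
  }
}
```

This mirrors what the new regression test asserts: with identical tasks, all five summary cells should render the same per-task figure ("10.0 b" repeated five times) rather than a growing sequence.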
spark git commit: [SPARK-10543] [CORE] Peak Execution Memory Quantile should be Per-task Basis
Repository: spark
Updated Branches:
  refs/heads/branch-1.5 0e1c9d9ff -> eb0cb25bb


[SPARK-10543] [CORE] Peak Execution Memory Quantile should be Per-task Basis

Read `PEAK_EXECUTION_MEMORY` using `update` to get the per-task partial value instead of the cumulative value.

I tested with this workload:

```scala
val size = 1000
val repetitions = 10
val data = sc.parallelize(1 to size, 5).map(x => (util.Random.nextInt(size / repetitions), util.Random.nextDouble)).toDF("key", "value")
val res = data.toDF.groupBy("key").agg(sum("value")).count
```

Before:
![image](https://cloud.githubusercontent.com/assets/4317392/9828197/07dd6874-58b8-11e5-9bd9-6ba927c38b26.png)

After:
![image](https://cloud.githubusercontent.com/assets/4317392/9828151/a5ddff30-58b7-11e5-8d31-eda5dc4eae79.png)

Tasks view:
![image](https://cloud.githubusercontent.com/assets/4317392/9828199/17dc2b84-58b8-11e5-92a8-be89ce4d29d1.png)

cc andrewor14, I would appreciate your feedback on this since I think you introduced the display of this metric.

Author: Forest Fang

Closes #8726 from saurfang/stagepage.

(cherry picked from commit fd1e8cddf2635c55fec2ac6e1f1c221c9685af0f)
Signed-off-by: Andrew Or


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/eb0cb25b
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/eb0cb25b
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/eb0cb25b

Branch: refs/heads/branch-1.5
Commit: eb0cb25bb81a5aa271d2a0266e5a31b36d1fc071
Parents: 0e1c9d9
Author: Forest Fang
Authored: Mon Sep 14 15:07:13 2015 -0700
Committer: Andrew Or
Committed: Mon Sep 14 15:07:24 2015 -0700

----------------------------------------------------------------------
 .../org/apache/spark/ui/jobs/StagePage.scala    |  2 +-
 .../org/apache/spark/ui/StagePageSuite.scala    | 29 +++-
 2 files changed, 23 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/eb0cb25b/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala
index 4adc659..2b71f55 100644
--- a/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala
+++ b/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala
@@ -368,7 +368,7 @@ private[ui] class StagePage(parent: StagesTab) extends WebUIPage("stage") {
         val peakExecutionMemory = validTasks.map { case TaskUIData(info, _, _) =>
           info.accumulables
             .find { acc => acc.name == InternalAccumulator.PEAK_EXECUTION_MEMORY }
-            .map { acc => acc.value.toLong }
+            .map { acc => acc.update.getOrElse("0").toLong }
             .getOrElse(0L)
             .toDouble
         }

http://git-wip-us.apache.org/repos/asf/spark/blob/eb0cb25b/core/src/test/scala/org/apache/spark/ui/StagePageSuite.scala
----------------------------------------------------------------------
diff --git a/core/src/test/scala/org/apache/spark/ui/StagePageSuite.scala b/core/src/test/scala/org/apache/spark/ui/StagePageSuite.scala
index 3388c6d..86699e7 100644
--- a/core/src/test/scala/org/apache/spark/ui/StagePageSuite.scala
+++ b/core/src/test/scala/org/apache/spark/ui/StagePageSuite.scala
@@ -23,7 +23,7 @@ import scala.xml.Node
 
 import org.mockito.Mockito.{mock, when, RETURNS_SMART_NULLS}
 
-import org.apache.spark.{LocalSparkContext, SparkConf, SparkFunSuite, Success}
+import org.apache.spark._
 import org.apache.spark.executor.TaskMetrics
 import org.apache.spark.scheduler._
 import org.apache.spark.ui.jobs.{JobProgressListener, StagePage, StagesTab}
@@ -47,6 +47,14 @@ class StagePageSuite extends SparkFunSuite with LocalSparkContext {
     assert(html3.contains(targetString))
   }
 
+  test("SPARK-10543: peak execution memory should be per-task rather than cumulative") {
+    val unsafeConf = "spark.sql.unsafe.enabled"
+    val conf = new SparkConf(false).set(unsafeConf, "true")
+    val html = renderStagePage(conf).toString().toLowerCase
+    // verify min/25/50/75/max show task value not cumulative values
+    assert(html.contains("10.0 b" * 5))
+  }
+
   /**
    * Render a stage page started with the given conf and return the HTML.
    * This also runs a dummy stage to populate the page with useful content.
@@ -67,12 +75,19 @@ class StagePageSuite extends SparkFunSuite with LocalSparkContext {
     // Simulate a stage in job progress listener
     val stageInfo = new StageInfo(0, 0, "dummy", 1, Seq.empty, Seq.empty, "details")
-    val taskInfo = new TaskInfo(0, 0, 0, 0, "0", "localhost", TaskLocality.ANY, false)
-    jobListener.onStageSubmitted(SparkListenerStageSubmitted(stageInfo))
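For readers skimming the flattened diff, here is the read pattern the patch switches to, pulled out into a self-contained sketch. The `AccumulableInfoSketch` case class below is a simplified stand-in for Spark's `AccumulableInfo` (only the field names `update` and `value` are borrowed); the metric name and the example values are hypothetical:

```scala
// Simplified stand-in for Spark's AccumulableInfo: `update` is the value a
// single task contributed (absent if it reported nothing), `value` the running total.
case class AccumulableInfoSketch(name: String, update: Option[String], value: String)

object PerTaskPeakSketch {
  val PeakExecutionMemory = "peakExecutionMemory" // placeholder metric name

  // Old behaviour read the cumulative `value`; the fix reads the per-task
  // `update`, defaulting to 0 when the task did not report one.
  def perTaskPeak(accumulables: Seq[AccumulableInfoSketch]): Long =
    accumulables
      .find(_.name == PeakExecutionMemory)
      .map(_.update.getOrElse("0").toLong)
      .getOrElse(0L)

  def main(args: Array[String]): Unit = {
    val acc = AccumulableInfoSketch(PeakExecutionMemory, update = Some("10"), value = "30")
    println(perTaskPeak(Seq(acc))) // prints 10 (this task's peak), not the cumulative 30
  }
}
```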