Repository: spark
Updated Branches:
  refs/heads/master ffbbc2c58 -> fd1e8cddf


[SPARK-10543] [CORE] Peak Execution Memory Quantile should be Per-task Basis

Read `PEAK_EXECUTION_MEMORY` from the accumulator's `update` field to get the per-task partial value instead of the cumulative `value`.
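
For illustration only (a simplified sketch, not Spark's code; `AccInfo` below is a stand-in for `org.apache.spark.scheduler.AccumulableInfo`), the difference between the two readings looks roughly like this:

```scala
// Simplified stand-in for AccumulableInfo: `value` is the running total across
// all tasks seen so far, `update` is only this task's contribution.
case class AccInfo(name: String, update: Option[String], value: String)

// Three tasks, each peaking at 10 bytes of execution memory.
val perTaskPeaks = Seq(10L, 10L, 10L)
var runningTotal = 0L
val accumulables = perTaskPeaks.map { peak =>
  runningTotal += peak
  AccInfo("peakExecutionMemory", Some(peak.toString), runningTotal.toString)
}

// Before the fix: quantiles were fed the cumulative value -> 10, 20, 30
val before = accumulables.map(_.value.toLong.toDouble)
// After the fix: quantiles are fed the per-task update -> 10, 10, 10
val after = accumulables.map(_.update.getOrElse("0").toLong.toDouble)
```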

I tested with this workload:

```scala
val size = 1000
val repetitions = 10
val data = sc.parallelize(1 to size, 5).map(x =>
  (util.Random.nextInt(size / repetitions), util.Random.nextDouble)).toDF("key", "value")
val res = data.toDF.groupBy("key").agg(sum("value")).count
```
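
As a side note, to eyeball the same numbers outside the UI, a listener along these lines could be attached before running the workload. This is only a sketch, assuming the 1.5-era listener API where `TaskInfo.accumulables` carries `AccumulableInfo` entries with both `update` and `value`, and where the internal accumulator is named `"peakExecutionMemory"`:

```scala
import org.apache.spark.scheduler.{SparkListener, SparkListenerTaskEnd}

// Print both readings of the internal peak-execution-memory accumulator as
// each task finishes: `update` is per-task, `value` is the running total.
sc.addSparkListener(new SparkListener {
  override def onTaskEnd(taskEnd: SparkListenerTaskEnd): Unit = {
    taskEnd.taskInfo.accumulables
      .find(_.name == "peakExecutionMemory")
      .foreach { acc =>
        println(s"task ${taskEnd.taskInfo.taskId}: " +
          s"update=${acc.update.getOrElse("0")}, cumulative value=${acc.value}")
      }
  }
})
```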

Before:
![image](https://cloud.githubusercontent.com/assets/4317392/9828197/07dd6874-58b8-11e5-9bd9-6ba927c38b26.png)

After:
![image](https://cloud.githubusercontent.com/assets/4317392/9828151/a5ddff30-58b7-11e5-8d31-eda5dc4eae79.png)

Tasks view:
![image](https://cloud.githubusercontent.com/assets/4317392/9828199/17dc2b84-58b8-11e5-92a8-be89ce4d29d1.png)

cc andrewor14 I would appreciate your feedback on this, since I believe you introduced the display of this metric.

Author: Forest Fang <forest.f...@outlook.com>

Closes #8726 from saurfang/stagepage.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fd1e8cdd
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fd1e8cdd
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fd1e8cdd

Branch: refs/heads/master
Commit: fd1e8cddf2635c55fec2ac6e1f1c221c9685af0f
Parents: ffbbc2c
Author: Forest Fang <forest.f...@outlook.com>
Authored: Mon Sep 14 15:07:13 2015 -0700
Committer: Andrew Or <and...@databricks.com>
Committed: Mon Sep 14 15:07:13 2015 -0700

----------------------------------------------------------------------
 .../org/apache/spark/ui/jobs/StagePage.scala    |  2 +-
 .../org/apache/spark/ui/StagePageSuite.scala    | 29 +++++++++++++++-----
 2 files changed, 23 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/fd1e8cdd/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala
index 4adc659..2b71f55 100644
--- a/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala
+++ b/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala
@@ -368,7 +368,7 @@ private[ui] class StagePage(parent: StagesTab) extends WebUIPage("stage") {
           val peakExecutionMemory = validTasks.map { case TaskUIData(info, _, _) =>
             info.accumulables
               .find { acc => acc.name == InternalAccumulator.PEAK_EXECUTION_MEMORY }
-              .map { acc => acc.value.toLong }
+              .map { acc => acc.update.getOrElse("0").toLong }
               .getOrElse(0L)
               .toDouble
           }

http://git-wip-us.apache.org/repos/asf/spark/blob/fd1e8cdd/core/src/test/scala/org/apache/spark/ui/StagePageSuite.scala
----------------------------------------------------------------------
diff --git a/core/src/test/scala/org/apache/spark/ui/StagePageSuite.scala b/core/src/test/scala/org/apache/spark/ui/StagePageSuite.scala
index 3388c6d..86699e7 100644
--- a/core/src/test/scala/org/apache/spark/ui/StagePageSuite.scala
+++ b/core/src/test/scala/org/apache/spark/ui/StagePageSuite.scala
@@ -23,7 +23,7 @@ import scala.xml.Node
 
 import org.mockito.Mockito.{mock, when, RETURNS_SMART_NULLS}
 
-import org.apache.spark.{LocalSparkContext, SparkConf, SparkFunSuite, Success}
+import org.apache.spark._
 import org.apache.spark.executor.TaskMetrics
 import org.apache.spark.scheduler._
 import org.apache.spark.ui.jobs.{JobProgressListener, StagePage, StagesTab}
@@ -47,6 +47,14 @@ class StagePageSuite extends SparkFunSuite with LocalSparkContext {
     assert(html3.contains(targetString))
   }
 
+  test("SPARK-10543: peak execution memory should be per-task rather than cumulative") {
+    val unsafeConf = "spark.sql.unsafe.enabled"
+    val conf = new SparkConf(false).set(unsafeConf, "true")
+    val html = renderStagePage(conf).toString().toLowerCase
+    // verify min/25/50/75/max show task value not cumulative values
+    assert(html.contains("<td>10.0 b</td>" * 5))
+  }
+
   /**
    * Render a stage page started with the given conf and return the HTML.
    * This also runs a dummy stage to populate the page with useful content.
@@ -67,12 +75,19 @@ class StagePageSuite extends SparkFunSuite with LocalSparkContext {
 
     // Simulate a stage in job progress listener
    val stageInfo = new StageInfo(0, 0, "dummy", 1, Seq.empty, Seq.empty, "details")
-    val taskInfo = new TaskInfo(0, 0, 0, 0, "0", "localhost", TaskLocality.ANY, false)
-    jobListener.onStageSubmitted(SparkListenerStageSubmitted(stageInfo))
-    jobListener.onTaskStart(SparkListenerTaskStart(0, 0, taskInfo))
-    taskInfo.markSuccessful()
-    jobListener.onTaskEnd(
-      SparkListenerTaskEnd(0, 0, "result", Success, taskInfo, TaskMetrics.empty))
+    // Simulate two tasks to test PEAK_EXECUTION_MEMORY correctness
+    (1 to 2).foreach {
+      taskId =>
+        val taskInfo = new TaskInfo(taskId, taskId, 0, 0, "0", "localhost", TaskLocality.ANY, false)
+        val peakExecutionMemory = 10
+        taskInfo.accumulables += new AccumulableInfo(0, InternalAccumulator.PEAK_EXECUTION_MEMORY,
+          Some(peakExecutionMemory.toString), (peakExecutionMemory * taskId).toString, true)
+        jobListener.onStageSubmitted(SparkListenerStageSubmitted(stageInfo))
+        jobListener.onTaskStart(SparkListenerTaskStart(0, 0, taskInfo))
+        taskInfo.markSuccessful()
+        jobListener.onTaskEnd(
+          SparkListenerTaskEnd(0, 0, "result", Success, taskInfo, TaskMetrics.empty))
+    }
     jobListener.onStageCompleted(SparkListenerStageCompleted(stageInfo))
     page.render(request)
   }

