spark git commit: [SPARK-10543] [CORE] Peak Execution Memory Quantile should be Per-task Basis

2015-09-14 Thread andrewor14
Repository: spark
Updated Branches:
  refs/heads/master ffbbc2c58 -> fd1e8cddf


[SPARK-10543] [CORE] Peak Execution Memory Quantile should be Per-task Basis

Read `PEAK_EXECUTION_MEMORY` using `update` to get the per-task partial value 
instead of the cumulative value.

I tested with this workload:

```scala
val size = 1000
val repetitions = 10
val data = sc.parallelize(1 to size, 5).map(x => (util.Random.nextInt(size / 
repetitions),util.Random.nextDouble)).toDF("key", "value")
val res = data.toDF.groupBy("key").agg(sum("value")).count
```

Before:
![image](https://cloud.githubusercontent.com/assets/4317392/9828197/07dd6874-58b8-11e5-9bd9-6ba927c38b26.png)

After:
![image](https://cloud.githubusercontent.com/assets/4317392/9828151/a5ddff30-58b7-11e5-8d31-eda5dc4eae79.png)

Tasks view:
![image](https://cloud.githubusercontent.com/assets/4317392/9828199/17dc2b84-58b8-11e5-92a8-be89ce4d29d1.png)

cc andrewor14 I would appreciate it if you could give feedback on this, since I 
think you introduced the display of this metric.

Author: Forest Fang 

Closes #8726 from saurfang/stagepage.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fd1e8cdd
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fd1e8cdd
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fd1e8cdd

Branch: refs/heads/master
Commit: fd1e8cddf2635c55fec2ac6e1f1c221c9685af0f
Parents: ffbbc2c
Author: Forest Fang 
Authored: Mon Sep 14 15:07:13 2015 -0700
Committer: Andrew Or 
Committed: Mon Sep 14 15:07:13 2015 -0700

--
 .../org/apache/spark/ui/jobs/StagePage.scala|  2 +-
 .../org/apache/spark/ui/StagePageSuite.scala| 29 +++-
 2 files changed, 23 insertions(+), 8 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/fd1e8cdd/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala
--
diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala 
b/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala
index 4adc659..2b71f55 100644
--- a/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala
+++ b/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala
@@ -368,7 +368,7 @@ private[ui] class StagePage(parent: StagesTab) extends 
WebUIPage("stage") {
   val peakExecutionMemory = validTasks.map { case TaskUIData(info, _, 
_) =>
 info.accumulables
   .find { acc => acc.name == 
InternalAccumulator.PEAK_EXECUTION_MEMORY }
-  .map { acc => acc.value.toLong }
+  .map { acc => acc.update.getOrElse("0").toLong }
   .getOrElse(0L)
   .toDouble
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/fd1e8cdd/core/src/test/scala/org/apache/spark/ui/StagePageSuite.scala
--
diff --git a/core/src/test/scala/org/apache/spark/ui/StagePageSuite.scala 
b/core/src/test/scala/org/apache/spark/ui/StagePageSuite.scala
index 3388c6d..86699e7 100644
--- a/core/src/test/scala/org/apache/spark/ui/StagePageSuite.scala
+++ b/core/src/test/scala/org/apache/spark/ui/StagePageSuite.scala
@@ -23,7 +23,7 @@ import scala.xml.Node
 
 import org.mockito.Mockito.{mock, when, RETURNS_SMART_NULLS}
 
-import org.apache.spark.{LocalSparkContext, SparkConf, SparkFunSuite, Success}
+import org.apache.spark._
 import org.apache.spark.executor.TaskMetrics
 import org.apache.spark.scheduler._
 import org.apache.spark.ui.jobs.{JobProgressListener, StagePage, StagesTab}
@@ -47,6 +47,14 @@ class StagePageSuite extends SparkFunSuite with 
LocalSparkContext {
 assert(html3.contains(targetString))
   }
 
+  test("SPARK-10543: peak execution memory should be per-task rather than 
cumulative") {
+val unsafeConf = "spark.sql.unsafe.enabled"
+val conf = new SparkConf(false).set(unsafeConf, "true")
+val html = renderStagePage(conf).toString().toLowerCase
+// verify min/25/50/75/max show task value not cumulative values
+assert(html.contains("10.0 b" * 5))
+  }
+
   /**
* Render a stage page started with the given conf and return the HTML.
* This also runs a dummy stage to populate the page with useful content.
@@ -67,12 +75,19 @@ class StagePageSuite extends SparkFunSuite with 
LocalSparkContext {
 
 // Simulate a stage in job progress listener
 val stageInfo = new StageInfo(0, 0, "dummy", 1, Seq.empty, Seq.empty, 
"details")
-val taskInfo = new TaskInfo(0, 0, 0, 0, "0", "localhost", 
TaskLocality.ANY, false)
-jobListener.onStageSubmitted(SparkListenerStageSubmitted(stageInfo))
-jobListener.onTaskStart(SparkListenerTaskStart(0, 0, taskInfo))
-taskInfo.markSuccessful()
-jobListener.onTaskEnd(
-  

spark git commit: [SPARK-10543] [CORE] Peak Execution Memory Quantile should be Per-task Basis

2015-09-14 Thread andrewor14
Repository: spark
Updated Branches:
  refs/heads/branch-1.5 0e1c9d9ff -> eb0cb25bb


[SPARK-10543] [CORE] Peak Execution Memory Quantile should be Per-task Basis

Read `PEAK_EXECUTION_MEMORY` using `update` to get the per-task partial value 
instead of the cumulative value.

I tested with this workload:

```scala
val size = 1000
val repetitions = 10
val data = sc.parallelize(1 to size, 5).map(x => (util.Random.nextInt(size / 
repetitions),util.Random.nextDouble)).toDF("key", "value")
val res = data.toDF.groupBy("key").agg(sum("value")).count
```

Before:
![image](https://cloud.githubusercontent.com/assets/4317392/9828197/07dd6874-58b8-11e5-9bd9-6ba927c38b26.png)

After:
![image](https://cloud.githubusercontent.com/assets/4317392/9828151/a5ddff30-58b7-11e5-8d31-eda5dc4eae79.png)

Tasks view:
![image](https://cloud.githubusercontent.com/assets/4317392/9828199/17dc2b84-58b8-11e5-92a8-be89ce4d29d1.png)

cc andrewor14 I would appreciate it if you could give feedback on this, since I 
think you introduced the display of this metric.

Author: Forest Fang 

Closes #8726 from saurfang/stagepage.

(cherry picked from commit fd1e8cddf2635c55fec2ac6e1f1c221c9685af0f)
Signed-off-by: Andrew Or 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/eb0cb25b
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/eb0cb25b
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/eb0cb25b

Branch: refs/heads/branch-1.5
Commit: eb0cb25bb81a5aa271d2a0266e5a31b36d1fc071
Parents: 0e1c9d9
Author: Forest Fang 
Authored: Mon Sep 14 15:07:13 2015 -0700
Committer: Andrew Or 
Committed: Mon Sep 14 15:07:24 2015 -0700

--
 .../org/apache/spark/ui/jobs/StagePage.scala|  2 +-
 .../org/apache/spark/ui/StagePageSuite.scala| 29 +++-
 2 files changed, 23 insertions(+), 8 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/eb0cb25b/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala
--
diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala 
b/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala
index 4adc659..2b71f55 100644
--- a/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala
+++ b/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala
@@ -368,7 +368,7 @@ private[ui] class StagePage(parent: StagesTab) extends 
WebUIPage("stage") {
   val peakExecutionMemory = validTasks.map { case TaskUIData(info, _, 
_) =>
 info.accumulables
   .find { acc => acc.name == 
InternalAccumulator.PEAK_EXECUTION_MEMORY }
-  .map { acc => acc.value.toLong }
+  .map { acc => acc.update.getOrElse("0").toLong }
   .getOrElse(0L)
   .toDouble
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/eb0cb25b/core/src/test/scala/org/apache/spark/ui/StagePageSuite.scala
--
diff --git a/core/src/test/scala/org/apache/spark/ui/StagePageSuite.scala 
b/core/src/test/scala/org/apache/spark/ui/StagePageSuite.scala
index 3388c6d..86699e7 100644
--- a/core/src/test/scala/org/apache/spark/ui/StagePageSuite.scala
+++ b/core/src/test/scala/org/apache/spark/ui/StagePageSuite.scala
@@ -23,7 +23,7 @@ import scala.xml.Node
 
 import org.mockito.Mockito.{mock, when, RETURNS_SMART_NULLS}
 
-import org.apache.spark.{LocalSparkContext, SparkConf, SparkFunSuite, Success}
+import org.apache.spark._
 import org.apache.spark.executor.TaskMetrics
 import org.apache.spark.scheduler._
 import org.apache.spark.ui.jobs.{JobProgressListener, StagePage, StagesTab}
@@ -47,6 +47,14 @@ class StagePageSuite extends SparkFunSuite with 
LocalSparkContext {
 assert(html3.contains(targetString))
   }
 
+  test("SPARK-10543: peak execution memory should be per-task rather than 
cumulative") {
+val unsafeConf = "spark.sql.unsafe.enabled"
+val conf = new SparkConf(false).set(unsafeConf, "true")
+val html = renderStagePage(conf).toString().toLowerCase
+// verify min/25/50/75/max show task value not cumulative values
+assert(html.contains("10.0 b" * 5))
+  }
+
   /**
* Render a stage page started with the given conf and return the HTML.
* This also runs a dummy stage to populate the page with useful content.
@@ -67,12 +75,19 @@ class StagePageSuite extends SparkFunSuite with 
LocalSparkContext {
 
 // Simulate a stage in job progress listener
 val stageInfo = new StageInfo(0, 0, "dummy", 1, Seq.empty, Seq.empty, 
"details")
-val taskInfo = new TaskInfo(0, 0, 0, 0, "0", "localhost", 
TaskLocality.ANY, false)
-jobListener.onStageSubmitted(SparkListenerStageSubmitted(stageInfo))
-