Updated Branches:
  refs/heads/master 18d6df0e1 -> cb976dfb5

Improve docs for shuffle instrumentation


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/297c09d4
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/297c09d4
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/297c09d4

Branch: refs/heads/master
Commit: 297c09d4bb26ba815c7fcb0a0ff04974959f551e
Parents: 62889c4
Author: Patrick Wendell <[email protected]>
Authored: Mon Nov 25 22:51:33 2013 -0800
Committer: Patrick Wendell <[email protected]>
Committed: Mon Nov 25 22:53:28 2013 -0800

----------------------------------------------------------------------
 .../org/apache/spark/executor/TaskMetrics.scala | 23 +++++++++++---------
 1 file changed, 13 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/297c09d4/core/src/main/scala/org/apache/spark/executor/TaskMetrics.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/executor/TaskMetrics.scala b/core/src/main/scala/org/apache/spark/executor/TaskMetrics.scala
index 0b4892f..c0ce46e 100644
--- a/core/src/main/scala/org/apache/spark/executor/TaskMetrics.scala
+++ b/core/src/main/scala/org/apache/spark/executor/TaskMetrics.scala
@@ -61,50 +61,53 @@ object TaskMetrics {
 
 class ShuffleReadMetrics extends Serializable {
   /**
-   * Time when shuffle finishs
+   * Absolute time when this task finished reading shuffle data
    */
   var shuffleFinishTime: Long = _
 
   /**
-   * Total number of blocks fetched in a shuffle (remote or local)
+   * Number of blocks fetched in this shuffle by this task (remote or local)
    */
   var totalBlocksFetched: Int = _
 
   /**
-   * Number of remote blocks fetched in a shuffle
+   * Number of remote blocks fetched in this shuffle by this task
    */
   var remoteBlocksFetched: Int = _
 
   /**
-   * Local blocks fetched in a shuffle
+   * Number of local blocks fetched in this shuffle by this task
    */
   var localBlocksFetched: Int = _
 
   /**
-   * Total time that is spent blocked waiting for shuffle to fetch data
+   * Time the task spent waiting for remote shuffle blocks. This only includes the time
+   * blocking on shuffle input data. For instance if block B is being fetched while the task is
+   * still not finished processing block A, it is not considered to be blocking on block B.
    */
   var fetchWaitTime: Long = _
 
   /**
-   * The total amount of time for all the shuffle fetches.  This adds up time from overlapping
-   *     shuffles, so can be longer than task time
+   * Total time spent fetching remote shuffle blocks. This aggregates the time spent fetching all
+   * input blocks. Since block fetches are both pipelined and parallelized, this can
+   * exceed fetchWaitTime and executorRunTime.
    */
   var remoteFetchTime: Long = _
 
   /**
-   * Total number of remote bytes read from a shuffle
+   * Total number of remote bytes read from the shuffle by this task
    */
   var remoteBytesRead: Long = _
 }
 
 class ShuffleWriteMetrics extends Serializable {
   /**
-   * Number of bytes written for a shuffle
+   * Number of bytes written for the shuffle by this task
    */
   var shuffleBytesWritten: Long = _
 
   /**
-   * Time spent blocking on writes to disk or buffer cache, in nanoseconds.
+   * Time the task spent blocking on writes to disk or buffer cache, in nanoseconds
    */
   var shuffleWriteTime: Long = _
 }

Reply via email to