[ https://issues.apache.org/jira/browse/SPARK-34488?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Ron Hu updated SPARK-34488: --------------------------- Attachment: taskMetricsDistributions.json > Support task Metrics Distributions and executor Metrics Distributions in the > REST API entry for a specified stage > ----------------------------------------------------------------------------------------------------------------- > > Key: SPARK-34488 > URL: https://issues.apache.org/jira/browse/SPARK-34488 > Project: Spark > Issue Type: Sub-task > Components: Spark Core > Affects Versions: 3.0.2 > Reporter: Ron Hu > Priority: Major > Attachments: executorMetricsDistributions.json, > taskMetricsDistributions.json > > > For a specific stage, it is useful to show the task metrics in percentile > distribution. This information can help users know whether or not there is a > skew/bottleneck among tasks in a given stage. > Here is an example: > > "taskMetricsDistributions" : > { "quantiles" : [ 0.0, 0.25, 0.5, 0.75, 1.0 ], > "executorDeserializeTime" : [ 1.0, 1.0, 1.0, 1.0, 1.0 ], > "executorDeserializeCpuTime" : [ 1195000.0, 1195000.0, 1195000.0, 1195000.0, > 1195000.0 ], "executorRunTime" : [ 3.0, 3.0, 3.0, 3.0, 3.0 ], > "executorCpuTime" : [ 3210000.0, 3210000.0, 3210000.0, 3210000.0, 3210000.0 > ], "resultSize" : [ 2648.0, 2648.0, 2648.0, 2648.0, 2648.0 ], > "jvmGcTime" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], "resultSerializationTime" : [ > 0.0, 0.0, 0.0, 0.0, 0.0 ], "gettingResultTime" : [ 0.0, 0.0, 0.0, 0.0, > 0.0 ], "schedulerDelay" : [ 2.0, 2.0, 2.0, 2.0, 2.0 ], > "peakExecutionMemory" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], "memoryBytesSpilled" > : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], "diskBytesSpilled" : [ 0.0, 0.0, 0.0, 0.0, > 0.0 ], "inputMetrics" : { "bytesRead" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], > "recordsRead" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ] } > , > "outputMetrics" : { "bytesWritten" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], > "recordsWritten" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ] }, > "shuffleReadMetrics" : { "readBytes" : [ 236.0, 236.0, 236.0, 236.0, > 236.0 ], "readRecords" : [ 
4.0, 4.0, 4.0, 4.0, 4.0 ], > "remoteBlocksFetched" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], > "localBlocksFetched" : [ 4.0, 4.0, 4.0, 4.0, 4.0 ], "fetchWaitTime" : [ > 0.0, 0.0, 0.0, 0.0, 0.0 ], "remoteBytesRead" : [ 0.0, 0.0, 0.0, 0.0, > 0.0 ], "remoteBytesReadToDisk" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], > "totalBlocksFetched" : [ 4.0, 4.0, 4.0, 4.0, 4.0 ] }, > "shuffleWriteMetrics" : { "writeBytes" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], > "writeRecords" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], "writeTime" : [ 0.0, 0.0, 0.0, > 0.0, 0.0 ] } > } > > Similarly, it is useful to show the executor metrics in percentile > distribution for a specific stage. > Here is an example: > > "executorMetricsDistributions" : { > "quantiles" : [ 0.0, 0.25, 0.5, 0.75, 1.0 ], > "taskTime" : [ 6.0, 6.0, 6.0, 6.0, 6.0 ], > "failedTasks" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], > "succeededTasks" : [ 1.0, 1.0, 1.0, 1.0, 1.0 ], > "killedTasks" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], > "inputBytes" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], > "inputRecords" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], > "outputBytes" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], > "outputRecords" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], > "shuffleRead" : [ 236.0, 236.0, 236.0, 236.0, 236.0 ], > "shuffleReadRecords" : [ 4.0, 4.0, 4.0, 4.0, 4.0 ], > "shuffleWrite" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], > "shuffleWriteRecords" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], > "memoryBytesSpilled" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], > "diskBytesSpilled" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], > "peakMemoryMetrics" : > { "JVMHeapMemory" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], > "JVMOffHeapMemory" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], > "OnHeapExecutionMemory" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], > "OffHeapExecutionMemory" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], > "OnHeapStorageMemory" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], > "OffHeapStorageMemory" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], > "OnHeapUnifiedMemory" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], > "OffHeapUnifiedMemory" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], > "DirectPoolMemory" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], "MappedPoolMemory" : > [ 0.0, 0.0, 0.0, 0.0, 0.0 ], 
"ProcessTreeJVMVMemory" : [ 0.0, 0.0, 0.0, > 0.0, 0.0 ], "ProcessTreeJVMRSSMemory" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], > "ProcessTreePythonVMemory" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], > "ProcessTreePythonRSSMemory" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], > "ProcessTreeOtherVMemory" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], > "ProcessTreeOtherRSSMemory" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], > "MinorGCCount" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], "MinorGCTime" : [ 0.0, > 0.0, 0.0, 0.0, 0.0 ], "MajorGCCount" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], > "MajorGCTime" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ] } > } > > We use the withSummaries query parameter in the REST API for a specific stage as: > applications/<application_id>/<application_attempt>/stages/<stage_id>/<stage_attempt>?withSummaries=[true|false] > -- This message was sent by Atlassian Jira (v8.3.4#803005) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org