Github user squito commented on a diff in the pull request:
https://github.com/apache/spark/pull/21221#discussion_r195289751
--- Diff:
core/src/main/scala/org/apache/spark/scheduler/EventLoggingListener.scala ---
@@ -169,6 +182,31 @@ private[spark] class EventLoggingListener(
// Events that trigger a flush
override def onStageCompleted(event: SparkListenerStageCompleted): Unit
= {
+ if (shouldLogExecutorMetricsUpdates) {
+ // clear out any previous attempts, that did not have a stage
completed event
+ val prevAttemptId = event.stageInfo.attemptNumber() - 1
+ for (attemptId <- 0 to prevAttemptId) {
+ liveStageExecutorMetrics.remove((event.stageInfo.stageId,
attemptId))
+ }
+
+ // log the peak executor metrics for the stage, for each live
executor,
+ // whether or not the executor is running tasks for the stage
+ val accumUpdates = new ArrayBuffer[(Long, Int, Int,
Seq[AccumulableInfo])]()
+ val executorMap = liveStageExecutorMetrics.remove(
+ (event.stageInfo.stageId, event.stageInfo.attemptNumber()))
+ executorMap.foreach {
+ executorEntry => {
+ for ((executorId, peakExecutorMetrics) <- executorEntry) {
+ val executorMetrics = new ExecutorMetrics(-1,
peakExecutorMetrics.metrics)
--- End diff --
why is the timestamp -1 here? if we're always logging it as -1, it doesn't
seem very useful
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]