agrawaldevesh commented on a change in pull request #29452:
URL: https://github.com/apache/spark/pull/29452#discussion_r472756046



##########
File path: core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala
##########
@@ -1051,15 +1049,19 @@ private[spark] class TaskSetManager(
       logDebug("Task length threshold for speculation: " + threshold)
       for (tid <- runningTasksSet) {
         var speculated = checkAndSubmitSpeculatableTask(tid, time, threshold)
-        if (!speculated && tidToExecutorKillTimeMapping.contains(tid)) {
-          // Check whether this task will finish before the executorKillTime assuming
-          // it will take medianDuration overall. If this task cannot finish within
-          // executorKillInterval, then this task is a candidate for speculation
-          val taskEndTimeBasedOnMedianDuration = taskInfos(tid).launchTime + medianDuration
-          val canExceedDeadline = tidToExecutorKillTimeMapping(tid) <
-            taskEndTimeBasedOnMedianDuration
-          if (canExceedDeadline) {
-            speculated = checkAndSubmitSpeculatableTask(tid, time, 0)
+        if (!speculated && executorDecommissionKillInterval.nonEmpty) {
+          val taskInfo = taskInfos(tid)
+          val decomState = sched.getExecutorDecommissionState(taskInfo.executorId)
+          if (decomState.nonEmpty) {
+            // Check whether this task will finish before the executorKillTime assuming

Review comment:
       Good point. Reworked the comment.
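
For context, here is a minimal standalone sketch of the decommission-aware speculation check that the reworked comment describes. This is an illustration, not the actual PR code: the function and parameter names are hypothetical, and only `launchTime`, `medianDuration`, `tsMillis`, and the kill interval correspond to names in the diff above.

```scala
// Sketch: decide whether a running task on a decommissioning executor should be
// speculated because it likely cannot finish before the executor is killed.
def shouldSpeculateDueToDecommission(
    launchTimeMillis: Long,      // task launch time (taskInfo.launchTime)
    medianDurationMillis: Long,  // median duration of successfully completed tasks
    decomStartMillis: Long,      // when decommissioning was triggered (tsMillis)
    killIntervalMillis: Long): Boolean = {
  // Projected finish time if this task takes the median duration overall.
  val projectedEndTime = launchTimeMillis + medianDurationMillis
  // The executor is expected to be killed this long after decommissioning started.
  val expectedKillTime = decomStartMillis + killIntervalMillis
  // Speculate when the task would outlive its executor.
  expectedKillTime < projectedEndTime
}
```

When this predicate holds, the caller can immediately submit a speculative copy, e.g. `checkAndSubmitSpeculatableTask(tid, time, 0)` with a zero-length threshold as in the diff above.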

##########
File path: core/src/main/scala/org/apache/spark/scheduler/ExecutorDecommissionInfo.scala
##########
@@ -18,11 +18,22 @@
 package org.apache.spark.scheduler
 
 /**
- * Provides more detail when an executor is being decommissioned.
+ * Message providing more detail when an executor is being decommissioned.
  * @param message Human readable reason for why the decommissioning is happening.
  * @param isHostDecommissioned Whether the host (aka the `node` or `worker` in other places) is
  *                             being decommissioned too. Used to infer if the shuffle data might
  *                             be lost even if the external shuffle service is enabled.
  */
 private[spark]
 case class ExecutorDecommissionInfo(message: String, isHostDecommissioned: Boolean)
+
+/**
+ * State related to decommissioning that is kept by the TaskSchedulerImpl. This state is derived
+ * from the info message above but it is kept distinct to allow the state to evolve independently
+ * from the message.
+ */
+case class ExecutorDecommissionState(
+    message: String,
+    // Timestamp in milliseconds when decommissioning was triggered
+    tsMillis: Long,

Review comment:
       Sure, good idea.
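
To illustrate the split the Scaladoc describes, here is a hedged sketch of how the scheduler-side state could be derived from the info message plus a timestamp. The `toState` helper and the `isHostDecommissioned` field carried over into the state are assumptions for illustration, not necessarily what the PR does:

```scala
// Message sent when an executor starts decommissioning (as in the diff above).
case class ExecutorDecommissionInfo(message: String, isHostDecommissioned: Boolean)

// Scheduler-side state, stamped with when decommissioning was triggered.
case class ExecutorDecommissionState(
    message: String,
    tsMillis: Long,                  // timestamp in ms when decommissioning was triggered
    isHostDecommissioned: Boolean)   // assumed to be copied from the info message

// Hypothetical helper: derive the TaskSchedulerImpl state from the info message.
def toState(info: ExecutorDecommissionInfo, nowMillis: Long): ExecutorDecommissionState =
  ExecutorDecommissionState(info.message, nowMillis, info.isHostDecommissioned)
```

Keeping the two case classes distinct lets the scheduler-side state grow fields (like the timestamp) without changing the message exchanged with executors.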



