Github user JoshRosen commented on a diff in the pull request:
https://github.com/apache/spark/pull/15986#discussion_r89275105
--- Diff:
core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala ---
@@ -335,31 +337,31 @@ private[spark] class TaskSchedulerImpl(
var reason: Option[ExecutorLossReason] = None
synchronized {
try {
- if (state == TaskState.LOST && taskIdToExecutorId.contains(tid)) {
- // We lost this entire executor, so remember that it's gone
- val execId = taskIdToExecutorId(tid)
-
- if (executorIdToTaskCount.contains(execId)) {
+ taskIdToTaskSetManager.get(tid) match {
+ case Some(taskSet) if state == TaskState.LOST =>
+ // TaskState.LOST is only used by the deprecated Mesos
fine-grained scheduling mode,
+ // where each executor corresponds to a single task, so mark
the executor as failed.
+ val execId = taskIdToExecutorId.getOrElse(tid, throw new
IllegalStateException(
+ "taskIdToTaskSetManager.contains(tid) <=>
taskIdToExecutorId.contains(tid)"))
reason = Some(
SlaveLost(s"Task $tid was lost, so marking the executor as
lost as well."))
removeExecutor(execId, reason.get)
failedExecutor = Some(execId)
- }
- }
- taskIdToTaskSetManager.get(tid) match {
+ taskSet.removeRunningTask(tid)
+ taskResultGetter.enqueueFailedTask(taskSet, tid, state,
serializedData)
case Some(taskSet) =>
if (TaskState.isFinished(state)) {
taskIdToTaskSetManager.remove(tid)
taskIdToExecutorId.remove(tid).foreach { execId =>
- if (executorIdToTaskCount.contains(execId)) {
- executorIdToTaskCount(execId) -= 1
+ if (executorIdToRunningTaskIds.contains(execId)) {
+ executorIdToRunningTaskIds(execId).remove(tid)
}
}
}
if (state == TaskState.FINISHED) {
taskSet.removeRunningTask(tid)
taskResultGetter.enqueueSuccessfulTask(taskSet, tid,
serializedData)
- } else if (Set(TaskState.FAILED, TaskState.KILLED,
TaskState.LOST).contains(state)) {
--- End diff --
Here, `TaskState.LOST` has already been handled by the new
`case Some(taskSet) if state == TaskState.LOST` branch above, so I removed it
from this set: this branch can never see that state.
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and would like it, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]