Github user kayousterhout commented on a diff in the pull request:
https://github.com/apache/spark/pull/13603#discussion_r67775034
--- Diff:
core/src/test/scala/org/apache/spark/scheduler/TaskSchedulerImplSuite.scala ---
@@ -272,6 +261,55 @@ class TaskSchedulerImplSuite extends SparkFunSuite
with LocalSparkContext with L
val taskDescriptions3 = taskScheduler.resourceOffers(e1Offers).flatten
assert(1 === taskDescriptions3.length)
assert("executor1" === taskDescriptions3(0).executorId)
+ assert(!failedTaskSet)
+ }
+
+ test("abort stage if executor loss results in unschedulability from
previously failed tasks") {
+ // Make sure we can detect when a taskset becomes unschedulable
because of blacklisting. This
+ // test explores a particular corner case -- you may have one task
fail, but still be
+ // schedulable on another executor. However, that executor may fail
later on, leaving the
+ // first task with no place to run.
+ val taskScheduler = setupScheduler(
+ // set this to something much longer than the test duration
+ "spark.scheduler.executorTaskBlacklistTime" -> "10000000"
+ )
+
+ val taskSet = FakeTask.createTaskSet(2)
+ taskScheduler.submitTasks(taskSet)
+ val tsm = taskScheduler.taskSetManagerForAttempt(taskSet.stageId,
taskSet.stageAttemptId).get
+
+ val firstTasks = taskScheduler.resourceOffers(Seq(
+ new WorkerOffer("executor0", "host0", 1),
+ new WorkerOffer("executor1", "host1", 1)
+ )).flatten
+ assert(Set("executor0", "executor1") ===
firstTasks.map{_.executorId}.toSet)
+
+ // fail one of the tasks, but leave the other running
+ val failedTask = firstTasks.find(_.executorId == "executor0").get
+ taskScheduler.handleFailedTask(tsm, failedTask.taskId,
TaskState.FAILED, TaskResultLost)
+ // at this point, our failed task could run on the other executor, so
don't give up the task
+ // set yet.
--- End diff --
Does it make sense to verify `!failedTaskSet` here?
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and would like it, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]