Github user jiangxb1987 commented on a diff in the pull request:
https://github.com/apache/spark/pull/18427#discussion_r125952506
--- Diff:
core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala ---
@@ -1172,6 +1172,50 @@ class TaskSetManagerSuite extends SparkFunSuite with
LocalSparkContext with Logg
assert(blacklistTracker.isNodeBlacklisted("host1"))
}
+ test("update blacklist before adding pending task to avoid race
condition") {
+ // When a task fails, it should apply the blacklist policy prior to
+ // retrying the task; otherwise there's a race condition where it could run
+ // on the same executor that it was intended to be blacklisted from.
+ val conf = new SparkConf().
+ set(config.BLACKLIST_ENABLED, true).
+ set(config.MAX_TASK_ATTEMPTS_PER_EXECUTOR, 1)
+
+ // Create a task with two executors.
+ sc = new SparkContext("local", "test", conf)
+ val exec = "executor1"
+ val host = "host1"
+ val exec2 = "executor2"
+ val host2 = "host2"
+ sched = new FakeTaskScheduler(sc, (exec, host), (exec2, host2))
+ val taskSet = FakeTask.createTaskSet(1)
+
+ val clock = new ManualClock
+ val mockListenerBus = mock(classOf[LiveListenerBus])
+ val blacklistTracker = new BlacklistTracker(mockListenerBus, conf,
None, clock)
+ val taskSetManager = new TaskSetManager(sched, taskSet, 1,
Some(blacklistTracker))
+ val taskSetManagerSpy = spy(taskSetManager)
+
+ val taskDesc = taskSetManagerSpy.resourceOffer(exec, host,
TaskLocality.ANY)
+
+ // Assert the task has been black listed on the executor it was last
executed on.
+ when(taskSetManagerSpy.addPendingTask(anyInt())).thenAnswer(
+ new Answer[Unit] {
+ override def answer(invocationOnMock: InvocationOnMock): Unit = {
+ val task = invocationOnMock.getArgumentAt(0, classOf[Int])
+ assert(taskSetManager.taskSetBlacklistHelperOpt.get.
+ isExecutorBlacklistedForTask(exec, task))
+ }
+ }
+ )
+
+ // Simulate an out of memory error
+ val e = new OutOfMemoryError
+ taskSetManagerSpy.handleFailedTask(
+ taskDesc.get.taskId, TaskState.FAILED, new ExceptionFailure(e,
Seq()))
--- End diff --
nit: `ExceptionFailure` is a case class, so you may use:
```
val endReason = ExceptionFailure("a", "b", Array(), "c", None)
taskSetManagerSpy.handleFailedTask(taskDesc.get.taskId, TaskState.FAILED,
endReason)
```
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]