[GitHub] [spark] pgandhi999 commented on a change in pull request #23677: [SPARK-26755][SCHEDULER] : Optimize Spark Scheduler to dequeue speculative tasks…

GitBox Mon, 08 Jul 2019 09:18:10 -0700

pgandhi999 commented on a change in pull request #23677: 
[SPARK-26755][SCHEDULER] : Optimize Spark Scheduler to dequeue speculative 
tasks…
URL: https://github.com/apache/spark/pull/23677#discussion_r301182676


 ##########
 File path: 
core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala
 ##########
 @@ -1655,4 +1657,81 @@ class TaskSetManagerSuite extends SparkFunSuite with 
LocalSparkContext with Logg
     // get removed inside TaskSchedulerImpl later.
     assert(availableResources(GPU) sameElements Array("0", "1", "2", "3"))
   }
+
+  test("SPARK-26755 Ensure that a speculative task is submitted only once for 
execution") {
+    sc = new SparkContext("local", "test")
+    sched = new FakeTaskScheduler(sc, ("exec1", "host1"), ("exec2", "host2"))
+    val taskSet = FakeTask.createTaskSet(4)
+    // Set the speculation multiplier to be 0 so speculative tasks are 
launched immediately
+    sc.conf.set(config.SPECULATION_MULTIPLIER, 0.0)
+    sc.conf.set(config.SPECULATION_ENABLED, true)
+    sc.conf.set(config.SPECULATION_QUANTILE, 0.5)
+    val clock = new ManualClock()
+    val manager = new TaskSetManager(sched, taskSet, MAX_TASK_FAILURES, clock 
= clock)
+    val accumUpdatesByTask: Array[Seq[AccumulatorV2[_, _]]] = 
taskSet.tasks.map { task =>
+      task.metrics.internalAccums
+    }
+    // Offer resources for 4 tasks to start
+    for ((k, v) <- List(
+      "exec1" -> "host1",
+      "exec1" -> "host1",
+      "exec2" -> "host2",
+      "exec2" -> "host2")) {
+      val taskOption = manager.resourceOffer(k, v, NO_PREF)
+      assert(taskOption.isDefined)
+      val task = taskOption.get
+      assert(task.executorId === k)
+    }
+    assert(sched.startedTasks.toSet === Set(0, 1, 2, 3))
+    clock.advance(1)
+    // Complete the first 2 tasks and leave the other 2 tasks in running
+    for (id <- Set(0, 1)) {
+      manager.handleSuccessfulTask(id, createTaskResult(id, 
accumUpdatesByTask(id)))
+      assert(sched.endedTasks(id) === Success)
+    }
+    // checkSpeculatableTasks checks that the task runtime is greater than the 
threshold for
+    // speculating. Since we use a threshold of 0 for speculation, tasks need 
to be running for
+    // > 0ms, so advance the clock by 1ms here.
+    clock.advance(1)
+    assert(manager.checkSpeculatableTasks(0))
+    assert(sched.speculativeTasks.toSet === Set(2, 3))
+    assert(manager.pendingSpeculatableTasks.forExecutor.size === 0)
+    assert(manager.pendingSpeculatableTasks.forHost.size === 0)
+    assert(manager.pendingSpeculatableTasks.forRack.size === 0)
+    assert(manager.pendingSpeculatableTasks.anyPrefs.size === 2)
+    assert(manager.pendingSpeculatableTasks.noPrefs.size === 2)
+
+    // Offer resource to start the speculative attempt for the running task
+    val taskOption5 = manager.resourceOffer("exec1", "host1", NO_PREF)
+    val taskOption6 = manager.resourceOffer("exec1", "host1", NO_PREF)
+    assert(taskOption5.isDefined)
+    val task5 = taskOption5.get
+    assert(task5.index === 2)
+    assert(task5.taskId === 4)
+    assert(task5.executorId === "exec1")
+    assert(task5.attemptNumber === 1)
+    assert(taskOption6.isDefined)
+    val task6 = taskOption6.get
+    assert(task6.index === 3)
+    assert(task6.taskId === 5)
+    assert(task6.executorId === "exec1")
+    assert(task6.attemptNumber === 1)
+    sched.initialize(new FakeSchedulerBackend() {
+      override def killTask(
+        taskId: Long,
+        executorId: String,
+        interruptThread: Boolean,
+        reason: String): Unit = {}
+    })
+    clock.advance(1)
+    // Running checkSpeculatableTasks again should return false
+    assert(!manager.checkSpeculatableTasks(0))
+    assert(manager.pendingSpeculatableTasks.forExecutor.size === 0)
+    assert(manager.pendingSpeculatableTasks.forHost.size === 0)
+    assert(manager.pendingSpeculatableTasks.forRack.size === 0)
+    // allPendingSpeculativeTasks will still have two pending tasks but
+    // pendingSpeculatableTasksWithNoPrefs should have none
+    assert(manager.pendingSpeculatableTasks.anyPrefs.size === 2)
+    assert(manager.pendingSpeculatableTasks.noPrefs.size === 0)
 
 Review comment:
   @squito Your comment makes sense, and I can implement it in the test. The 
only part I am stuck on is figuring out how to test that we are not 
resubmitting speculative tasks without inspecting the HashMap or its size. 
I would appreciate your guidance on this. Thank you.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

[GitHub] [spark] pgandhi999 commented on a change in pull request #23677: [SPARK-26755][SCHEDULER] : Optimize Spark Scheduler to dequeue speculative tasks…

Reply via email to