[GitHub] spark pull request #16855: [SPARK-13931] Stage can hang if an executor fails...

GavinGavinNo1 Mon, 13 Feb 2017 07:09:13 -0800

Github user GavinGavinNo1 commented on a diff in the pull request:

    https://github.com/apache/spark/pull/16855#discussion_r100813496
  
    --- Diff: 
core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala ---
    @@ -664,6 +665,55 @@ class TaskSetManagerSuite extends SparkFunSuite with 
LocalSparkContext with Logg
         assert(thrown2.getMessage().contains("bigger than 
spark.driver.maxResultSize"))
       }
     
    +  test("taskSetManager should not send Resubmitted tasks after being a 
zombie") {
    +    // Regression test for SPARK-13931
    +    val conf = new SparkConf().set("spark.speculation", "true")
    +    sc = new SparkContext("local", "test", conf)
    +
    +    val sched = new FakeTaskScheduler(sc, ("execA", "host1"), ("execB", 
"host2"))
    +    sched.initialize(new FakeSchedulerBackend() {
    +      override def killTask(taskId: Long, executorId: String, 
interruptThread: Boolean): Unit = {}
    +    })
    +
    +    // count for Resubmitted tasks
    +    var resubmittedTasks = 0
    +    val dagScheduler = new FakeDAGScheduler(sc, sched) {
    --- End diff --
    
    I still don't understand. I'm confused about how to construct a test case 
that fails before the code change, if I modify the test as shown below.
    
    // Variant of the SPARK-13931 test that installs a plain FakeDAGScheduler.
    // It launches one task plus a speculative copy, completes the first copy so
    // the manager becomes a zombie, then loses the executor of the second copy.
    // NOTE(review): nothing below counts or asserts on Resubmitted events, so
    // this variant may pass both with and without the fix -- confirm.
    test("taskSetManager should not send Resubmitted tasks after being a 
zombie") {
        // Regression test for SPARK-13931
        val conf = new SparkConf().set("spark.speculation", "true")
        sc = new SparkContext("local", "test", conf)
    
        // Fake scheduler that is given two (executor, host) pairs.
        val sched = new FakeTaskScheduler(sc, ("execA", "host1"), ("execB", 
"host2"))
        // The backend's killTask is overridden with an empty body, so kill
        // requests are ignored in this test.
        sched.initialize(new FakeSchedulerBackend() {
          override def killTask(taskId: Long, executorId: String, 
interruptThread: Boolean): Unit = {}
        })
    
        // Unmodified FakeDAGScheduler: no taskEnded override is defined here.
        val dagScheduler = new FakeDAGScheduler(sc, sched)
        sched.setDAGScheduler(dagScheduler)
    
        // One ShuffleMapTask over a partition with index 0, constructed with
        // TaskLocation("host1", "execA"); it is the only task in the task set.
        val singleTask = new ShuffleMapTask(0, 0, null, new Partition {
            override def index: Int = 0
          }, Seq(TaskLocation("host1", "execA")), new Properties, null)
        val taskSet = new TaskSet(Array(singleTask), 0, 0, 0, null)
        val manager = new TaskSetManager(sched, taskSet, MAX_TASK_FAILURES)
    
        // Offer host1, which should be accepted as a PROCESS_LOCAL location
        // by the one task in the task set
        val task1 = manager.resourceOffer("execA", "host1", 
TaskLocality.PROCESS_LOCAL).get
    
        // Mark the task as available for speculation, and then offer another 
resource,
        // which should be used to launch a speculative copy of the task.
        manager.speculatableTasks += singleTask.partitionId
        val task2 = manager.resourceOffer("execB", "host2", 
TaskLocality.ANY).get
    
        // Both copies (task1 on execA, task2 on execB) are expected to be running.
        assert(manager.runningTasks === 2)
        assert(manager.isZombie === false)
    
        // Task result stub whose value(...) always returns the empty string.
        val directTaskResult = new DirectTaskResult[String](null, Seq()) {
          override def value(resultSer: SerializerInstance): String = ""
        }
        // Complete one copy of the task, which should result in the task set 
manager
        // being marked as a zombie, because at least one copy of its only task 
has completed.
        manager.handleSuccessfulTask(task1.taskId, directTaskResult)
        assert(manager.isZombie === true)
        assert(sched.endedTasks(0) === Success)
        assert(manager.runningTasks === 1)
    
        // Lose execB, the executor that was offered for task2, after the manager
        // has already been asserted to be a zombie.
        manager.executorLost("execB", "host2", new SlaveLost())
        assert(manager.runningTasks === 0)
        // NOTE(review): endedTasks(0) was asserted to be Success above; the same
        // key is now expected to hold an ExecutorLostFailure (presumably the
        // entry is overwritten per task index -- confirm against FakeTaskScheduler).
        assert(sched.endedTasks(0).isInstanceOf[ExecutorLostFailure])
      }



---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

[GitHub] spark pull request #16855: [SPARK-13931] Stage can hang if an executor fails...

Reply via email to