tgravescs commented on a change in pull request #27050: [SPARK-30388][Core] Mark running map stages of finished job as finished, and cancel running tasks
URL: https://github.com/apache/spark/pull/27050#discussion_r369711572
 
 

 ##########
 File path: core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala
 ##########
 @@ -1931,6 +1931,40 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi
     assertDataStructuresEmpty()
   }
 
+  test("shuffle fetch failed on speculative task, but original task succeeds") {
+    val shuffleMapRdd = new MyRDD(sc, 2, Nil)
+    val shuffleDep = new ShuffleDependency(shuffleMapRdd, new HashPartitioner(2))
+    val reduceRdd = new MyRDD(sc, 2, List(shuffleDep))
+    submit(reduceRdd, Array(0, 1))
+    completeShuffleMapStageSuccessfully(0, 0, 2)
+
+    // result task 0.0 succeeds
+    runEvent(makeCompletionEvent(taskSets(1).tasks(0), Success, 42))
+
+    // speculative result task 1.1 fails with a fetch failure
+    val info = new TaskInfo(4, index = 1, attemptNumber = 1, 0L, "", "", TaskLocality.ANY, true)
+    runEvent(makeCompletionEvent(
+        taskSets(1).tasks(1),
+        FetchFailed(makeBlockManagerId("hostA"), shuffleDep.shuffleId, 0L, 0, 1, "ignored"),
+        null,
+        Seq.empty,
+        Array.empty,
+        info
+      )
+    )
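+    // sleep past the resubmit timeout so the scheduler resubmits the failed map stage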
+    Thread.sleep(DAGScheduler.RESUBMIT_TIMEOUT * 2)
+
+    // map stage resubmitted
+    assert(scheduler.runningStages.size === 1)
+    val mapStage = scheduler.runningStages.head
+    assert(mapStage.id === 0)
+    assert(mapStage.latestInfo.failureReason.isEmpty)
+
+    // original result task 1.0 succeeds
+    runEvent(makeCompletionEvent(taskSets(1).tasks(1), Success, 42))
+    assert(scheduler.activeJobs.isEmpty)
 
 Review comment:
   This isn't really testing that the stages got cleaned up, right? activeJobs would have been empty even without this fix, because job state is cleaned up in cleanupStateForJobAndIndependentStages. In other words: run this test without your fix; does it still pass?
   Can we instead make sure a SparkListenerStageCompleted event was sent on the cancel?
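
   For illustration, a minimal sketch of what that listener-based check could look like; the completedStageIds buffer, the listener registration, and the wait timeout are assumptions for the sketch, not code from the patch:

   ```scala
   import scala.collection.mutable

   import org.apache.spark.scheduler.{SparkListener, SparkListenerStageCompleted}

   // record the id of every stage the scheduler reports as completed
   val completedStageIds = mutable.Buffer[Int]()
   sc.addSparkListener(new SparkListener {
     override def onStageCompleted(event: SparkListenerStageCompleted): Unit = {
       completedStageIds += event.stageInfo.stageId
     }
   })

   // ... submit the job, fail the speculative task, and complete the
   // original task exactly as in the test above ...

   // stage events are posted asynchronously, so drain the bus before asserting
   sc.listenerBus.waitUntilEmpty(10000)
   assert(completedStageIds.contains(mapStage.id))
   ```

   Asserting on the posted event pins down the cancellation behavior directly, rather than relying on state that cleanupStateForJobAndIndependentStages clears regardless of the fix.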
