LantaoJin commented on a change in pull request #23951:
[SPARK-13704][CORE][YARN] Re-implement RackResolver to reduce resolving time
URL: https://github.com/apache/spark/pull/23951#discussion_r266303765
##########
File path:
core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala
##########
@@ -1602,4 +1648,50 @@ class TaskSetManagerSuite extends SparkFunSuite with
LocalSparkContext with Logg
verify(sched.dagScheduler).taskEnded(manager.tasks(3), Success,
result.value(),
result.accumUpdates, info3)
}
+
+ test("SPARK-27038 Verify the rack resolving time has been reduced") {
+ sc = new SparkContext("local", "test")
+ for (i <- 0 to 99) {
+ FakeRackUtil.assignHostToRack("host" + i, "rack" + (i % 20))
+ }
+ sched = new FakeTaskScheduler(sc,
+ ("execA", "host1"), ("execB", "host2"), ("execC", "host3"))
+ val locations = new ArrayBuffer[Seq[TaskLocation]]()
+ for (i <- 0 to 99) {
+ locations += Seq(TaskLocation("host" + i))
+ }
+ val taskSet = FakeTask.createTaskSet(100, locations: _*)
+ val clock = new ManualClock
+ val manager = new TaskSetManager(sched, taskSet, MAX_TASK_FAILURES, clock
= clock)
+ var total = 0
+ for (i <- 0 until 20) {
+ val numTaskInRack = manager.getPendingTasksForRack("rack" + i).length
+ assert(numTaskInRack === 5) // check rack assignment is still done
correctly
+ total += numTaskInRack
+ }
+ assert(sched.skipRackResolving === false)
+ assert(total === 100) // verify the total number not changed with
SPARK-27038
+ assert(FakeRackUtil.loopCount === 4) // verify script execution loop count
decreased
+ }
+
+ test("SPARK-27038 Verify the rack resolving time when spark.locality.wait is
zero") {
+ val conf = new SparkConf().set(config.LOCALITY_WAIT.key, "0")
+ sc = new SparkContext("local", "test", conf)
+ for (i <- 0 to 99) {
+ FakeRackUtil.assignHostToRack("host" + i, "rack" + (i % 20))
+ }
+ sched = new FakeTaskScheduler(sc,
+ ("execA", "host1"), ("execB", "host2"), ("execC", "host3"))
+ val locations = new ArrayBuffer[Seq[TaskLocation]]()
+ for (i <- 0 to 99) {
+ locations += Seq(TaskLocation("host" + i))
+ }
+ val taskSet = FakeTask.createTaskSet(100, locations: _*)
+ val clock = new ManualClock
+ val manager = new TaskSetManager(sched, taskSet, MAX_TASK_FAILURES, clock
= clock)
+ assert(sched.skipRackResolving === true)
Review comment:
@squito I've refactored the code to address the comments above. The
optimization is no longer in `YarnScheduler`; I moved it to
`TaskSchedulerImpl`. This way, the test exercises the real implementation
instead of a re-implementation in the test mock. In the previous
implementation I had mixed the YARN and Core code together; now they are
split.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]