Ngone51 commented on a change in pull request #23677: [SPARK-26755][SCHEDULER]
: Optimize Spark Scheduler to dequeue speculative tasks…
URL: https://github.com/apache/spark/pull/23677#discussion_r303005949
##########
File path: core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala
##########
@@ -330,128 +303,65 @@ private[spark] class TaskSetManager(
}
}
- /**
- * Return a speculative task for a given executor if any are available. The
task should not have
- * an attempt running on this host, in case the host is slow. In addition,
the task should meet
- * the given locality constraint.
- */
- // Labeled as protected to allow tests to override providing speculative
tasks if necessary
- protected def dequeueSpeculativeTask(execId: String, host: String, locality:
TaskLocality.Value)
- : Option[(Int, TaskLocality.Value)] =
- {
- speculatableTasks.retain(index => !successful(index)) // Remove finished
tasks from set
-
- def canRunOnHost(index: Int): Boolean = {
- !hasAttemptOnHost(index, host) &&
- !isTaskBlacklistedOnExecOrNode(index, execId, host)
- }
-
- if (!speculatableTasks.isEmpty) {
- // Check for process-local tasks; note that tasks can be process-local
- // on multiple nodes when we replicate cached blocks, as in Spark
Streaming
- for (index <- speculatableTasks if canRunOnHost(index)) {
- val prefs = tasks(index).preferredLocations
- val executors = prefs.flatMap(_ match {
- case e: ExecutorCacheTaskLocation => Some(e.executorId)
- case _ => None
- })
- if (executors.contains(execId)) {
- speculatableTasks -= index
- return Some((index, TaskLocality.PROCESS_LOCAL))
- }
- }
-
- // Check for node-local tasks
- if (TaskLocality.isAllowed(locality, TaskLocality.NODE_LOCAL)) {
- for (index <- speculatableTasks if canRunOnHost(index)) {
- val locations = tasks(index).preferredLocations.map(_.host)
- if (locations.contains(host)) {
- speculatableTasks -= index
- return Some((index, TaskLocality.NODE_LOCAL))
- }
- }
- }
-
- // Check for no-preference tasks
- if (TaskLocality.isAllowed(locality, TaskLocality.NO_PREF)) {
- for (index <- speculatableTasks if canRunOnHost(index)) {
- val locations = tasks(index).preferredLocations
- if (locations.size == 0) {
- speculatableTasks -= index
- return Some((index, TaskLocality.PROCESS_LOCAL))
- }
- }
- }
-
- // Check for rack-local tasks
- if (TaskLocality.isAllowed(locality, TaskLocality.RACK_LOCAL)) {
- for (rack <- sched.getRackForHost(host)) {
- for (index <- speculatableTasks if canRunOnHost(index)) {
- val racks =
tasks(index).preferredLocations.map(_.host).flatMap(sched.getRackForHost)
- if (racks.contains(rack)) {
- speculatableTasks -= index
- return Some((index, TaskLocality.RACK_LOCAL))
- }
- }
- }
- }
-
- // Check for non-local tasks
- if (TaskLocality.isAllowed(locality, TaskLocality.ANY)) {
- for (index <- speculatableTasks if canRunOnHost(index)) {
- speculatableTasks -= index
- return Some((index, TaskLocality.ANY))
- }
- }
- }
-
- None
- }
-
/**
* Dequeue a pending task for a given node and return its index and locality
level.
* Only search for tasks matching the given locality constraint.
*
* @return An option containing (task index within the task set, locality,
is speculative?)
*/
private def dequeueTask(execId: String, host: String, maxLocality:
TaskLocality.Value)
- : Option[(Int, TaskLocality.Value, Boolean)] =
- {
- for (index <- dequeueTaskFromList(execId, host,
getPendingTasksForExecutor(execId))) {
- return Some((index, TaskLocality.PROCESS_LOCAL, false))
+ : Option[(Int, TaskLocality.Value, Boolean)] = {
+ // Tries to schedule a regular task first; if it returns None, then
schedules
+ // a speculative task
+ dequeueTaskHelper(execId, host, maxLocality, false).orElse(
+ dequeueTaskHelper(execId, host, maxLocality, true))
+ }
+
+ private def dequeueTaskHelper(
+ execId: String,
+ host: String,
+ maxLocality: TaskLocality.Value,
+ speculative: Boolean): Option[(Int, TaskLocality.Value, Boolean)] = {
+ if (speculative && speculatableTasks.isEmpty) {
+ return None
+ }
+ val pendingTaskSetToUse = if (speculative) pendingSpeculatableTasks else
pendingTasks
+ def dequeue(list: ArrayBuffer[Int]): Option[Int] = {
+ dequeueTaskFromList(execId, host, list, speculative)
+ }
+
+ dequeue(pendingTaskSetToUse.forExecutor.getOrElse(execId,
ArrayBuffer())).foreach { index =>
+ return Some((index, TaskLocality.PROCESS_LOCAL, speculative))
}
if (TaskLocality.isAllowed(maxLocality, TaskLocality.NODE_LOCAL)) {
- for (index <- dequeueTaskFromList(execId, host,
getPendingTasksForHost(host))) {
- return Some((index, TaskLocality.NODE_LOCAL, false))
+ dequeue(pendingTaskSetToUse.forHost.getOrElse(host,
ArrayBuffer())).foreach { index =>
+ return Some((index, TaskLocality.NODE_LOCAL, speculative))
}
}
+ // Look for noPref tasks after NODE_LOCAL for minimize cross-rack traffic
if (TaskLocality.isAllowed(maxLocality, TaskLocality.NO_PREF)) {
- // Look for noPref tasks after NODE_LOCAL for minimize cross-rack traffic
- for (index <- dequeueTaskFromList(execId, host,
pendingTasksWithNoPrefs)) {
- return Some((index, TaskLocality.PROCESS_LOCAL, false))
+ dequeue(pendingTaskSetToUse.noPrefs).foreach { index =>
+ return Some((index, TaskLocality.NO_PREF, speculative))
Review comment:
Why this change to `NO_PREF`? Doesn't this change the original behavior?
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]