Github user felixcheung commented on a diff in the pull request:
https://github.com/apache/spark/pull/20640#discussion_r191272509
--- Diff:
resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala
---
@@ -648,14 +645,8 @@ private[spark] class MesosCoarseGrainedSchedulerBackend(
totalGpusAcquired -= gpus
gpusByTaskId -= taskId
}
- // If it was a failure, mark the slave as failed for blacklisting purposes
if (TaskState.isFailed(state)) {
- slave.taskFailures += 1
-
- if (slave.taskFailures >= MAX_SLAVE_FAILURES) {
- logInfo(s"Blacklisting Mesos slave $slaveId due to too many failures; " +
- "is Spark installed on it?")
- }
+ logError(s"Task $taskId failed on Mesos slave $slaveId.")
--- End diff --
+1 ok
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]