Repository: spark Updated Branches: refs/heads/master 8e8afb3a3 -> 8640dc082
[SPARK-10748][MESOS] Log error instead of crashing Spark Mesos dispatcher when a job is misconfigured ## What changes were proposed in this pull request? The dispatcher now catches the SparkException that is thrown for an invalid job configuration, marks that job as failed, and continues launching the other drivers instead of propagating the exception. ## How was this patch tested? I verified manually that misconfigured jobs now move to the Finished Drivers section in the UI and that the dispatcher continues to launch the other jobs. Author: Devaraj K <deva...@apache.org> Closes #13077 from devaraj-kavali/SPARK-10748. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8640dc08 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8640dc08 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8640dc08 Branch: refs/heads/master Commit: 8640dc08232c3d427a84ca621b07833e32fb8a3a Parents: 8e8afb3 Author: Devaraj K <deva...@apache.org> Authored: Fri Feb 10 14:11:56 2017 +0000 Committer: Sean Owen <so...@cloudera.com> Committed: Fri Feb 10 14:11:56 2017 +0000 ---------------------------------------------------------------------- .../cluster/mesos/MesosClusterScheduler.scala | 28 +++++++++++++------- 1 file changed, 19 insertions(+), 9 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/8640dc08/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala ---------------------------------------------------------------------- diff --git a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala index c5bbcb9..2760f31 100644 --- a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala +++ 
b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala @@ -559,15 +559,25 @@ private[spark] class MesosClusterScheduler( } else { val offer = offerOption.get val queuedTasks = tasks.getOrElseUpdate(offer.offerId, new ArrayBuffer[TaskInfo]) - val task = createTaskInfo(submission, offer) - queuedTasks += task - logTrace(s"Using offer ${offer.offerId.getValue} to launch driver " + - submission.submissionId) - val newState = new MesosClusterSubmissionState(submission, task.getTaskId, offer.slaveId, - None, new Date(), None, getDriverFrameworkID(submission)) - launchedDrivers(submission.submissionId) = newState - launchedDriversState.persist(submission.submissionId, newState) - afterLaunchCallback(submission.submissionId) + try { + val task = createTaskInfo(submission, offer) + queuedTasks += task + logTrace(s"Using offer ${offer.offerId.getValue} to launch driver " + + submission.submissionId) + val newState = new MesosClusterSubmissionState(submission, task.getTaskId, offer.slaveId, + None, new Date(), None, getDriverFrameworkID(submission)) + launchedDrivers(submission.submissionId) = newState + launchedDriversState.persist(submission.submissionId, newState) + afterLaunchCallback(submission.submissionId) + } catch { + case e: SparkException => + afterLaunchCallback(submission.submissionId) + finishedDrivers += new MesosClusterSubmissionState(submission, TaskID.newBuilder(). + setValue(submission.submissionId).build(), SlaveID.newBuilder().setValue(""). + build(), None, null, None, getDriverFrameworkID(submission)) + logError(s"Failed to launch the driver with id: ${submission.submissionId}, " + + s"cpu: $driverCpu, mem: $driverMem, reason: ${e.getMessage}") + } } } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org