Repository: spark
Updated Branches:
  refs/heads/master 51b03b71f -> a41b68b95


[SPARK-12265][MESOS] Spark calls System.exit inside driver instead of throwing 
exception

This takes over #10729 and makes sure that `spark-shell` fails with a proper 
error message. There is a slight behavioral change: before this change 
`spark-shell` would exit, while now the REPL is still there, but `sc` and 
`sqlContext` are not defined and the error is visible to the user.

Author: Nilanjan Raychaudhuri <nraychaudh...@gmail.com>
Author: Iulian Dragos <jagua...@gmail.com>

Closes #10921 from dragos/pr/10729.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a41b68b9
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a41b68b9
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a41b68b9

Branch: refs/heads/master
Commit: a41b68b954ba47284a1df312f0aaea29b0721b0a
Parents: 51b03b7
Author: Nilanjan Raychaudhuri <nraychaudh...@gmail.com>
Authored: Mon Feb 1 13:33:24 2016 -0800
Committer: Andrew Or <and...@databricks.com>
Committed: Mon Feb 1 13:33:24 2016 -0800

----------------------------------------------------------------------
 .../cluster/mesos/MesosClusterScheduler.scala   |  1 +
 .../cluster/mesos/MesosSchedulerBackend.scala   |  1 +
 .../cluster/mesos/MesosSchedulerUtils.scala     | 21 ++++++++++++++++----
 3 files changed, 19 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/a41b68b9/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala
----------------------------------------------------------------------
diff --git 
a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala
 
b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala
index 05fda0f..e77d772 100644
--- 
a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala
+++ 
b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala
@@ -573,6 +573,7 @@ private[spark] class MesosClusterScheduler(
   override def slaveLost(driver: SchedulerDriver, slaveId: SlaveID): Unit = {}
   override def error(driver: SchedulerDriver, error: String): Unit = {
     logError("Error received: " + error)
+    markErr()
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/spark/blob/a41b68b9/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackend.scala
----------------------------------------------------------------------
diff --git 
a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackend.scala
 
b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackend.scala
index eaf0cb0..a8bf79a 100644
--- 
a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackend.scala
+++ 
b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackend.scala
@@ -375,6 +375,7 @@ private[spark] class MesosSchedulerBackend(
   override def error(d: SchedulerDriver, message: String) {
     inClassLoader() {
       logError("Mesos error: " + message)
+      markErr()
       scheduler.error(message)
     }
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/a41b68b9/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerUtils.scala
----------------------------------------------------------------------
diff --git 
a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerUtils.scala
 
b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerUtils.scala
index 010caff..f9f5da9 100644
--- 
a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerUtils.scala
+++ 
b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerUtils.scala
@@ -106,28 +106,37 @@ private[mesos] trait MesosSchedulerUtils extends Logging {
         registerLatch.await()
         return
       }
+      @volatile
+      var error: Option[Exception] = None
 
+      // We create a new thread that will block inside `mesosDriver.run`
+      // until the scheduler exits
       new Thread(Utils.getFormattedClassName(this) + "-mesos-driver") {
         setDaemon(true)
-
         override def run() {
-          mesosDriver = newDriver
           try {
+            mesosDriver = newDriver
             val ret = mesosDriver.run()
             logInfo("driver.run() returned with code " + ret)
             if (ret != null && ret.equals(Status.DRIVER_ABORTED)) {
-              System.exit(1)
+              error = Some(new SparkException("Error starting driver, DRIVER_ABORTED"))
+              markErr()
             }
           } catch {
             case e: Exception => {
               logError("driver.run() failed", e)
-              System.exit(1)
+              error = Some(e)
+              markErr()
             }
           }
         }
       }.start()
 
       registerLatch.await()
+
+      // propagate any error to the calling thread. This ensures that SparkContext creation fails
+      // without leaving a broken context that won't be able to schedule any tasks
+      error.foreach(throw _)
     }
   }
 
@@ -144,6 +153,10 @@ private[mesos] trait MesosSchedulerUtils extends Logging {
     registerLatch.countDown()
   }
 
+  protected def markErr(): Unit = {
+    registerLatch.countDown()
+  }
+
   def createResource(name: String, amount: Double, role: Option[String] = None): Resource = {
     val builder = Resource.newBuilder()
       .setName(name)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to