Repository: spark
Updated Branches:
  refs/heads/branch-1.0 bc3bfeaff -> 36e687d90


SPARK-1689 AppClient should indicate app is dead() when removed

Previously, we indicated disconnected(), which keeps the application in a limbo 
state where it has no executors but thinks it will get them soon.

This is a bug fix that hopefully can be included in 1.0.

Author: Aaron Davidson <aa...@databricks.com>

Closes #605 from aarondav/appremoved and squashes the following commits:

bea02a2 [Aaron Davidson] SPARK-1689 AppClient should indicate app is dead() when removed
(cherry picked from commit 34719ba32ed421701eaa08bd47ce953cd9267ad7)

Signed-off-by: Patrick Wendell <pwend...@gmail.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/36e687d9
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/36e687d9
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/36e687d9

Branch: refs/heads/branch-1.0
Commit: 36e687d90d28a8f38b08fb53db3069e3000fc09c
Parents: bc3bfea
Author: Aaron Davidson <aa...@databricks.com>
Authored: Sat May 3 13:27:10 2014 -0700
Committer: Patrick Wendell <pwend...@gmail.com>
Committed: Sat May 3 13:27:17 2014 -0700

----------------------------------------------------------------------
 .../org/apache/spark/deploy/client/AppClient.scala      | 12 +++++-------
 .../apache/spark/deploy/client/AppClientListener.scala  |  4 ++--
 .../org/apache/spark/deploy/client/TestClient.scala     |  4 ++--
 .../scheduler/cluster/SparkDeploySchedulerBackend.scala |  6 +++---
 4 files changed, 12 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/36e687d9/core/src/main/scala/org/apache/spark/deploy/client/AppClient.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/deploy/client/AppClient.scala 
b/core/src/main/scala/org/apache/spark/deploy/client/AppClient.scala
index 8901806..57085fc 100644
--- a/core/src/main/scala/org/apache/spark/deploy/client/AppClient.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/client/AppClient.scala
@@ -92,8 +92,7 @@ private[spark] class AppClient(
           if (registered) {
             retryTimer.cancel()
           } else if (retries >= REGISTRATION_RETRIES) {
-            logError("All masters are unresponsive! Giving up.")
-            markDead()
+            markDead("All masters are unresponsive! Giving up.")
           } else {
             tryRegisterAllMasters()
           }
@@ -126,8 +125,7 @@ private[spark] class AppClient(
         listener.connected(appId)
 
       case ApplicationRemoved(message) =>
-        logError("Master removed our application: %s; stopping client".format(message))
-        markDisconnected()
+        markDead("Master removed our application: %s".format(message))
         context.stop(self)
 
       case ExecutorAdded(id: Int, workerId: String, hostPort: String, cores: Int, memory: Int) =>
@@ -158,7 +156,7 @@ private[spark] class AppClient(
         logWarning(s"Could not connect to $address: $cause")
 
       case StopAppClient =>
-        markDead()
+        markDead("Application has been stopped.")
         sender ! true
         context.stop(self)
     }
@@ -173,9 +171,9 @@ private[spark] class AppClient(
       }
     }
 
-    def markDead() {
+    def markDead(reason: String) {
       if (!alreadyDead) {
-        listener.dead()
+        listener.dead(reason)
         alreadyDead = true
       }
     }

http://git-wip-us.apache.org/repos/asf/spark/blob/36e687d9/core/src/main/scala/org/apache/spark/deploy/client/AppClientListener.scala
----------------------------------------------------------------------
diff --git 
a/core/src/main/scala/org/apache/spark/deploy/client/AppClientListener.scala 
b/core/src/main/scala/org/apache/spark/deploy/client/AppClientListener.scala
index 1f20aa3..e584952 100644
--- a/core/src/main/scala/org/apache/spark/deploy/client/AppClientListener.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/client/AppClientListener.scala
@@ -30,8 +30,8 @@ private[spark] trait AppClientListener {
   /** Disconnection may be a temporary state, as we fail over to a new Master. */
   def disconnected(): Unit
 
-  /** Dead means that we couldn't find any Masters to connect to, and have given up. */
-  def dead(): Unit
+  /** An application death is an unrecoverable failure condition. */
+  def dead(reason: String): Unit
 
   def executorAdded(fullId: String, workerId: String, hostPort: String, cores: Int, memory: Int)
 

http://git-wip-us.apache.org/repos/asf/spark/blob/36e687d9/core/src/main/scala/org/apache/spark/deploy/client/TestClient.scala
----------------------------------------------------------------------
diff --git 
a/core/src/main/scala/org/apache/spark/deploy/client/TestClient.scala 
b/core/src/main/scala/org/apache/spark/deploy/client/TestClient.scala
index 888dd45..e15a87b 100644
--- a/core/src/main/scala/org/apache/spark/deploy/client/TestClient.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/client/TestClient.scala
@@ -33,8 +33,8 @@ private[spark] object TestClient {
       System.exit(0)
     }
 
-    def dead() {
-      logInfo("Could not connect to master")
+    def dead(reason: String) {
+      logInfo("Application died with error: " + reason)
       System.exit(0)
     }
 

http://git-wip-us.apache.org/repos/asf/spark/blob/36e687d9/core/src/main/scala/org/apache/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala
----------------------------------------------------------------------
diff --git 
a/core/src/main/scala/org/apache/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala
 
b/core/src/main/scala/org/apache/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala
index 9544ca0..cefa417 100644
--- 
a/core/src/main/scala/org/apache/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala
+++ 
b/core/src/main/scala/org/apache/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala
@@ -83,10 +83,10 @@ private[spark] class SparkDeploySchedulerBackend(
     }
   }
 
-  override def dead() {
+  override def dead(reason: String) {
     if (!stopping) {
-      logError("Spark cluster looks dead, giving up.")
-      scheduler.error("Spark cluster looks down")
+      logError("Application has been killed. Reason: " + reason)
+      scheduler.error(reason)
     }
   }
 

Reply via email to