Repository: spark Updated Branches: refs/heads/branch-1.2 47931975e -> 6b6b7791d
[SPARK-4498][core] Don't transition ExecutorInfo to RUNNING until Driver adds Executor The ExecutorInfo only reaches the RUNNING state if the Driver is alive to send the ExecutorStateChanged message to master. Else, appInfo.resetRetryCount() is never called and failing Executors will eventually exceed ApplicationState.MAX_NUM_RETRY, resulting in the application being removed from the master's accounting. Author: Mark Hamstra <[email protected]> Closes #3550 from markhamstra/SPARK-4498 and squashes the following commits: 8f543b1 [Mark Hamstra] Don't transition ExecutorInfo to RUNNING until Executor is added by Driver Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6b6b7791 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6b6b7791 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6b6b7791 Branch: refs/heads/branch-1.2 Commit: 6b6b7791d544376f8010b20e839c1627a71c69cb Parents: 4793197 Author: Mark Hamstra <[email protected]> Authored: Wed Dec 3 15:08:01 2014 -0800 Committer: Josh Rosen <[email protected]> Committed: Wed Dec 3 15:10:44 2014 -0800 ---------------------------------------------------------------------- core/src/main/scala/org/apache/spark/deploy/client/AppClient.scala | 1 + .../main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/6b6b7791/core/src/main/scala/org/apache/spark/deploy/client/AppClient.scala ---------------------------------------------------------------------- diff --git a/core/src/main/scala/org/apache/spark/deploy/client/AppClient.scala b/core/src/main/scala/org/apache/spark/deploy/client/AppClient.scala index 98a93d1..4efebca 100644 --- a/core/src/main/scala/org/apache/spark/deploy/client/AppClient.scala +++ b/core/src/main/scala/org/apache/spark/deploy/client/AppClient.scala @@ -134,6 +134,7 @@ private[spark] class AppClient( val fullId = appId + "/" + id logInfo("Executor added: %s on %s (%s) with %d cores".format(fullId, workerId, hostPort, cores)) + master ! ExecutorStateChanged(appId, id, ExecutorState.RUNNING, None, None) listener.executorAdded(fullId, workerId, hostPort, cores, memory) case ExecutorUpdated(id, state, message, exitStatus) => http://git-wip-us.apache.org/repos/asf/spark/blob/6b6b7791/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala ---------------------------------------------------------------------- diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala b/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala index 8ba6a01..f4fedc6 100644 --- a/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala @@ -144,8 +144,6 @@ private[spark] class ExecutorRunner( Files.write(header, stderr, UTF_8) stderrAppender = FileAppender(process.getErrorStream, stderr, conf) - state = ExecutorState.RUNNING - worker ! ExecutorStateChanged(appId, execId, state, None, None) // Wait for it to exit; executor may exit with code 0 (when driver instructs it to shutdown) // or with nonzero exit code val exitCode = process.waitFor() --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
