Repository: incubator-reef
Updated Branches:
  refs/heads/master 582284e6d -> 48c16b8ce
[REEF-726] Fix a race condition with completed Containers

This removes the releasing of Evaluators directly in `YarnContainerManager` and instead delegates the job to `EvaluatorManager` on container complete.

JIRA:
  [REEF-726](https://issues.apache.org/jira/browse/REEF-726)

Pull Request:
  This closes #476

Project: http://git-wip-us.apache.org/repos/asf/incubator-reef/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-reef/commit/48c16b8c
Tree: http://git-wip-us.apache.org/repos/asf/incubator-reef/tree/48c16b8c
Diff: http://git-wip-us.apache.org/repos/asf/incubator-reef/diff/48c16b8c

Branch: refs/heads/master
Commit: 48c16b8ce4dfa4423881a9f6f199e0998c547fed
Parents: 582284e
Author: Andrew Chung <afchun...@gmail.com>
Authored: Wed Sep 9 14:48:07 2015 -0700
Committer: Markus Weimer <wei...@apache.org>
Committed: Mon Sep 14 15:22:16 2015 -0700

----------------------------------------------------------------------
 .../reef/runtime/common/driver/evaluator/EvaluatorManager.java | 1 -
 .../org/apache/reef/runtime/yarn/driver/YarnContainerManager.java | 3 +--
 2 files changed, 1 insertion(+), 3 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-reef/blob/48c16b8c/lang/java/reef-common/src/main/java/org/apache/reef/runtime/common/driver/evaluator/EvaluatorManager.java
----------------------------------------------------------------------
diff --git a/lang/java/reef-common/src/main/java/org/apache/reef/runtime/common/driver/evaluator/EvaluatorManager.java b/lang/java/reef-common/src/main/java/org/apache/reef/runtime/common/driver/evaluator/EvaluatorManager.java
index 3ceff3e..018c3eb 100644
--- a/lang/java/reef-common/src/main/java/org/apache/reef/runtime/common/driver/evaluator/EvaluatorManager.java
+++ b/lang/java/reef-common/src/main/java/org/apache/reef/runtime/common/driver/evaluator/EvaluatorManager.java
@@ -556,7 +556,6 @@ public final class EvaluatorManager implements Identifiable, AutoCloseable {
             .append(this.task.get().getId())
             .append("] was running when the Evaluator crashed.");
       }
-      this.isResourceReleased = true;
       if (resourceStatusEvent.getState() == ReefServiceProtos.State.KILLED) {
         this.onEvaluatorException(new EvaluatorKilledByResourceManagerException(this.evaluatorId,

http://git-wip-us.apache.org/repos/asf/incubator-reef/blob/48c16b8c/lang/java/reef-runtime-yarn/src/main/java/org/apache/reef/runtime/yarn/driver/YarnContainerManager.java
----------------------------------------------------------------------
diff --git a/lang/java/reef-runtime-yarn/src/main/java/org/apache/reef/runtime/yarn/driver/YarnContainerManager.java b/lang/java/reef-runtime-yarn/src/main/java/org/apache/reef/runtime/yarn/driver/YarnContainerManager.java
index 0e594ac..e088649 100644
--- a/lang/java/reef-runtime-yarn/src/main/java/org/apache/reef/runtime/yarn/driver/YarnContainerManager.java
+++ b/lang/java/reef-runtime-yarn/src/main/java/org/apache/reef/runtime/yarn/driver/YarnContainerManager.java
@@ -343,8 +343,6 @@ final class YarnContainerManager
             status.setState(ReefServiceProtos.State.FAILED);
           }
           status.setExitCode(value.getExitStatus());
-          // remove the completed container (can be either done/killed/failed) from book keeping
-          this.containers.removeAndGet(containerId);
           break;
         default:
           LOG.info("Container running");
@@ -356,6 +354,7 @@ final class YarnContainerManager
         status.setDiagnostics(value.getDiagnostics());
       }

+      // The ResourceStatusHandler should close and release the Evaluator for us if the state is a terminal state.
       this.reefEventHandlers.onResourceStatus(status.build());
     }
   }