Repository: hadoop
Updated Branches:
  refs/heads/branch-2.7 b52ac3cb4 -> 3afdac6e2
YARN-4794. Deadlock in NMClientImpl. Contributed by Jian He.

Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/3afdac6e
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/3afdac6e
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/3afdac6e

Branch: refs/heads/branch-2.7
Commit: 3afdac6e2c5b98cf1b5e3069ae722a06f645d58d
Parents: b52ac3c
Author: Rohith Sharma K S <rohithsharm...@apache.org>
Authored: Wed Apr 13 11:13:42 2016 +0530
Committer: Rohith Sharma K S <rohithsharm...@apache.org>
Committed: Wed Apr 13 11:13:42 2016 +0530

----------------------------------------------------------------------
 .../yarn/client/api/impl/NMClientImpl.java | 39 ++++----------------
 1 file changed, 8 insertions(+), 31 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/hadoop/blob/3afdac6e/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/NMClientImpl.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/NMClientImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/NMClientImpl.java
index 3518f35..2ed3ca1 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/NMClientImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/NMClientImpl.java
@@ -147,8 +147,7 @@ public class NMClientImpl extends NMClient {
     private ContainerState state;
 
 
-    public StartedContainer(ContainerId containerId, NodeId nodeId,
-        Token containerToken) {
+    public StartedContainer(ContainerId containerId, NodeId nodeId) {
       this.containerId = containerId;
       this.nodeId = nodeId;
       state = ContainerState.NEW;
@@ -170,8 +169,6 @@ public class NMClientImpl extends NMClient {
       throw RPCUtil.getRemoteException("Container "
           + startedContainer.containerId.toString() + " is already started");
     }
-    startedContainers
-        .put(startedContainer.getContainerId(), startedContainer);
   }
 
   @Override
@@ -181,7 +178,8 @@ public class NMClientImpl extends NMClient {
     // Do synchronization on StartedContainer to prevent race condition
     // between startContainer and stopContainer only when startContainer is
     // in progress for a given container.
-    StartedContainer startingContainer = createStartedContainer(container);
+    StartedContainer startingContainer =
+        new StartedContainer(container.getId(), container.getNodeId());
     synchronized (startingContainer) {
       addStartingContainer(startingContainer);
 
@@ -209,18 +207,14 @@ public class NMClientImpl extends NMClient {
         }
         allServiceResponse = response.getAllServicesMetaData();
         startingContainer.state = ContainerState.RUNNING;
-      } catch (YarnException e) {
+      } catch (YarnException | IOException e) {
         startingContainer.state = ContainerState.COMPLETE;
         // Remove the started container if it failed to start
-        removeStartedContainer(startingContainer);
-        throw e;
-      } catch (IOException e) {
-        startingContainer.state = ContainerState.COMPLETE;
-        removeStartedContainer(startingContainer);
+        startedContainers.remove(startingContainer.containerId);
         throw e;
       } catch (Throwable t) {
         startingContainer.state = ContainerState.COMPLETE;
-        removeStartedContainer(startingContainer);
+        startedContainers.remove(startingContainer.containerId);
         throw RPCUtil.getRemoteException(t);
       } finally {
         if (proxy != null) {
@@ -234,7 +228,7 @@ public class NMClientImpl extends NMClient {
   @Override
   public void stopContainer(ContainerId containerId, NodeId nodeId)
       throws YarnException, IOException {
-    StartedContainer startedContainer = getStartedContainer(containerId);
+    StartedContainer startedContainer = startedContainers.get(containerId);
 
     // Only allow one request of stopping the container to move forward
     // When entering the block, check whether the precursor has already stopped
@@ -247,7 +241,7 @@ public class NMClientImpl extends NMClient {
         stopContainerInternal(containerId, nodeId);
         // Only after successful
         startedContainer.state = ContainerState.COMPLETE;
-        removeStartedContainer(startedContainer);
+        startedContainers.remove(startedContainer.containerId);
       }
     } else {
       stopContainerInternal(containerId, nodeId);
@@ -304,23 +298,6 @@ public class NMClientImpl extends NMClient {
       }
     }
   }
-
-  protected synchronized StartedContainer createStartedContainer(
-      Container container) throws YarnException, IOException {
-    StartedContainer startedContainer = new StartedContainer(container.getId(),
-        container.getNodeId(), container.getContainerToken());
-    return startedContainer;
-  }
-
-  protected synchronized void
-      removeStartedContainer(StartedContainer container) {
-    startedContainers.remove(container.containerId);
-  }
-
-  protected synchronized StartedContainer getStartedContainer(
-      ContainerId containerId) {
-    return startedContainers.get(containerId);
-  }
 
   public AtomicBoolean getCleanupRunningContainers() {
     return cleanupRunningContainers;