Author: suresh
Date: Mon Dec 3 18:04:51 2012
New Revision: 1416603
URL: http://svn.apache.org/viewvc?rev=1416603&view=rev
Log:
Merging trunk to HDFS-2802 branch.
Added:
hadoop/common/branches/HDFS-2802/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/MockNodeStatusUpdater.java
- copied unchanged from r1416602,
hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/MockNodeStatusUpdater.java
hadoop/common/branches/HDFS-2802/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerShutdown.java
- copied unchanged from r1416602,
hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerShutdown.java
Modified:
hadoop/common/branches/HDFS-2802/hadoop-yarn-project/CHANGES.txt
hadoop/common/branches/HDFS-2802/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/CMgrCompletedContainersEvent.java
hadoop/common/branches/HDFS-2802/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java
hadoop/common/branches/HDFS-2802/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java
hadoop/common/branches/HDFS-2802/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java
Modified: hadoop/common/branches/HDFS-2802/hadoop-yarn-project/CHANGES.txt
URL:
http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-2802/hadoop-yarn-project/CHANGES.txt?rev=1416603&r1=1416602&r2=1416603&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-2802/hadoop-yarn-project/CHANGES.txt (original)
+++ hadoop/common/branches/HDFS-2802/hadoop-yarn-project/CHANGES.txt Mon Dec 3 18:04:51 2012
@@ -117,7 +117,11 @@ Release 2.0.3-alpha - Unreleased
YARN-229. Remove old unused RM recovery code. (Bikas Saha via acmurthy)
- YARN-187. Add hierarchical queues to the fair scheduler. (Sandy Ryza via tomwhite)
+ YARN-187. Add hierarchical queues to the fair scheduler.
+ (Sandy Ryza via tomwhite)
+
+ YARN-72. NM should handle cleaning up containers when it shuts down.
+ (Sandy Ryza via tomwhite)
Release 2.0.2-alpha - 2012-09-07
Modified:
hadoop/common/branches/HDFS-2802/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/CMgrCompletedContainersEvent.java
URL:
http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-2802/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/CMgrCompletedContainersEvent.java?rev=1416603&r1=1416602&r2=1416603&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-2802/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/CMgrCompletedContainersEvent.java (original)
+++ hadoop/common/branches/HDFS-2802/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/CMgrCompletedContainersEvent.java Mon Dec 3 18:04:51 2012
@@ -25,13 +25,23 @@ import org.apache.hadoop.yarn.api.record
public class CMgrCompletedContainersEvent extends ContainerManagerEvent {
private List<ContainerId> containerToCleanup;
-
- public CMgrCompletedContainersEvent(List<ContainerId> containersToCleanup) {
+ private Reason reason;
+
+ public CMgrCompletedContainersEvent(List<ContainerId> containersToCleanup, Reason reason) {
super(ContainerManagerEventType.FINISH_CONTAINERS);
this.containerToCleanup = containersToCleanup;
+ this.reason = reason;
}
public List<ContainerId> getContainersToCleanup() {
return this.containerToCleanup;
}
+
+ public Reason getReason() {
+ return reason;
+ }
+
+ public static enum Reason {
+ ON_SHUTDOWN, BY_RESOURCEMANAGER
+ }
}
Modified:
hadoop/common/branches/HDFS-2802/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java
URL:
http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-2802/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java?rev=1416603&r1=1416602&r2=1416603&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-2802/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java (original)
+++ hadoop/common/branches/HDFS-2802/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java Mon Dec 3 18:04:51 2012
@@ -19,6 +19,9 @@
package org.apache.hadoop.yarn.server.nodemanager;
import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.ConcurrentSkipListMap;
@@ -61,14 +64,24 @@ public class NodeManager extends Composi
* Priority of the NodeManager shutdown hook.
*/
public static final int SHUTDOWN_HOOK_PRIORITY = 30;
+
+ /**
+ * Extra duration to wait for containers to be killed on shutdown.
+ */
+ private static final int SHUTDOWN_CLEANUP_SLOP_MS = 1000;
private static final Log LOG = LogFactory.getLog(NodeManager.class);
protected final NodeManagerMetrics metrics = NodeManagerMetrics.create();
private ApplicationACLsManager aclsManager;
private NodeHealthCheckerService nodeHealthChecker;
private LocalDirsHandlerService dirsHandler;
+ private Context context;
+ private AsyncDispatcher dispatcher;
+ private ContainerManagerImpl containerManager;
private static CompositeServiceShutdownHook nodeManagerShutdownHook;
+ private long waitForContainersOnShutdownMillis;
+
public NodeManager() {
super(NodeManager.class.getName());
}
@@ -115,7 +128,7 @@ public class NodeManager extends Composi
containerTokenSecretManager = new NMContainerTokenSecretManager(conf);
}
- Context context = new NMContext(containerTokenSecretManager);
+ this.context = new NMContext(containerTokenSecretManager);
this.aclsManager = new ApplicationACLsManager(conf);
@@ -131,7 +144,7 @@ public class NodeManager extends Composi
addService(del);
// NodeManager level dispatcher
- AsyncDispatcher dispatcher = new AsyncDispatcher();
+ this.dispatcher = new AsyncDispatcher();
nodeHealthChecker = new NodeHealthCheckerService();
addService(nodeHealthChecker);
@@ -144,7 +157,7 @@ public class NodeManager extends Composi
NodeResourceMonitor nodeResourceMonitor = createNodeResourceMonitor();
addService(nodeResourceMonitor);
- ContainerManagerImpl containerManager =
+ containerManager =
createContainerManager(context, exec, del, nodeStatusUpdater,
this.aclsManager, dirsHandler);
addService(containerManager);
@@ -155,13 +168,20 @@ public class NodeManager extends Composi
dispatcher.register(ContainerManagerEventType.class, containerManager);
addService(dispatcher);
-
+
DefaultMetricsSystem.initialize("NodeManager");
// StatusUpdater should be added last so that it get started last
// so that we make sure everything is up before registering with RM.
addService(nodeStatusUpdater);
-
+
+ waitForContainersOnShutdownMillis =
+ conf.getLong(YarnConfiguration.NM_SLEEP_DELAY_BEFORE_SIGKILL_MS,
+ YarnConfiguration.DEFAULT_NM_SLEEP_DELAY_BEFORE_SIGKILL_MS) +
+ conf.getLong(YarnConfiguration.NM_PROCESS_KILL_WAIT_MS,
+ YarnConfiguration.DEFAULT_NM_PROCESS_KILL_WAIT_MS) +
+ SHUTDOWN_CLEANUP_SLOP_MS;
+
super.init(conf);
// TODO add local dirs to del
}
@@ -178,9 +198,44 @@ public class NodeManager extends Composi
@Override
public void stop() {
+ cleanupContainers();
super.stop();
DefaultMetricsSystem.shutdown();
}
+
+ @SuppressWarnings("unchecked")
+ private void cleanupContainers() {
+ Map<ContainerId, Container> containers = context.getContainers();
+ if (containers.isEmpty()) {
+ return;
+ }
+ LOG.info("Containers still running on shutdown: " + containers.keySet());
+
+ List<ContainerId> containerIds = new ArrayList<ContainerId>(containers.keySet());
+ dispatcher.getEventHandler().handle(
+ new CMgrCompletedContainersEvent(containerIds,
+ CMgrCompletedContainersEvent.Reason.ON_SHUTDOWN));
+
+ LOG.info("Waiting for containers to be killed");
+
+ long waitStartTime = System.currentTimeMillis();
+ while (!containers.isEmpty() &&
+ System.currentTimeMillis() - waitStartTime < waitForContainersOnShutdownMillis) {
+ try {
+ Thread.sleep(1000);
+ } catch (InterruptedException ex) {
+ LOG.warn("Interrupted while sleeping on container kill", ex);
+ }
+ }
+
+ // All containers killed
+ if (containers.isEmpty()) {
+ LOG.info("All containers in DONE state");
+ } else {
+ LOG.info("Done waiting for containers to be killed. Still alive: " +
+ containers.keySet());
+ }
+ }
public static class NMContext implements Context {
@@ -282,6 +337,11 @@ public class NodeManager extends Composi
NodeManager createNewNodeManager() {
return new NodeManager();
}
+
+ // For testing
+ ContainerManagerImpl getContainerManager() {
+ return containerManager;
+ }
public static void main(String[] args) {
Thread.setDefaultUncaughtExceptionHandler(new YarnUncaughtExceptionHandler());
Modified:
hadoop/common/branches/HDFS-2802/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java
URL:
http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-2802/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java?rev=1416603&r1=1416602&r2=1416603&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-2802/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java (original)
+++ hadoop/common/branches/HDFS-2802/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java Mon Dec 3 18:04:51 2012
@@ -363,7 +363,8 @@ public class NodeStatusUpdaterImpl exten
.getContainersToCleanupList();
if (containersToCleanup.size() != 0) {
dispatcher.getEventHandler().handle(
- new CMgrCompletedContainersEvent(containersToCleanup));
+ new CMgrCompletedContainersEvent(containersToCleanup,
+ CMgrCompletedContainersEvent.Reason.BY_RESOURCEMANAGER));
}
List<ApplicationId> appsToCleanup =
response.getApplicationsToCleanupList();
Modified:
hadoop/common/branches/HDFS-2802/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java
URL:
http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-2802/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java?rev=1416603&r1=1416602&r2=1416603&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-2802/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java (original)
+++ hadoop/common/branches/HDFS-2802/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java Mon Dec 3 18:04:51 2012
@@ -23,6 +23,8 @@ import static org.apache.hadoop.yarn.ser
import java.io.IOException;
import java.net.InetSocketAddress;
import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.List;
import java.util.Map;
import java.util.Set;
@@ -593,9 +595,16 @@ public class ContainerManagerImpl extend
(CMgrCompletedContainersEvent) event;
for (ContainerId container : containersFinishedEvent
.getContainersToCleanup()) {
+ String diagnostic = "";
+ if (containersFinishedEvent.getReason() ==
+ CMgrCompletedContainersEvent.Reason.ON_SHUTDOWN) {
+ diagnostic = "Container Killed on Shutdown";
+ } else if (containersFinishedEvent.getReason() ==
+ CMgrCompletedContainersEvent.Reason.BY_RESOURCEMANAGER) {
+ diagnostic = "Container Killed by ResourceManager";
+ }
this.dispatcher.getEventHandler().handle(
- new ContainerKillEvent(container,
- "Container Killed by ResourceManager"));
+ new ContainerKillEvent(container, diagnostic));
}
break;
default: