Repository: hadoop Updated Branches: refs/heads/branch-2 135ceb6c7 -> da9f39b10
YARN-4762. Fixed CgroupHandler's creation and usage to avoid NodeManagers crashing when LinuxContainerExecutor is enabled. (Sidharta Seethana via vinodkv) (cherry picked from commit b2661765a5a48392a5691cee15904ed2de147b00) Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/da9f39b1 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/da9f39b1 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/da9f39b1 Branch: refs/heads/branch-2 Commit: da9f39b107855003c21728fe83e4ca9f84bc28c8 Parents: 135ceb6 Author: Vinod Kumar Vavilapalli <[email protected]> Authored: Mon Mar 7 11:08:17 2016 -0800 Committer: Vinod Kumar Vavilapalli <[email protected]> Committed: Mon Mar 7 11:11:29 2016 -0800 ---------------------------------------------------------------------- .../linux/resources/ResourceHandlerModule.java | 27 +++++++++++++++----- .../DelegatingLinuxContainerRuntime.java | 13 +--------- .../runtime/DockerLinuxContainerRuntime.java | 27 +++++++++++++++++++- .../runtime/TestDockerContainerRuntime.java | 15 +++++++++++ 4 files changed, 62 insertions(+), 20 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/da9f39b1/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/ResourceHandlerModule.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/ResourceHandlerModule.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/ResourceHandlerModule.java index 7507a82..7fc04bd 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/ResourceHandlerModule.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/ResourceHandlerModule.java @@ -63,7 +63,7 @@ public class ResourceHandlerModule { /** * Returns an initialized, thread-safe CGroupsHandler instance. */ - public static CGroupsHandler getCGroupsHandler(Configuration conf) + private static CGroupsHandler getInitializedCGroupsHandler(Configuration conf) throws ResourceHandlerException { if (cGroupsHandler == null) { synchronized (CGroupsHandler.class) { @@ -77,7 +77,17 @@ public class ResourceHandlerModule { return cGroupsHandler; } - private static CGroupsCpuResourceHandlerImpl getcGroupsCpuResourceHandler( + /** + * Returns a (possibly null) reference to a cGroupsHandler. This handler is + * non-null only if one or more of the known cgroups-based resource + * handlers are in use and have been initialized. + */ + + public static CGroupsHandler getCGroupsHandler() { + return cGroupsHandler; + } + + private static CGroupsCpuResourceHandlerImpl getCGroupsCpuResourceHandler( Configuration conf) throws ResourceHandlerException { boolean cgroupsCpuEnabled = conf.getBoolean(YarnConfiguration.NM_CPU_RESOURCE_ENABLED, @@ -92,7 +102,8 @@ public class ResourceHandlerModule { if (cGroupsCpuResourceHandler == null) { LOG.debug("Creating new cgroups cpu handler"); cGroupsCpuResourceHandler = - new CGroupsCpuResourceHandlerImpl(getCGroupsHandler(conf)); + new CGroupsCpuResourceHandlerImpl( + getInitializedCGroupsHandler(conf)); return cGroupsCpuResourceHandler; } } @@ -112,7 +123,7 @@ public class ResourceHandlerModule { LOG.debug("Creating new traffic control bandwidth handler"); trafficControlBandwidthHandler = new TrafficControlBandwidthHandlerImpl(PrivilegedOperationExecutor - .getInstance(conf), getCGroupsHandler(conf), + .getInstance(conf), getInitializedCGroupsHandler(conf), new TrafficController(conf, PrivilegedOperationExecutor .getInstance(conf))); } @@ -147,7 +158,8 @@ public class ResourceHandlerModule { if (cGroupsBlkioResourceHandler == null) { LOG.debug("Creating new cgroups blkio handler"); cGroupsBlkioResourceHandler = - new CGroupsBlkioResourceHandlerImpl(getCGroupsHandler(conf)); + new CGroupsBlkioResourceHandlerImpl( + getInitializedCGroupsHandler(conf)); } } } @@ -170,7 +182,8 @@ public class ResourceHandlerModule { synchronized (MemoryResourceHandler.class) { if (cGroupsMemoryResourceHandler == null) { cGroupsMemoryResourceHandler = - new CGroupsMemoryResourceHandlerImpl(getCGroupsHandler(conf)); + new CGroupsMemoryResourceHandlerImpl( + getInitializedCGroupsHandler(conf)); } } } @@ -191,7 +204,7 @@ public class ResourceHandlerModule { addHandlerIfNotNull(handlerList, getOutboundBandwidthResourceHandler(conf)); addHandlerIfNotNull(handlerList, getDiskResourceHandler(conf)); addHandlerIfNotNull(handlerList, getMemoryResourceHandler(conf)); - addHandlerIfNotNull(handlerList, getcGroupsCpuResourceHandler(conf)); + addHandlerIfNotNull(handlerList, getCGroupsCpuResourceHandler(conf)); resourceHandlerChain = new ResourceHandlerChain(handlerList); } http://git-wip-us.apache.org/repos/asf/hadoop/blob/da9f39b1/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DelegatingLinuxContainerRuntime.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DelegatingLinuxContainerRuntime.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DelegatingLinuxContainerRuntime.java index 7adba4d..75abfb0 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DelegatingLinuxContainerRuntime.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DelegatingLinuxContainerRuntime.java @@ -27,9 +27,6 @@ import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationExecutor; -import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupsHandler; -import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerException; -import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerModule; import org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerExecutionException; import org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerRuntimeContext; @@ -48,19 +45,11 @@ public class DelegatingLinuxContainerRuntime implements LinuxContainerRuntime { throws ContainerExecutionException { PrivilegedOperationExecutor privilegedOperationExecutor = PrivilegedOperationExecutor.getInstance(conf); - CGroupsHandler cGroupsHandler; - try { - cGroupsHandler = ResourceHandlerModule.getCGroupsHandler(conf); - } catch (ResourceHandlerException e) { - LOG.error("Unable to get cgroups handle."); - throw new ContainerExecutionException(e); - } - defaultLinuxContainerRuntime = new DefaultLinuxContainerRuntime( privilegedOperationExecutor); defaultLinuxContainerRuntime.initialize(conf); dockerLinuxContainerRuntime = new DockerLinuxContainerRuntime( - privilegedOperationExecutor, cGroupsHandler); + privilegedOperationExecutor); dockerLinuxContainerRuntime.initialize(conf); } http://git-wip-us.apache.org/repos/asf/hadoop/blob/da9f39b1/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DockerLinuxContainerRuntime.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DockerLinuxContainerRuntime.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DockerLinuxContainerRuntime.java index 2b4fc79..c66189d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DockerLinuxContainerRuntime.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DockerLinuxContainerRuntime.java @@ -20,6 +20,7 @@ package org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime; +import com.google.common.annotations.VisibleForTesting; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience; @@ -36,6 +37,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileg import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationException; import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationExecutor; import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupsHandler; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerModule; import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.docker.DockerClient; import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.docker.DockerRunCommand; import org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerExecutionException; @@ -89,9 +91,24 @@ public class DockerLinuxContainerRuntime implements LinuxContainerRuntime { } public DockerLinuxContainerRuntime(PrivilegedOperationExecutor + privilegedOperationExecutor) { + this(privilegedOperationExecutor, ResourceHandlerModule + .getCGroupsHandler()); + } + + //A constructor with an injected cGroupsHandler primarily used for testing. + @VisibleForTesting + public DockerLinuxContainerRuntime(PrivilegedOperationExecutor privilegedOperationExecutor, CGroupsHandler cGroupsHandler) { this.privilegedOperationExecutor = privilegedOperationExecutor; - this.cGroupsHandler = cGroupsHandler; + + if (cGroupsHandler == null) { + if (LOG.isInfoEnabled()) { + LOG.info("cGroupsHandler is null - cgroups not in use."); + } + } else { + this.cGroupsHandler = cGroupsHandler; + } } @Override @@ -113,6 +130,14 @@ public class DockerLinuxContainerRuntime implements LinuxContainerRuntime { public void addCGroupParentIfRequired(String resourcesOptions, String containerIdStr, DockerRunCommand runCommand) throws ContainerExecutionException { + if (cGroupsHandler == null) { + if (LOG.isDebugEnabled()) { + LOG.debug("cGroupsHandler is null. cgroups are not in use. nothing to" + + " do."); + } + return; + } + if (resourcesOptions.equals( (PrivilegedOperation.CGROUP_ARG_PREFIX + PrivilegedOperation .CGROUP_ARG_NO_TASKS))) { http://git-wip-us.apache.org/repos/asf/hadoop/blob/da9f39b1/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/TestDockerContainerRuntime.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/TestDockerContainerRuntime.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/TestDockerContainerRuntime.java index 6898634..e05719c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/TestDockerContainerRuntime.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/TestDockerContainerRuntime.java @@ -429,5 +429,20 @@ public class TestDockerContainerRuntime { //--cgroup-parent should be added for the containerId in question String expectedPath = "/" + hierarchy + "/" + containerIdStr; Mockito.verify(command).setCGroupParent(expectedPath); + + //create a runtime with a 'null' cgroups handler - i.e no + // cgroup-based resource handlers are in use. + + runtime = new DockerLinuxContainerRuntime + (mockExecutor, null); + runtime.initialize(conf); + + runtime.addCGroupParentIfRequired(resourceOptionsNone, containerIdStr, + command); + runtime.addCGroupParentIfRequired(resourceOptionsCpu, containerIdStr, + command); + + //no --cgroup-parent should be added in either case + Mockito.verifyZeroInteractions(command); } }
