YARN-6739. Crash NM at start time if oversubscription is on but LinuxContainerExecutor or cgroup is off. Contributed by Haibo Chen.
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/00678001 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/00678001 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/00678001 Branch: refs/heads/YARN-1011 Commit: 006780013df5624cce67b58dfdc20995a461878a Parents: f9da8c6 Author: Miklos Szegedi <[email protected]> Authored: Mon Apr 2 15:09:52 2018 -0700 Committer: Haibo Chen <[email protected]> Committed: Fri Sep 21 17:02:34 2018 -0700 ---------------------------------------------------------------------- .../monitor/ContainersMonitorImpl.java | 25 ++++++++++++++++++++ 1 file changed, 25 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/00678001/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java index 70a8ebf..7873882 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java @@ -24,6 +24,7 @@ import org.apache.hadoop.yarn.exceptions.YarnException; import 
org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupElasticMemoryController; import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.MemoryResourceHandler; import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerModule; +import org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor; import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -221,6 +222,7 @@ public class ContainersMonitorImpl extends AbstractService implements initializeOverAllocation(conf); if (context.isOverAllocationEnabled()) { + checkOverAllocationPrerequisites(); pmemCheckEnabled = true; LOG.info("Force enabling physical memory checks because " + "overallocation is enabled"); @@ -260,6 +262,29 @@ public class ContainersMonitorImpl extends AbstractService implements super.serviceInit(this.conf); } + /** + * Check all prerequisites for NM over-allocation. 
+ */ + private void checkOverAllocationPrerequisites() throws YarnException { + // LinuxContainerExecutor is required to enable overallocation + if (!(containerExecutor instanceof LinuxContainerExecutor)) { + throw new YarnException(LinuxContainerExecutor.class.getName() + + " is required for overallocation"); + } + if (ResourceHandlerModule.getCGroupsHandler() == null) { + throw new YarnException("CGroups must be enabled to support" + + " overallocation"); + } + if (ResourceHandlerModule.getCpuResourceHandler() == null) { + throw new YarnException( + "CGroups cpu isolation must be enabled to support overallocation"); + } + if (ResourceHandlerModule.getMemoryResourceHandler() == null) { + throw new YarnException( + "CGroups memory isolation must be enabled for overallocation"); + } + } + private boolean isContainerMonitorEnabled() { return conf.getBoolean(YarnConfiguration.NM_CONTAINER_MONITOR_ENABLED, YarnConfiguration.DEFAULT_NM_CONTAINER_MONITOR_ENABLED); --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
