Repository: hadoop
Updated Branches:
  refs/heads/YARN-1011 ad642186a -> bb5991423
YARN-8813. Improve debug messages for NM preemption of OPPORTUNISTIC containers (haibochen via rkanter) Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/bb599142 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/bb599142 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/bb599142 Branch: refs/heads/YARN-1011 Commit: bb59914237b76cb7409b63da27b21a07a4481942 Parents: ad64218 Author: Robert Kanter <[email protected]> Authored: Tue Oct 9 16:15:35 2018 -0700 Committer: Robert Kanter <[email protected]> Committed: Tue Oct 9 16:15:35 2018 -0700 ---------------------------------------------------------------------- .../linux/resources/CGroupElasticMemoryController.java | 13 ++++++++----- .../linux/resources/DefaultOOMHandler.java | 3 +++ .../monitor/ContainersMonitorImpl.java | 4 ++++ .../SnapshotBasedOverAllocationPreemptionPolicy.java | 12 ++++++++++++ 4 files changed, 27 insertions(+), 5 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/bb599142/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupElasticMemoryController.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupElasticMemoryController.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupElasticMemoryController.java index 752c3a6..b47edbe 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupElasticMemoryController.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupElasticMemoryController.java @@ -94,6 +94,7 @@ public class CGroupElasticMemoryController extends Thread { boolean controlVirtual = controlVirtualMemory && !controlPhysicalMemory; Runnable oomHandlerTemp = getDefaultOOMHandler(conf, context, oomHandlerOverride, controlVirtual); + LOG.info("Using OOMHandler: " + oomHandlerTemp.getClass().getName()); if (controlPhysicalMemory && controlVirtualMemory) { LOG.warn( NM_ELASTIC_MEMORY_CONTROL_ENABLED + " is on. " + @@ -138,11 +139,10 @@ public class CGroupElasticMemoryController extends Thread { Configuration conf, Context context, Runnable oomHandlerLocal, boolean controlVirtual) throws YarnException { - Class oomHandlerClass = - conf.getClass( - YarnConfiguration.NM_ELASTIC_MEMORY_CONTROL_OOM_HANDLER, - DefaultOOMHandler.class); if (oomHandlerLocal == null) { + Class oomHandlerClass = conf.getClass( + YarnConfiguration.NM_ELASTIC_MEMORY_CONTROL_OOM_HANDLER, + DefaultOOMHandler.class); try { Constructor constr = oomHandlerClass.getConstructor( Context.class, boolean.class); @@ -284,12 +284,15 @@ public class CGroupElasticMemoryController extends Thread { // This loop can be exited by terminating the process // with stopListening() while ((read = events.read(event)) == event.length) { + if (LOG.isDebugEnabled()) { + LOG.debug("OOM event notification received from oom-listener"); + } // An OOM event has occurred resolveOOM(executor); } if (read != -1) { - LOG.warn(String.format("Characters returned from event hander: %d", + LOG.warn(String.format("Characters returned from event handler: %d", read)); } 
http://git-wip-us.apache.org/repos/asf/hadoop/blob/bb599142/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/DefaultOOMHandler.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/DefaultOOMHandler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/DefaultOOMHandler.java index 86137b5..595aa70 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/DefaultOOMHandler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/DefaultOOMHandler.java @@ -181,6 +181,9 @@ public class DefaultOOMHandler implements Runnable { CGroupsHandler.CGroupController.MEMORY, "", CGROUP_PARAM_MEMORY_OOM_CONTROL); + if (LOG.isDebugEnabled()) { + LOG.debug("OOM status read from cgroups: " + status); + } if (!status.contains(CGroupsHandler.UNDER_OOM)) { break; } http://git-wip-us.apache.org/repos/asf/hadoop/blob/bb599142/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java index c36dfd4..8bd4c47 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java @@ -1096,6 +1096,10 @@ public class ContainersMonitorImpl extends AbstractService implements private void setLatestContainersUtilization(ResourceUtilization utilization) { this.latestContainersUtilization = new ContainersResourceUtilization( utilization, Time.now()); + if (LOG.isDebugEnabled()) { + LOG.debug("Updated latest containers resource utilization to " + + latestContainersUtilization.getUtilization()); + } } /** http://git-wip-us.apache.org/repos/asf/hadoop/blob/bb599142/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/SnapshotBasedOverAllocationPreemptionPolicy.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/SnapshotBasedOverAllocationPreemptionPolicy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/SnapshotBasedOverAllocationPreemptionPolicy.java index 188a108..e4665bb 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/SnapshotBasedOverAllocationPreemptionPolicy.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/SnapshotBasedOverAllocationPreemptionPolicy.java @@ -20,6 +20,8 @@ package org.apache.hadoop.yarn.server.nodemanager.containermanager.scheduler; import org.apache.hadoop.yarn.api.records.ResourceUtilization; import org.apache.hadoop.yarn.server.api.records.ResourceThresholds; import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitor; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * An implementation of {@link NMAllocationPreemptionPolicy} based on the @@ -29,6 +31,8 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.Contai */ public class SnapshotBasedOverAllocationPreemptionPolicy extends NMAllocationPreemptionPolicy { + private static final Logger LOG = LoggerFactory.getLogger( + SnapshotBasedOverAllocationPreemptionPolicy.class); private final int absoluteMemoryPreemptionThresholdMb; private final float cpuPreemptionThreshold; private final int maxTimesCpuOverPreemption; @@ -52,6 +56,10 @@ public class SnapshotBasedOverAllocationPreemptionPolicy ResourceUtilization utilization = getContainersMonitor().getContainersUtilization(true).getUtilization(); + if (LOG.isDebugEnabled()) { + LOG.debug("The latest container utilization is" + utilization); + } + int memoryOverLimit = utilization.getPhysicalMemory() - absoluteMemoryPreemptionThresholdMb; float vcoreOverLimit = utilization.getCPU() - cpuPreemptionThreshold; @@ -59,6 +67,10 @@ public class SnapshotBasedOverAllocationPreemptionPolicy if (vcoreOverLimit > 0) { timesCpuOverPreemption++; if (timesCpuOverPreemption > maxTimesCpuOverPreemption) { + if 
(LOG.isDebugEnabled()) {
+ LOG.debug("CPU utilization is over the preemption threshold " +
+ timesCpuOverPreemption + " times consecutively.");
+ }
 timesCpuOverPreemption = 0;
 } else {
 // report no over limit for cpu if # of times CPU is over the preemption
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]
