YARN-4308. ContainersAggregated CPU resource utilization reports negative usage 
in first few heartbeats. Contributed by Sunil G


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/1500a0a3
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/1500a0a3
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/1500a0a3

Branch: refs/heads/HDFS-1312
Commit: 1500a0a3009e453c9f05a93df7a78b4e185eef30
Parents: ae04765
Author: Naganarasimha <naganarasimha...@apache.org>
Authored: Thu Jun 9 05:41:09 2016 +0530
Committer: Naganarasimha <naganarasimha...@apache.org>
Committed: Thu Jun 9 05:41:09 2016 +0530

----------------------------------------------------------------------
 .../yarn/util/ProcfsBasedProcessTree.java       |  8 +++
 .../util/ResourceCalculatorProcessTree.java     |  4 +-
 .../yarn/util/WindowsBasedProcessTree.java      |  8 +++
 .../monitor/ContainersMonitorImpl.java          |  9 +++
 .../MockCPUResourceCalculatorProcessTree.java   | 70 ++++++++++++++++++++
 .../MockResourceCalculatorProcessTree.java      |  5 ++
 .../TestContainersMonitorResourceChange.java    | 62 ++++++++++++++++-
 7 files changed, 163 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/1500a0a3/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ProcfsBasedProcessTree.java
----------------------------------------------------------------------
diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ProcfsBasedProcessTree.java
 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ProcfsBasedProcessTree.java
index bb9c183..80d49c3 100644
--- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ProcfsBasedProcessTree.java
+++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ProcfsBasedProcessTree.java
@@ -467,6 +467,14 @@ public class ProcfsBasedProcessTree extends 
ResourceCalculatorProcessTree {
     return totalStime.add(BigInteger.valueOf(totalUtime));
   }
 
+  /**
+   * Get the CPU usage by all the processes in the process-tree in Unix.
+   * Note: UNAVAILABLE will be returned in case when CPU usage is not
+   * available. It is NOT advised to return any other error code.
+   *
+   * @return percentage CPU usage since the process-tree was created,
+   * {@link #UNAVAILABLE} if CPU usage cannot be calculated or not available.
+   */
   @Override
   public float getCpuUsagePercent() {
     BigInteger processTotalJiffies = getTotalProcessJiffies();

http://git-wip-us.apache.org/repos/asf/hadoop/blob/1500a0a3/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ResourceCalculatorProcessTree.java
----------------------------------------------------------------------
diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ResourceCalculatorProcessTree.java
 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ResourceCalculatorProcessTree.java
index 7214c75..771ec86 100644
--- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ResourceCalculatorProcessTree.java
+++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ResourceCalculatorProcessTree.java
@@ -187,9 +187,11 @@ public abstract class ResourceCalculatorProcessTree 
extends Configured {
    * Get the CPU usage by all the processes in the process-tree based on
    * average between samples as a ratio of overall CPU cycles similar to top.
    * Thus, if 2 out of 4 cores are used this should return 200.0.
+   * Note: UNAVAILABLE will be returned in case when CPU usage is not
+   * available. It is NOT advised to return any other error code.
    *
    * @return percentage CPU usage since the process-tree was created,
-   * {@link #UNAVAILABLE} if it cannot be calculated.
+   * {@link #UNAVAILABLE} if CPU usage cannot be calculated or not available.
    */
   public float getCpuUsagePercent() {
     return UNAVAILABLE;

http://git-wip-us.apache.org/repos/asf/hadoop/blob/1500a0a3/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/WindowsBasedProcessTree.java
----------------------------------------------------------------------
diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/WindowsBasedProcessTree.java
 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/WindowsBasedProcessTree.java
index 7858292..1c7eaf7 100644
--- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/WindowsBasedProcessTree.java
+++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/WindowsBasedProcessTree.java
@@ -268,6 +268,14 @@ public class WindowsBasedProcessTree extends 
ResourceCalculatorProcessTree {
     return BigInteger.valueOf(totalMs);
   }
 
+  /**
+   * Get the CPU usage by all the processes in the process-tree in Windows.
+   * Note: UNAVAILABLE will be returned in case when CPU usage is not
+   * available. It is NOT advised to return any other error code.
+   *
+   * @return percentage CPU usage since the process-tree was created,
+   * {@link #UNAVAILABLE} if CPU usage cannot be calculated or not available.
+   */
   @Override
   public float getCpuUsagePercent() {
     BigInteger processTotalMs = getTotalProcessMs();

http://git-wip-us.apache.org/repos/asf/hadoop/blob/1500a0a3/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java
----------------------------------------------------------------------
diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java
 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java
index cfe6f801..b5c2747 100644
--- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java
+++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java
@@ -455,6 +455,15 @@ public class ContainersMonitorImpl extends AbstractService 
implements
             // cpuUsagePercentPerCore should be 300% and
             // cpuUsageTotalCoresPercentage should be 50%
             float cpuUsagePercentPerCore = pTree.getCpuUsagePercent();
+            if (cpuUsagePercentPerCore < 0) {
+              // CPU usage is not available likely because the container just
+              // started. Let us skip this turn and consider this container
+              // in the next iteration.
+              LOG.info("Skipping monitoring container " + containerId
+                  + " since CPU usage is not yet available.");
+              continue;
+            }
+
             float cpuUsageTotalCoresPercentage = cpuUsagePercentPerCore /
                 resourceCalculatorPlugin.getNumProcessors();
 

http://git-wip-us.apache.org/repos/asf/hadoop/blob/1500a0a3/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/MockCPUResourceCalculatorProcessTree.java
----------------------------------------------------------------------
diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/MockCPUResourceCalculatorProcessTree.java
 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/MockCPUResourceCalculatorProcessTree.java
new file mode 100644
index 0000000..eb35c91
--- /dev/null
+++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/MockCPUResourceCalculatorProcessTree.java
@@ -0,0 +1,70 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor;
+
+import org.apache.hadoop.yarn.util.ResourceCalculatorProcessTree;
+
+/**
+ * Mock class to obtain resource usage (CPU).
+ */
+public class MockCPUResourceCalculatorProcessTree
+    extends ResourceCalculatorProcessTree {
+
+  private long cpuPercentage = ResourceCalculatorProcessTree.UNAVAILABLE;
+
+  /**
+   * Constructor for MockCPUResourceCalculatorProcessTree with specified root
+   * process.
+   * @param root
+   */
+  public MockCPUResourceCalculatorProcessTree(String root) {
+    super(root);
+  }
+
+  @Override
+  public void updateProcessTree() {
+  }
+
+  @Override
+  public String getProcessTreeDump() {
+    return "";
+  }
+
+  @Override
+  public long getCumulativeCpuTime() {
+    return 0;
+  }
+
+  @Override
+  public boolean checkPidPgrpidForMatch() {
+    return true;
+  }
+
+  @Override
+  public float getCpuUsagePercent() {
+    long cpu = this.cpuPercentage;
+    // First getter call will be returned with -1, and other calls will
+    // return non-zero value as defined below.
+    if (cpu == ResourceCalculatorProcessTree.UNAVAILABLE) {
+      // Set a default value other than 0 for test.
+      this.cpuPercentage = 50;
+    }
+    return cpu;
+  }
+}

http://git-wip-us.apache.org/repos/asf/hadoop/blob/1500a0a3/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/MockResourceCalculatorProcessTree.java
----------------------------------------------------------------------
diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/MockResourceCalculatorProcessTree.java
 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/MockResourceCalculatorProcessTree.java
index c5aaa77..ff2a570 100644
--- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/MockResourceCalculatorProcessTree.java
+++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/MockResourceCalculatorProcessTree.java
@@ -54,4 +54,9 @@ public class MockResourceCalculatorProcessTree extends 
ResourceCalculatorProcess
   public long getRssMemorySize() {
     return this.rssMemorySize;
   }
+
+  @Override
+  public float getCpuUsagePercent() {
+    return 0;
+  }
 }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/1500a0a3/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitorResourceChange.java
----------------------------------------------------------------------
diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitorResourceChange.java
 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitorResourceChange.java
index 1a0c690..2df0c98 100644
--- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitorResourceChange.java
+++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitorResourceChange.java
@@ -27,8 +27,8 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
 import org.apache.hadoop.yarn.api.records.ApplicationId;
 import org.apache.hadoop.yarn.api.records.ContainerId;
-import org.apache.hadoop.yarn.api.records.ExecutionType;
 import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.api.records.ResourceUtilization;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.event.AsyncDispatcher;
 import org.apache.hadoop.yarn.event.EventHandler;
@@ -43,19 +43,21 @@ import 
org.apache.hadoop.yarn.server.nodemanager.executor.ContainerSignalContext
 import 
org.apache.hadoop.yarn.server.nodemanager.executor.ContainerStartContext;
 import 
org.apache.hadoop.yarn.server.nodemanager.executor.DeletionAsUserContext;
 import 
org.apache.hadoop.yarn.server.nodemanager.executor.LocalizerStartContext;
+import org.apache.log4j.Logger;
 import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
 import org.mockito.Mockito;
 
 import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.assertFalse;
 
 public class TestContainersMonitorResourceChange {
 
+  static final Logger LOG = Logger
+      .getLogger(TestContainersMonitorResourceChange.class);
   private ContainersMonitorImpl containersMonitor;
   private MockExecutor executor;
   private Configuration conf;
@@ -63,6 +65,8 @@ public class TestContainersMonitorResourceChange {
   private Context context;
   private MockContainerEventHandler containerEventHandler;
 
+  static final int WAIT_MS_PER_LOOP = 20; // 20 milli seconds
+
   private static class MockExecutor extends ContainerExecutor {
     @Override
     public void init() throws IOException {
@@ -232,6 +236,60 @@ public class TestContainersMonitorResourceChange {
     containersMonitor.stop();
   }
 
+  @Test
+  public void testContainersCPUResourceForDefaultValue() throws Exception {
+    Configuration newConf = new Configuration(conf);
+    // set container monitor interval to be 20s
+    newConf.setLong(YarnConfiguration.NM_CONTAINER_MON_INTERVAL_MS, 20L);
+    containersMonitor = createContainersMonitor(executor, dispatcher, context);
+    newConf.set(YarnConfiguration.NM_CONTAINER_MON_PROCESS_TREE,
+        MockCPUResourceCalculatorProcessTree.class.getCanonicalName());
+    // set container monitor interval to be 20ms
+    containersMonitor.init(newConf);
+    containersMonitor.start();
+
+    // create container 1
+    containersMonitor.handle(new ContainerStartMonitoringEvent(
+        getContainerId(1), 2100L, 1000L, 1, 0, 0));
+
+    // Verify the container utilization value.
+    // Since MockCPUResourceCalculatorProcessTree will return a -1 as CPU
+    // utilization, containersUtilization will not be calculated and hence it
+    // will be 0.
+    assertEquals(
+        "Resource utilization must be default with MonitorThread's first run",
+        0, containersMonitor.getContainersUtilization()
+            .compareTo(ResourceUtilization.newInstance(0, 0, 0.0f)));
+
+    // Verify the container utilization value. Since atleast one round is done,
+    // we can expect a non-zero value for container utilization as
+    // MockCPUResourceCalculatorProcessTree#getCpuUsagePercent will return 50.
+    waitForContainerResourceUtilizationChange(containersMonitor, 100);
+
+    containersMonitor.stop();
+  }
+
+  public static void waitForContainerResourceUtilizationChange(
+      ContainersMonitorImpl containersMonitor, int timeoutMsecs)
+      throws InterruptedException {
+    int timeWaiting = 0;
+    while (0 == containersMonitor.getContainersUtilization()
+        .compareTo(ResourceUtilization.newInstance(0, 0, 0.0f))) {
+      if (timeWaiting >= timeoutMsecs) {
+        break;
+      }
+
+      LOG.info(
+          "Monitor thread is waiting for resource utlization change.");
+      Thread.sleep(WAIT_MS_PER_LOOP);
+      timeWaiting += WAIT_MS_PER_LOOP;
+    }
+
+    assertTrue("Resource utilization is not changed from second run onwards",
+        0 != containersMonitor.getContainersUtilization()
+            .compareTo(ResourceUtilization.newInstance(0, 0, 0.0f)));
+  }
+
   private ContainersMonitorImpl createContainersMonitor(
       ContainerExecutor containerExecutor, AsyncDispatcher dispatcher,
       Context context) {


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-commits-h...@hadoop.apache.org

Reply via email to