YARN-3534. Collect memory/cpu usage on the node. (Inigo Goiri via kasha)

Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/def12933
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/def12933
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/def12933

Branch: refs/heads/YARN-1197
Commit: def12933b38efd5e47c5144b729c1a1496f09229
Parents: 8dfec7a
Author: Karthik Kambatla <ka...@apache.org>
Authored: Sun Aug 16 06:24:16 2015 -0700
Committer: Karthik Kambatla <ka...@apache.org>
Committed: Sun Aug 16 06:24:16 2015 -0700

----------------------------------------------------------------------
 hadoop-yarn-project/CHANGES.txt                 |   2 +
 .../hadoop/yarn/conf/YarnConfiguration.java     |  11 +-
 .../src/main/resources/yarn-default.xml         |  19 ++-
 .../server/nodemanager/NodeResourceMonitor.java |  10 +-
 .../nodemanager/NodeResourceMonitorImpl.java    | 140 +++++++++++++++++++
 .../monitor/ContainersMonitorImpl.java          |   8 +-
 .../nodemanager/TestNodeResourceMonitor.java    |  35 +++++
 .../launcher/TestContainerLaunch.java           |   2 +-
 .../monitor/TestContainersMonitor.java          |   2 +-
 9 files changed, 220 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/def12933/hadoop-yarn-project/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt
index c451320..287a913 100644
--- a/hadoop-yarn-project/CHANGES.txt
+++ b/hadoop-yarn-project/CHANGES.txt
@@ -170,6 +170,8 @@ Release 2.8.0 - UNRELEASED
     YARN-4023. Publish Application Priority to TimelineServer. (Sunil G 
     via rohithsharmaks)
 
+    YARN-3534. Collect memory/cpu usage on the node. (Inigo Goiri via kasha)
+
   IMPROVEMENTS
 
     YARN-644. Basic null check is not performed on passed in arguments before

http://git-wip-us.apache.org/repos/asf/hadoop/blob/def12933/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
index 93f7ed6..6c438f2 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
@@ -960,12 +960,21 @@ public class YarnConfiguration extends Configuration {
   public static final int DEFAULT_NM_WEBAPP_HTTPS_PORT = 8044;
   public static final String DEFAULT_NM_WEBAPP_HTTPS_ADDRESS = "0.0.0.0:"
       + DEFAULT_NM_WEBAPP_HTTPS_PORT; 
-  
+
+  /** How often to monitor resource in a node.*/
+  public static final String NM_RESOURCE_MON_INTERVAL_MS =
+      NM_PREFIX + "resource-monitor.interval-ms";
+  public static final int DEFAULT_NM_RESOURCE_MON_INTERVAL_MS = 3000;
+
   /** How often to monitor containers.*/
   public final static String NM_CONTAINER_MON_INTERVAL_MS =
     NM_PREFIX + "container-monitor.interval-ms";
+  @Deprecated
   public final static int DEFAULT_NM_CONTAINER_MON_INTERVAL_MS = 3000;
 
+  /** Class that calculates current resource utilization.*/
+  public static final String NM_MON_RESOURCE_CALCULATOR =
+      NM_PREFIX + "resource-calculator.class";
   /** Class that calculates containers current resource utilization.*/
   public static final String NM_CONTAINER_MON_RESOURCE_CALCULATOR =
     NM_PREFIX + "container-monitor.resource-calculator.class";

http://git-wip-us.apache.org/repos/asf/hadoop/blob/def12933/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
index 402377d..53face0 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
@@ -1235,13 +1235,26 @@
   </property>
 
   <property>
-    <description>How often to monitor containers.</description>
-    <name>yarn.nodemanager.container-monitor.interval-ms</name>
+    <description>How often to monitor the node and the containers.</description>
+    <name>yarn.nodemanager.resource-monitor.interval-ms</name>
     <value>3000</value>
   </property>
 
   <property>
-    <description>Class that calculates containers current resource utilization.</description>
+    <description>Class that calculates current resource utilization.</description>
+    <name>yarn.nodemanager.resource-calculator.class</name>
+  </property>
+
+  <property>
+    <description>How often to monitor containers. If not set, the value for
+    yarn.nodemanager.resource-monitor.interval-ms will be used.</description>
+    <name>yarn.nodemanager.container-monitor.interval-ms</name>
+  </property>
+
+  <property>
+    <description>Class that calculates containers current resource utilization.
+    If not set, the value for yarn.nodemanager.resource-calculator.class will
+    be used.</description>
     <name>yarn.nodemanager.container-monitor.resource-calculator.class</name>
   </property>
 

http://git-wip-us.apache.org/repos/asf/hadoop/blob/def12933/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeResourceMonitor.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeResourceMonitor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeResourceMonitor.java
index be13d22..6c5a15a 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeResourceMonitor.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeResourceMonitor.java
@@ -19,7 +19,15 @@
 package org.apache.hadoop.yarn.server.nodemanager;
 
 import org.apache.hadoop.service.Service;
+import org.apache.hadoop.yarn.server.api.records.ResourceUtilization;
 
+/**
+ * Interface for monitoring the resources of a node.
+ */
 public interface NodeResourceMonitor extends Service {
-
+  /**
+   * Get the <em>resource utilization</em> of the node.
+   * @return <em>resource utilization</em> of the node.
+   */
+  public ResourceUtilization getUtilization();
 }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/def12933/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeResourceMonitorImpl.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeResourceMonitorImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeResourceMonitorImpl.java
index ea82546..dcdaa0a 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeResourceMonitorImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeResourceMonitorImpl.java
@@ -18,13 +18,153 @@
 
 package org.apache.hadoop.yarn.server.nodemanager;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.service.AbstractService;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.server.api.records.ResourceUtilization;
+import org.apache.hadoop.yarn.util.ResourceCalculatorPlugin;
 
+/**
+ * Implementation of the node resource monitor. It periodically tracks the
+ * resource utilization of the node and reports it to the NM.
+ */
 public class NodeResourceMonitorImpl extends AbstractService implements
     NodeResourceMonitor {
 
+  /** Logging infrastructure. */
+  final static Log LOG = LogFactory
+      .getLog(NodeResourceMonitorImpl.class);
+
+  /** Interval to monitor the node resource utilization. */
+  private long monitoringInterval;
+  /** Thread to monitor the node resource utilization. */
+  private MonitoringThread monitoringThread;
+
+  /** Resource calculator. */
+  private ResourceCalculatorPlugin resourceCalculatorPlugin;
+
+  /** Current <em>resource utilization</em> of the node. */
+  private ResourceUtilization nodeUtilization;
+
+  /**
+   * Initialize the node resource monitor.
+   */
   public NodeResourceMonitorImpl() {
     super(NodeResourceMonitorImpl.class.getName());
+
+    this.monitoringThread = new MonitoringThread();
+  }
+
+  /**
+   * Initialize the service with the proper parameters.
+   */
+  @Override
+  protected void serviceInit(Configuration conf) throws Exception {
+    this.monitoringInterval =
+        conf.getLong(YarnConfiguration.NM_RESOURCE_MON_INTERVAL_MS,
+            YarnConfiguration.DEFAULT_NM_RESOURCE_MON_INTERVAL_MS);
+
+    Class<? extends ResourceCalculatorPlugin> clazz =
+        conf.getClass(YarnConfiguration.NM_MON_RESOURCE_CALCULATOR, null,
+            ResourceCalculatorPlugin.class);
+
+    this.resourceCalculatorPlugin =
+        ResourceCalculatorPlugin.getResourceCalculatorPlugin(clazz, conf);
+
+    LOG.info(" Using ResourceCalculatorPlugin : "
+        + this.resourceCalculatorPlugin);
   }
 
+  /**
+   * Check if we should be monitoring.
+   * @return <em>true</em> if we can monitor the node resource utilization.
+   */
+  private boolean isEnabled() {
+    if (resourceCalculatorPlugin == null) {
+      LOG.info("ResourceCalculatorPlugin is unavailable on this system. "
+          + this.getClass().getName() + " is disabled.");
+      return false;
+    }
+    return true;
+  }
+
+  /**
+   * Start the thread that does the node resource utilization monitoring.
+   */
+  @Override
+  protected void serviceStart() throws Exception {
+    if (this.isEnabled()) {
+      this.monitoringThread.start();
+    }
+    super.serviceStart();
+  }
+
+  /**
+   * Stop the thread that does the node resource utilization monitoring.
+   */
+  @Override
+  protected void serviceStop() throws Exception {
+    if (this.isEnabled()) {
+      this.monitoringThread.interrupt();
+      try {
+        this.monitoringThread.join(10 * 1000);
+      } catch (InterruptedException e) {
+        LOG.warn("Could not wait for the thread to join");
+      }
+    }
+    super.serviceStop();
+  }
+
+  /**
+   * Thread that monitors the resource utilization of this node.
+   */
+  private class MonitoringThread extends Thread {
+    /**
+     * Initialize the node resource monitoring thread.
+     */
+    public MonitoringThread() {
+      super("Node Resource Monitor");
+      this.setDaemon(true);
+    }
+
+    /**
+     * Periodically monitor the resource utilization of the node.
+     */
+    @Override
+    public void run() {
+      while (true) {
+        // Get node utilization and save it into the health status
+        long pmem = resourceCalculatorPlugin.getPhysicalMemorySize() -
+            resourceCalculatorPlugin.getAvailablePhysicalMemorySize();
+        long vmem =
+            resourceCalculatorPlugin.getVirtualMemorySize()
+                - resourceCalculatorPlugin.getAvailableVirtualMemorySize();
+        float cpu = resourceCalculatorPlugin.getCpuUsage();
+        nodeUtilization =
+            ResourceUtilization.newInstance(
+                (int) (pmem >> 20), // B -> MB
+                (int) (vmem >> 20), // B -> MB
+                cpu); // 1 CPU at 100% is 1
+
+        try {
+          Thread.sleep(monitoringInterval);
+        } catch (InterruptedException e) {
+          LOG.warn(NodeResourceMonitorImpl.class.getName()
+              + " is interrupted. Exiting.");
+          break;
+        }
+      }
+    }
+  }
+
+  /**
+   * Get the <em>resource utilization</em> of the node.
+   * @return <em>resource utilization</em> of the node.
+   */
+  @Override
+  public ResourceUtilization getUtilization() {
+    return this.nodeUtilization;
+  }
 }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/def12933/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java
index 57d1bad..89dc980 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java
@@ -100,10 +100,14 @@ public class ContainersMonitorImpl extends AbstractService implements
   protected void serviceInit(Configuration conf) throws Exception {
     this.monitoringInterval =
         conf.getLong(YarnConfiguration.NM_CONTAINER_MON_INTERVAL_MS,
-            YarnConfiguration.DEFAULT_NM_CONTAINER_MON_INTERVAL_MS);
+            conf.getLong(YarnConfiguration.NM_RESOURCE_MON_INTERVAL_MS,
+                YarnConfiguration.DEFAULT_NM_RESOURCE_MON_INTERVAL_MS));
 
     Class<? extends ResourceCalculatorPlugin> clazz =
-        conf.getClass(YarnConfiguration.NM_CONTAINER_MON_RESOURCE_CALCULATOR, null,
+        conf.getClass(YarnConfiguration.NM_CONTAINER_MON_RESOURCE_CALCULATOR,
+            conf.getClass(
+                YarnConfiguration.NM_MON_RESOURCE_CALCULATOR, null,
+                ResourceCalculatorPlugin.class),
             ResourceCalculatorPlugin.class);
     this.resourceCalculatorPlugin =
         ResourceCalculatorPlugin.getResourceCalculatorPlugin(clazz, conf);

http://git-wip-us.apache.org/repos/asf/hadoop/blob/def12933/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeResourceMonitor.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeResourceMonitor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeResourceMonitor.java
new file mode 100644
index 0000000..3c2c386
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeResourceMonitor.java
@@ -0,0 +1,35 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager;
+
+import org.apache.hadoop.fs.UnsupportedFileSystemException;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.BaseContainerManagerTest;
+
+import org.junit.Test;
+
+public class TestNodeResourceMonitor extends BaseContainerManagerTest {
+  public TestNodeResourceMonitor() throws UnsupportedFileSystemException {
+    super();
+  }
+
+  @Test
+  public void testNodeResourceMonitor() {
+    NodeResourceMonitor nrm = new NodeResourceMonitorImpl();
+  }
+}

http://git-wip-us.apache.org/repos/asf/hadoop/blob/def12933/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java
index 2856357..ea6bb1d 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java
@@ -125,7 +125,7 @@ public class TestContainerLaunch extends BaseContainerManagerTest {
   @Before
   public void setup() throws IOException {
     conf.setClass(
-        YarnConfiguration.NM_CONTAINER_MON_RESOURCE_CALCULATOR,
+        YarnConfiguration.NM_MON_RESOURCE_CALCULATOR,
         LinuxResourceCalculatorPlugin.class, ResourceCalculatorPlugin.class);
     super.setup();
   }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/def12933/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitor.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitor.java
index 909a962..3803144 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitor.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitor.java
@@ -86,7 +86,7 @@ public class TestContainersMonitor extends BaseContainerManagerTest {
   @Before
   public void setup() throws IOException {
     conf.setClass(
-        YarnConfiguration.NM_CONTAINER_MON_RESOURCE_CALCULATOR,
+        YarnConfiguration.NM_MON_RESOURCE_CALCULATOR,
         LinuxResourceCalculatorPlugin.class, ResourceCalculatorPlugin.class);
     conf.setBoolean(YarnConfiguration.NM_VMEM_CHECK_ENABLED, true);
     super.setup();

Reply via email to