YARN-4906. Capture container start/finish time in container metrics. 
Contributed by Jian He.


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/b41e65e5
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/b41e65e5
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/b41e65e5

Branch: refs/heads/YARN-3368
Commit: b41e65e5bc9459b4d950a2c53860a223f1a0d2ec
Parents: 21eb428
Author: Varun Vasudev <vvasu...@apache.org>
Authored: Wed Apr 6 13:41:33 2016 +0530
Committer: Varun Vasudev <vvasu...@apache.org>
Committed: Wed Apr 6 13:41:33 2016 +0530

----------------------------------------------------------------------
 .../container/ContainerImpl.java                | 22 ++++++++++++++++++++
 .../monitor/ContainerMetrics.java               | 18 ++++++++++++++++
 .../containermanager/TestAuxServices.java       |  2 +-
 .../container/TestContainer.java                | 11 ++++++++++
 4 files changed, 52 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/b41e65e5/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java
----------------------------------------------------------------------
diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java
 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java
index da8a3a6..a43a005 100644
--- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java
+++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java
@@ -65,6 +65,7 @@ import 
org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.even
 import 
org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.sharedcache.SharedCacheUploadEvent;
 import 
org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.sharedcache.SharedCacheUploadEventType;
 import 
org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.event.LogHandlerContainerFinishedEvent;
+import 
org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainerMetrics;
 import 
org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainerStartMonitoringEvent;
 import 
org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainerStopMonitoringEvent;
 import org.apache.hadoop.yarn.server.nodemanager.Context;
@@ -100,6 +101,7 @@ public class ContainerImpl implements Container {
   private boolean wasLaunched;
   private long containerLocalizationStartTime;
   private long containerLaunchStartTime;
+  private ContainerMetrics containerMetrics;
   private static Clock clock = SystemClock.getInstance();
 
   /** The NM-wide configuration - not specific to this container */
@@ -147,6 +149,21 @@ public class ContainerImpl implements Container {
     this.readLock = readWriteLock.readLock();
     this.writeLock = readWriteLock.writeLock();
     this.context = context;
+    boolean containerMetricsEnabled =
+        conf.getBoolean(YarnConfiguration.NM_CONTAINER_METRICS_ENABLE,
+            YarnConfiguration.DEFAULT_NM_CONTAINER_METRICS_ENABLE);
+
+    if (containerMetricsEnabled) {
+      long flushPeriod =
+          conf.getLong(YarnConfiguration.NM_CONTAINER_METRICS_PERIOD_MS,
+              YarnConfiguration.DEFAULT_NM_CONTAINER_METRICS_PERIOD_MS);
+      long unregisterDelay = conf.getLong(
+          YarnConfiguration.NM_CONTAINER_METRICS_UNREGISTER_DELAY_MS,
+          YarnConfiguration.DEFAULT_NM_CONTAINER_METRICS_UNREGISTER_DELAY_MS);
+      containerMetrics = ContainerMetrics
+          .forContainer(containerId, flushPeriod, unregisterDelay);
+      containerMetrics.recordStartTime(clock.getTime());
+    }
 
     stateMachine = stateMachineFactory.make(this);
   }
@@ -989,6 +1006,11 @@ public class ContainerImpl implements Container {
     @SuppressWarnings("unchecked")
     public void transition(ContainerImpl container, ContainerEvent event) {
       container.metrics.releaseContainer(container.resource);
+      if (container.containerMetrics != null) {
+        container.containerMetrics
+            .recordFinishTimeAndExitCode(clock.getTime(), container.exitCode);
+        container.containerMetrics.finished();
+      }
       container.sendFinishedEvents();
       //if the current state is NEW it means the CONTAINER_INIT was never 
       // sent for the event, thus no need to send the CONTAINER_STOP

http://git-wip-us.apache.org/repos/asf/hadoop/blob/b41e65e5/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainerMetrics.java
----------------------------------------------------------------------
diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainerMetrics.java
 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainerMetrics.java
index 9d17db0..f85431e 100644
--- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainerMetrics.java
+++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainerMetrics.java
@@ -100,6 +100,15 @@ public class ContainerMetrics implements MetricsSource {
   @Metric
   public MutableGaugeLong localizationDurationMs;
 
+  @Metric
+  public MutableGaugeLong startTime;
+
+  @Metric
+  public MutableGaugeLong finishTime;
+
+  @Metric
+  public MutableGaugeInt exitCode;
+
   static final MetricsInfo RECORD_INFO =
       info("ContainerResource", "Resource limit and usage by container");
 
@@ -277,6 +286,15 @@ public class ContainerMetrics implements MetricsSource {
     this.localizationDurationMs.set(localizationDuration);
   }
 
+  public void recordStartTime(long startTime) {
+    this.startTime.set(startTime);
+  }
+
+  public void recordFinishTimeAndExitCode(long finishTime, int exitCode) {
+    this.finishTime.set(finishTime);
+    this.exitCode.set(exitCode);
+  }
+
   private synchronized void scheduleTimerTaskIfRequired() {
     if (flushPeriodMs > 0) {
       // Lazily initialize timer

http://git-wip-us.apache.org/repos/asf/hadoop/blob/b41e65e5/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestAuxServices.java
----------------------------------------------------------------------
diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestAuxServices.java
 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestAuxServices.java
index 91466e8..9d0d0c0 100644
--- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestAuxServices.java
+++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestAuxServices.java
@@ -195,7 +195,7 @@ public class TestAuxServices {
         ContainerId.newContainerId(attemptId, 1), "", "",
         Resource.newInstance(1, 1), 0,0,0, Priority.newInstance(0), 0);
     Context context = mock(Context.class);
-    Container container = new ContainerImpl(null, null, null, null,
+    Container container = new ContainerImpl(new YarnConfiguration(), null, 
null, null,
         null, cti, context);
     ContainerId containerId = container.getContainerId();
     Resource resource = container.getResource();

http://git-wip-us.apache.org/repos/asf/hadoop/blob/b41e65e5/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java
----------------------------------------------------------------------
diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java
 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java
index 3e06236..cc98bdc 100644
--- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java
+++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java
@@ -85,6 +85,7 @@ import 
org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.even
 import 
org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.LocalizationEventType;
 import 
org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.event.LogHandlerEvent;
 import 
org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.event.LogHandlerEventType;
+import 
org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainerMetrics;
 import 
org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitorEvent;
 import 
org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitorEventType;
 import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics;
@@ -333,6 +334,7 @@ public class TestContainer {
   @Test
   public void testKillOnNew() throws Exception {
     WrappedContainer wc = null;
+
     try {
       wc = new WrappedContainer(13, 314159265358979L, 4344, "yak");
       assertEquals(ContainerState.NEW, wc.c.getContainerState());
@@ -345,6 +347,15 @@ public class TestContainer {
       assertTrue(wc.c.cloneAndGetContainerStatus().getDiagnostics()
           .contains("KillRequest"));
       assertEquals(killed + 1, metrics.getKilledContainers());
+      // check container metrics is generated.
+      ContainerMetrics containerMetrics =
+          ContainerMetrics.forContainer(wc.cId, 1, 5000);
+      Assert.assertEquals(ContainerExitStatus.KILLED_BY_RESOURCEMANAGER,
+          containerMetrics.exitCode.value());
+      Assert.assertTrue(containerMetrics.startTime.value() > 0);
+      Assert.assertTrue(
+          containerMetrics.finishTime.value() > containerMetrics.startTime
+              .value());
     } finally {
       if (wc != null) {
         wc.finished();

Reply via email to