YARN-4906. Capture container start/finish time in container metrics. Contributed by Jian He.
(cherry picked from commit b41e65e5bc9459b4d950a2c53860a223f1a0d2ec) Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/8f9b97cc Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/8f9b97cc Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/8f9b97cc Branch: refs/heads/branch-2 Commit: 8f9b97ccce4ce92f6f1c16ba8ee61c7496bedd43 Parents: 11e796b Author: Varun Vasudev <vvasu...@apache.org> Authored: Wed Apr 6 13:41:33 2016 +0530 Committer: Varun Vasudev <vvasu...@apache.org> Committed: Wed Apr 6 13:42:06 2016 +0530 ---------------------------------------------------------------------- .../container/ContainerImpl.java | 22 ++++++++++++++++++++ .../monitor/ContainerMetrics.java | 18 ++++++++++++++++ .../containermanager/TestAuxServices.java | 2 +- .../container/TestContainer.java | 11 ++++++++++ 4 files changed, 52 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/8f9b97cc/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java index da8a3a6..a43a005 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java @@ -65,6 +65,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.even import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.sharedcache.SharedCacheUploadEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.sharedcache.SharedCacheUploadEventType; import org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.event.LogHandlerContainerFinishedEvent; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainerMetrics; import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainerStartMonitoringEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainerStopMonitoringEvent; import org.apache.hadoop.yarn.server.nodemanager.Context; @@ -100,6 +101,7 @@ public class ContainerImpl implements Container { private boolean wasLaunched; private long containerLocalizationStartTime; private long containerLaunchStartTime; + private ContainerMetrics containerMetrics; private static Clock clock = SystemClock.getInstance(); /** The NM-wide configuration - not specific to this container */ @@ -147,6 +149,21 @@ public class ContainerImpl implements Container { this.readLock = readWriteLock.readLock(); this.writeLock = readWriteLock.writeLock(); this.context = context; + boolean containerMetricsEnabled = + conf.getBoolean(YarnConfiguration.NM_CONTAINER_METRICS_ENABLE, + YarnConfiguration.DEFAULT_NM_CONTAINER_METRICS_ENABLE); + + if (containerMetricsEnabled) { + long flushPeriod = + conf.getLong(YarnConfiguration.NM_CONTAINER_METRICS_PERIOD_MS, + YarnConfiguration.DEFAULT_NM_CONTAINER_METRICS_PERIOD_MS); + long unregisterDelay = conf.getLong( + YarnConfiguration.NM_CONTAINER_METRICS_UNREGISTER_DELAY_MS, + YarnConfiguration.DEFAULT_NM_CONTAINER_METRICS_UNREGISTER_DELAY_MS); + containerMetrics = ContainerMetrics + .forContainer(containerId, flushPeriod, unregisterDelay); + containerMetrics.recordStartTime(clock.getTime()); + } stateMachine = stateMachineFactory.make(this); } @@ -989,6 +1006,11 @@ public class ContainerImpl implements Container { @SuppressWarnings("unchecked") public void transition(ContainerImpl container, ContainerEvent event) { container.metrics.releaseContainer(container.resource); + if (container.containerMetrics != null) { + container.containerMetrics + .recordFinishTimeAndExitCode(clock.getTime(), container.exitCode); + container.containerMetrics.finished(); + } container.sendFinishedEvents(); //if the current state is NEW it means the CONTAINER_INIT was never // sent for the event, thus no need to send the CONTAINER_STOP http://git-wip-us.apache.org/repos/asf/hadoop/blob/8f9b97cc/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainerMetrics.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainerMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainerMetrics.java index 9d17db0..f85431e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainerMetrics.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainerMetrics.java @@ -100,6 +100,15 @@ public class ContainerMetrics implements MetricsSource { @Metric public MutableGaugeLong localizationDurationMs; + @Metric + public MutableGaugeLong startTime; + + @Metric + public MutableGaugeLong finishTime; + + @Metric + public MutableGaugeInt exitCode; + static final MetricsInfo RECORD_INFO = info("ContainerResource", "Resource limit and usage by container"); @@ -277,6 +286,15 @@ public class ContainerMetrics implements MetricsSource { this.localizationDurationMs.set(localizationDuration); } + public void recordStartTime(long startTime) { + this.startTime.set(startTime); + } + + public void recordFinishTimeAndExitCode(long finishTime, int exitCode) { + this.finishTime.set(finishTime); + this.exitCode.set(exitCode); + } + private synchronized void scheduleTimerTaskIfRequired() { if (flushPeriodMs > 0) { // Lazily initialize timer http://git-wip-us.apache.org/repos/asf/hadoop/blob/8f9b97cc/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestAuxServices.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestAuxServices.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestAuxServices.java index 91466e8..9d0d0c0 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestAuxServices.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestAuxServices.java @@ -195,7 +195,7 @@ public class TestAuxServices { ContainerId.newContainerId(attemptId, 1), "", "", Resource.newInstance(1, 1), 0,0,0, Priority.newInstance(0), 0); Context context = mock(Context.class); - Container container = new ContainerImpl(null, null, null, null, + Container container = new ContainerImpl(new YarnConfiguration(), null, null, null, null, cti, context); ContainerId containerId = container.getContainerId(); Resource resource = container.getResource(); http://git-wip-us.apache.org/repos/asf/hadoop/blob/8f9b97cc/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java index 3e06236..cc98bdc 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java @@ -85,6 +85,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.even import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.LocalizationEventType; import org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.event.LogHandlerEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.event.LogHandlerEventType; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainerMetrics; import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitorEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitorEventType; import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics; @@ -333,6 +334,7 @@ public class TestContainer { @Test public void testKillOnNew() throws Exception { WrappedContainer wc = null; + try { wc = new WrappedContainer(13, 314159265358979L, 4344, "yak"); assertEquals(ContainerState.NEW, wc.c.getContainerState()); @@ -345,6 +347,15 @@ public class TestContainer { assertTrue(wc.c.cloneAndGetContainerStatus().getDiagnostics() .contains("KillRequest")); assertEquals(killed + 1, metrics.getKilledContainers()); + // check container metrics is generated. + ContainerMetrics containerMetrics = + ContainerMetrics.forContainer(wc.cId, 1, 5000); + Assert.assertEquals(ContainerExitStatus.KILLED_BY_RESOURCEMANAGER, + containerMetrics.exitCode.value()); + Assert.assertTrue(containerMetrics.startTime.value() > 0); + Assert.assertTrue( + containerMetrics.finishTime.value() > containerMetrics.startTime + .value()); } finally { if (wc != null) { wc.finished();