Repository: hadoop Updated Branches: refs/heads/feature-YARN-2928 00452d3c7 -> 36d74ec41
YARN-3995. Some of the NM events are not getting published due race condition when AM container finishes in NM (Naganarasimha G R via sjlee) Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/36d74ec4 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/36d74ec4 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/36d74ec4 Branch: refs/heads/feature-YARN-2928 Commit: 36d74ec41f1aac97ff3138e8e893cd6ac5bab608 Parents: 00452d3 Author: Sangjin Lee <[email protected]> Authored: Mon Jan 11 10:09:34 2016 -0800 Committer: Sangjin Lee <[email protected]> Committed: Mon Jan 11 10:09:34 2016 -0800 ---------------------------------------------------------------------- hadoop-yarn-project/CHANGES.txt | 3 +++ .../hadoop/yarn/conf/YarnConfiguration.java | 5 ++++ .../src/main/resources/yarn-default.xml | 7 ++++++ .../PerNodeTimelineCollectorsAuxService.java | 25 +++++++++++++------- ...TestPerNodeTimelineCollectorsAuxService.java | 11 +++++---- 5 files changed, 38 insertions(+), 13 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/36d74ec4/hadoop-yarn-project/CHANGES.txt ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 5ff425c..cf4522a 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -208,6 +208,9 @@ Branch YARN-2928: Timeline Server Next Generation: Phase 1 YARN-4350. TestDistributedShell fails for V2 scenarios. (Naganarasimha G R via varunsaxena) + YARN-3995. Some of the NM events are not getting published due race + condition when AM container finishes in NM (Naganarasimha G R via sjlee) + Trunk - Unreleased INCOMPATIBLE CHANGES http://git-wip-us.apache.org/repos/asf/hadoop/blob/36d74ec4/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java index 6a3854a..da9acb1 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java @@ -1610,6 +1610,11 @@ public class YarnConfiguration extends Configuration { public static final int DEFAULT_TIMELINE_SERVICE_WRITER_FLUSH_INTERVAL_SECONDS = 60; + public static final String ATS_APP_COLLECTOR_LINGER_PERIOD_IN_MS = + TIMELINE_SERVICE_PREFIX + "app-collector.linger-period.ms"; + + public static final int DEFAULT_ATS_APP_COLLECTOR_LINGER_PERIOD_IN_MS = 1000; + // mark app-history related configs @Private as application history is going // to be integrated into the timeline service @Private http://git-wip-us.apache.org/repos/asf/hadoop/blob/36d74ec4/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml index a9adbbf..13b952e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml @@ -1984,6 +1984,13 @@ <value>60</value> </property> + <property> + <description>Time period till which the application collector will be alive + in NM, after the application master container finishes.</description> + <name>yarn.timeline-service.app-collector.linger-period.ms</name> + <value>1000</value> + </property> + <!-- Shared Cache Configuration --> <property> http://git-wip-us.apache.org/repos/asf/hadoop/blob/36d74ec4/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice/src/main/java/org/apache/hadoop/yarn/server/timelineservice/collector/PerNodeTimelineCollectorsAuxService.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice/src/main/java/org/apache/hadoop/yarn/server/timelineservice/collector/PerNodeTimelineCollectorsAuxService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice/src/main/java/org/apache/hadoop/yarn/server/timelineservice/collector/PerNodeTimelineCollectorsAuxService.java index 0319e34..b738530 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice/src/main/java/org/apache/hadoop/yarn/server/timelineservice/collector/PerNodeTimelineCollectorsAuxService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice/src/main/java/org/apache/hadoop/yarn/server/timelineservice/collector/PerNodeTimelineCollectorsAuxService.java @@ -19,6 +19,9 @@ package org.apache.hadoop.yarn.server.timelineservice.collector; import java.nio.ByteBuffer; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.TimeUnit; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -54,6 +57,8 @@ public class PerNodeTimelineCollectorsAuxService extends AuxiliaryService { private static final int SHUTDOWN_HOOK_PRIORITY = 30; private final NodeTimelineCollectorManager collectorManager; + private long collectorLingerPeriod; + private ScheduledExecutorService scheduler; public PerNodeTimelineCollectorsAuxService() { this(new NodeTimelineCollectorManager()); @@ -70,6 +75,10 @@ public class PerNodeTimelineCollectorsAuxService extends AuxiliaryService { if (!YarnConfiguration.timelineServiceV2Enabled(conf)) { throw new YarnException("Timeline service v2 is not enabled"); } + collectorLingerPeriod = + conf.getLong(YarnConfiguration.ATS_APP_COLLECTOR_LINGER_PERIOD_IN_MS, + YarnConfiguration.DEFAULT_ATS_APP_COLLECTOR_LINGER_PERIOD_IN_MS); + scheduler = Executors.newSingleThreadScheduledExecutor(); collectorManager.init(conf); super.serviceInit(conf); } @@ -82,6 +91,12 @@ public class PerNodeTimelineCollectorsAuxService extends AuxiliaryService { @Override protected void serviceStop() throws Exception { + scheduler.shutdown(); + if (!scheduler.awaitTermination(collectorLingerPeriod, + TimeUnit.MILLISECONDS)) { + LOG.warn( + "Scheduler terminated before removing the application collectors"); + } collectorManager.stop(); super.serviceStop(); } @@ -141,17 +156,11 @@ public class PerNodeTimelineCollectorsAuxService extends AuxiliaryService { if (context.getContainerType() == ContainerType.APPLICATION_MASTER) { final ApplicationId appId = context.getContainerId().getApplicationAttemptId().getApplicationId(); - new Thread(new Runnable() { + scheduler.schedule(new Runnable() { public void run() { - try { - // TODO Temporary Fix until solution for YARN-3995 is finalized. - Thread.sleep(1000l); - } catch (InterruptedException e) { - e.printStackTrace(); - } removeApplication(appId); } - }).start(); + }, collectorLingerPeriod, TimeUnit.MILLISECONDS); } } http://git-wip-us.apache.org/repos/asf/hadoop/blob/36d74ec4/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice/src/test/java/org/apache/hadoop/yarn/server/timelineservice/collector/TestPerNodeTimelineCollectorsAuxService.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice/src/test/java/org/apache/hadoop/yarn/server/timelineservice/collector/TestPerNodeTimelineCollectorsAuxService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice/src/test/java/org/apache/hadoop/yarn/server/timelineservice/collector/TestPerNodeTimelineCollectorsAuxService.java index 4fdf47e..f2775d5 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice/src/test/java/org/apache/hadoop/yarn/server/timelineservice/collector/TestPerNodeTimelineCollectorsAuxService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice/src/test/java/org/apache/hadoop/yarn/server/timelineservice/collector/TestPerNodeTimelineCollectorsAuxService.java @@ -22,12 +22,14 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; -import static org.mockito.Mockito.any; +import static org.mockito.Matchers.any; import static org.mockito.Mockito.doReturn; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.spy; import static org.mockito.Mockito.when; +import java.io.IOException; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.util.ExitUtil; import org.apache.hadoop.util.Shell; @@ -45,8 +47,6 @@ import org.apache.hadoop.yarn.server.api.protocolrecords.GetTimelineCollectorCon import org.junit.After; import org.junit.Test; -import java.io.IOException; - public class TestPerNodeTimelineCollectorsAuxService { private ApplicationAttemptId appAttemptId; private PerNodeTimelineCollectorsAuxService auxService; @@ -103,8 +103,9 @@ public class TestPerNodeTimelineCollectorsAuxService { when(context.getContainerType()).thenReturn( ContainerType.APPLICATION_MASTER); auxService.stopContainer(context); - - // TODO Temporary Fix until solution for YARN-3995 is finalized + // auxService should have the app's collector and need to remove only after + // a configured period + assertTrue(auxService.hasApplication(appAttemptId.getApplicationId())); for (int i = 0; i < 4; i++) { Thread.sleep(500l); if (!auxService.hasApplication(appAttemptId.getApplicationId())) {
