YARN-4756. Unnecessary wait in Node Status Updater during reboot. (Eric Badger via kasha)
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/e82f961a
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/e82f961a
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/e82f961a

Branch: refs/heads/HDFS-1312
Commit: e82f961a3925aadf9e53a009820a48ba9e4f78b6
Parents: a62637a
Author: Karthik Kambatla <ka...@cloudera.com>
Authored: Thu Apr 7 17:05:29 2016 -0700
Committer: Karthik Kambatla <ka...@cloudera.com>
Committed: Thu Apr 7 17:05:29 2016 -0700

----------------------------------------------------------------------
 .../nodemanager/NodeStatusUpdaterImpl.java      |  1 +
 .../nodemanager/TestNodeManagerResync.java      | 33 +++++++++++++-------
 2 files changed, 23 insertions(+), 11 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/hadoop/blob/e82f961a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java
index ad983fe..72769bf 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java
@@ -284,6 +284,7 @@ public class NodeStatusUpdaterImpl extends AbstractService implements
         return;
       }
       this.isStopped = true;
+      sendOutofBandHeartBeat();
       try {
         statusUpdater.join();
         registerWithRM();

http://git-wip-us.apache.org/repos/asf/hadoop/blob/e82f961a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerResync.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerResync.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerResync.java
index e8c4634..b3d44f5 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerResync.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerResync.java
@@ -108,6 +108,7 @@ public class TestNodeManagerResync {
   static final String user = "nobody";
   private FileContext localFS;
   private CyclicBarrier syncBarrier;
+  private CyclicBarrier updateBarrier;
   private AtomicBoolean assertionFailedInThread = new AtomicBoolean(false);
   private AtomicBoolean isNMShutdownCalled = new AtomicBoolean(false);
   private final NodeManagerEvent resyncEvent =
@@ -125,6 +126,7 @@ public class TestNodeManagerResync {
     remoteLogsDir.mkdirs();
     nmLocalDir.mkdirs();
     syncBarrier = new CyclicBarrier(2);
+    updateBarrier = new CyclicBarrier(2);
   }
 
   @After
@@ -803,9 +805,11 @@ public class TestNodeManagerResync {
               .getContainerStatuses(gcsRequest).getContainerStatuses().get(0);
           assertEquals(Resource.newInstance(1024, 1),
               containerStatus.getCapability());
+          updateBarrier.await();
           // Call the actual rebootNodeStatusUpdaterAndRegisterWithRM().
           // This function should be synchronized with
           // increaseContainersResource().
+          updateBarrier.await();
           super.rebootNodeStatusUpdaterAndRegisterWithRM();
           // Check status after registerWithRM
           containerStatus = getContainerManager()
@@ -831,17 +835,24 @@ public class TestNodeManagerResync {
             List<Token> increaseTokens = new ArrayList<Token>();
             // Add increase request.
             Resource targetResource = Resource.newInstance(4096, 2);
-            try {
-              increaseTokens.add(getContainerToken(targetResource));
-              IncreaseContainersResourceRequest increaseRequest =
-                  IncreaseContainersResourceRequest.newInstance(increaseTokens);
-              IncreaseContainersResourceResponse increaseResponse =
-                  getContainerManager()
-                      .increaseContainersResource(increaseRequest);
-              Assert.assertEquals(
-                  1, increaseResponse.getSuccessfullyIncreasedContainers()
-                      .size());
-              Assert.assertTrue(increaseResponse.getFailedRequests().isEmpty());
+            try{
+              try {
+                updateBarrier.await();
+                increaseTokens.add(getContainerToken(targetResource));
+                IncreaseContainersResourceRequest increaseRequest =
+                    IncreaseContainersResourceRequest.newInstance(increaseTokens);
+                IncreaseContainersResourceResponse increaseResponse =
+                    getContainerManager()
+                        .increaseContainersResource(increaseRequest);
+                Assert.assertEquals(
+                    1, increaseResponse.getSuccessfullyIncreasedContainers()
+                        .size());
+                Assert.assertTrue(increaseResponse.getFailedRequests().isEmpty());
+              } catch (Exception e) {
+                e.printStackTrace();
+              } finally {
+                updateBarrier.await();
+              }
             } catch (Exception e) {
               e.printStackTrace();
             }