Repository: hadoop Updated Branches: refs/heads/trunk 9f9a2222a -> 8269bfa61
YARN-2523. ResourceManager UI showing negative value for "Decommissioned Nodes" field. Contributed by Rohith Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/8269bfa6 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/8269bfa6 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/8269bfa6 Branch: refs/heads/trunk Commit: 8269bfa613999f71767de3c0369817b58cfe1416 Parents: 9f9a222 Author: Jason Lowe <[email protected]> Authored: Thu Sep 25 22:37:05 2014 +0000 Committer: Jason Lowe <[email protected]> Committed: Thu Sep 25 22:37:05 2014 +0000 ---------------------------------------------------------------------- hadoop-yarn-project/CHANGES.txt | 3 +++ .../resourcemanager/NodesListManager.java | 1 - .../resourcemanager/rmnode/RMNodeImpl.java | 13 ---------- .../server/resourcemanager/TestRMRestart.java | 26 +++++++++++++++++--- .../TestResourceTrackerService.java | 25 +++++++++++++++++-- 5 files changed, 48 insertions(+), 20 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/8269bfa6/hadoop-yarn-project/CHANGES.txt ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index aaac7b7..bbda48d 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -447,6 +447,9 @@ Release 2.6.0 - UNRELEASED YARN-2546. Made REST API for application creation/submission use numeric and boolean types instead of the string of them. (Varun Vasudev via zjshen) + YARN-2523. 
ResourceManager UI showing negative value for "Decommissioned + Nodes" field (Rohith via jlowe) + Release 2.5.1 - 2014-09-05 INCOMPATIBLE CHANGES http://git-wip-us.apache.org/repos/asf/hadoop/blob/8269bfa6/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/NodesListManager.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/NodesListManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/NodesListManager.java index d7797cc..90d7b51 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/NodesListManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/NodesListManager.java @@ -121,7 +121,6 @@ public class NodesListManager extends AbstractService implements this.conf, includesFile), excludesFile.isEmpty() ? 
null : this.rmContext.getConfigurationProvider() .getConfigurationInputStream(this.conf, excludesFile)); - setDecomissionedNMsMetrics(); printConfiguredHosts(); } } http://git-wip-us.apache.org/repos/asf/hadoop/blob/8269bfa6/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java index f0ae826..1123a98 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java @@ -460,22 +460,9 @@ public class RMNodeImpl implements RMNode, EventHandler<RMNodeEvent> { break; } - // Decomissioned NMs equals to the nodes missing in include list (if - // include list not empty) or the nodes listed in excluded list. - // DecomissionedNMs as per exclude list is set upfront when the - // exclude list is read so that RM restart can also reflect the - // decomissionedNMs. Note that RM is still not able to know decomissionedNMs - // as per include list after it restarts as they are known when those nodes - // come for registration. - // DecomissionedNMs as per include list is incremented in this transition. 
switch (finalState) { case DECOMMISSIONED: - Set<String> ecludedHosts = - context.getNodesListManager().getHostsReader().getExcludedHosts(); - if (!ecludedHosts.contains(hostName) - && !ecludedHosts.contains(NetUtils.normalizeHostName(hostName))) { metrics.incrDecommisionedNMs(); - } break; case LOST: metrics.incrNumLostNMs(); http://git-wip-us.apache.org/repos/asf/hadoop/blob/8269bfa6/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java index caa5647..0b3a364 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java @@ -77,6 +77,8 @@ import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.api.records.YarnApplicationState; import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.event.Dispatcher; +import org.apache.hadoop.yarn.event.DrainDispatcher; import org.apache.hadoop.yarn.security.AMRMTokenIdentifier; import org.apache.hadoop.yarn.security.client.RMDelegationTokenIdentifier; import org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus; @@ -1833,10 +1835,16 @@ public class TestRMRestart { 
conf.set(YarnConfiguration.RM_NODES_EXCLUDE_FILE_PATH, hostFile.getAbsolutePath()); writeToHostsFile(""); - MockRM rm1 = new MockRM(conf); + final DrainDispatcher dispatcher = new DrainDispatcher(); + MockRM rm1 = new MockRM(conf) { + @Override + protected Dispatcher createDispatcher() { + return dispatcher; + } + }; rm1.start(); - rm1.registerNode("localhost:1234", 8000); - rm1.registerNode("host2:1234", 8000); + MockNM nm1 = rm1.registerNode("localhost:1234", 8000); + MockNM nm2 = rm1.registerNode("host2:1234", 8000); Assert .assertEquals(0, ClusterMetrics.getMetrics().getNumDecommisionedNMs()); String ip = NetUtils.normalizeHostName("localhost"); @@ -1845,15 +1853,25 @@ public class TestRMRestart { // refresh nodes rm1.getNodesListManager().refreshNodes(conf); + NodeHeartbeatResponse nodeHeartbeat = nm1.nodeHeartbeat(true); + Assert + .assertTrue(NodeAction.SHUTDOWN.equals(nodeHeartbeat.getNodeAction())); + nodeHeartbeat = nm2.nodeHeartbeat(true); + Assert.assertTrue("The decommisioned metrics are not updated", + NodeAction.SHUTDOWN.equals(nodeHeartbeat.getNodeAction())); + + dispatcher.await(); Assert .assertEquals(2, ClusterMetrics.getMetrics().getNumDecommisionedNMs()); + rm1.stop(); + Assert + .assertEquals(0, ClusterMetrics.getMetrics().getNumDecommisionedNMs()); // restart RM. 
MockRM rm2 = new MockRM(conf); rm2.start(); Assert .assertEquals(2, ClusterMetrics.getMetrics().getNumDecommisionedNMs()); - rm1.stop(); rm2.stop(); } http://git-wip-us.apache.org/repos/asf/hadoop/blob/8269bfa6/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceTrackerService.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceTrackerService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceTrackerService.java index 877a122..077f464 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceTrackerService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceTrackerService.java @@ -157,25 +157,33 @@ public class TestResourceTrackerService { .getAbsolutePath()); writeToHostsFile(""); - rm = new MockRM(conf); + final DrainDispatcher dispatcher = new DrainDispatcher(); + rm = new MockRM(conf) { + @Override + protected Dispatcher createDispatcher() { + return dispatcher; + } + }; rm.start(); MockNM nm1 = rm.registerNode("host1:1234", 5120); MockNM nm2 = rm.registerNode("host2:5678", 10240); MockNM nm3 = rm.registerNode("localhost:4433", 1024); + dispatcher.await(); + int metricCount = ClusterMetrics.getMetrics().getNumDecommisionedNMs(); NodeHeartbeatResponse nodeHeartbeat = nm1.nodeHeartbeat(true); Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction())); nodeHeartbeat = nm2.nodeHeartbeat(true); 
Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction())); + dispatcher.await(); // To test that IPs also work String ip = NetUtils.normalizeHostName("localhost"); writeToHostsFile("host2", ip); rm.getNodesListManager().refreshNodes(conf); - checkDecommissionedNMCount(rm, metricCount + 2); nodeHeartbeat = nm1.nodeHeartbeat(true); Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction())); @@ -186,6 +194,19 @@ public class TestResourceTrackerService { nodeHeartbeat = nm3.nodeHeartbeat(true); Assert.assertTrue("The decommisioned metrics are not updated", NodeAction.SHUTDOWN.equals(nodeHeartbeat.getNodeAction())); + dispatcher.await(); + checkDecommissionedNMCount(rm, metricCount + 2); + writeToHostsFile(""); + rm.getNodesListManager().refreshNodes(conf); + + nm3 = rm.registerNode("localhost:4433", 1024); + dispatcher.await(); + nodeHeartbeat = nm3.nodeHeartbeat(true); + dispatcher.await(); + Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction())); + // decommissioned node is 1 since 1 node is rejoined after updating exclude + // file + checkDecommissionedNMCount(rm, metricCount + 1); } /**
