YARN-4454. NM to nodelabel mapping going wrong after RM restart. (Bibin A Chundatt via wangda)
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/bc038b38 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/bc038b38 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/bc038b38 Branch: refs/heads/yarn-2877 Commit: bc038b382cb2ce561ce718405fbcee4382f3b204 Parents: 2cb5aff Author: Wangda Tan <[email protected]> Authored: Mon Dec 21 11:30:13 2015 -0800 Committer: Wangda Tan <[email protected]> Committed: Mon Dec 21 11:30:13 2015 -0800 ---------------------------------------------------------------------- hadoop-yarn-project/CHANGES.txt | 3 + .../nodelabels/CommonNodeLabelsManager.java | 3 +- .../server/resourcemanager/TestRMRestart.java | 73 ++++++++++++++++++-- 3 files changed, 74 insertions(+), 5 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/bc038b38/hadoop-yarn-project/CHANGES.txt ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index fd1aa45..f575003 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -1165,6 +1165,9 @@ Release 2.8.0 - UNRELEASED YARN-4461. Redundant nodeLocalityDelay log in LeafQueue (Eric Payne via jlowe) + YARN-4454. NM to nodelabel mapping going wrong after RM restart. + (Bibin A Chundatt via wangda) + Release 2.7.3 - UNRELEASED INCOMPATIBLE CHANGES http://git-wip-us.apache.org/repos/asf/hadoop/blob/bc038b38/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/nodelabels/CommonNodeLabelsManager.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/nodelabels/CommonNodeLabelsManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/nodelabels/CommonNodeLabelsManager.java index 8b26cc5..172a737 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/nodelabels/CommonNodeLabelsManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/nodelabels/CommonNodeLabelsManager.java @@ -29,6 +29,7 @@ import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Set; +import java.util.TreeMap; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; import java.util.concurrent.locks.ReentrantReadWriteLock; @@ -1074,7 +1075,7 @@ public class CommonNodeLabelsManager extends AbstractService { protected Map<NodeId, Set<String>> normalizeNodeIdToLabels( Map<NodeId, Set<String>> nodeIdToLabels) { - Map<NodeId, Set<String>> newMap = new HashMap<NodeId, Set<String>>(); + Map<NodeId, Set<String>> newMap = new TreeMap<NodeId, Set<String>>(); for (Entry<NodeId, Set<String>> entry : nodeIdToLabels.entrySet()) { NodeId id = entry.getKey(); Set<String> labels = entry.getValue(); http://git-wip-us.apache.org/repos/asf/hadoop/blob/bc038b38/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java index d2b8eee..bad68f4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java @@ -2141,11 +2141,11 @@ public class TestRMRestart extends ParameterizedSchedulerTestBase { // Add node Label to Node h1->x NodeId n1 = NodeId.newInstance("h1", 0); nodeLabelManager.addLabelsToNode(ImmutableMap.of(n1, toSet("x"))); - + clusterNodeLabels.remove("z"); // Remove cluster label z nodeLabelManager.removeFromClusterNodeLabels(toSet("z")); - + // Replace nodelabel h1->x,y nodeLabelManager.replaceLabelsOnNode(ImmutableMap.of(n1, toSet("y"))); @@ -2179,8 +2179,8 @@ public class TestRMRestart extends ParameterizedSchedulerTestBase { rm2.start(); nodeLabelManager = rm2.getRMContext().getNodeLabelManager(); - Assert.assertEquals(clusterNodeLabels.size(), nodeLabelManager - .getClusterNodeLabelNames().size()); + Assert.assertEquals(clusterNodeLabels.size(), + nodeLabelManager.getClusterNodeLabelNames().size()); nodeLabels = nodeLabelManager.getNodeLabels(); Assert.assertEquals(1, nodeLabelManager.getNodeLabels().size()); @@ -2256,4 +2256,69 @@ public class TestRMRestart extends ParameterizedSchedulerTestBase { return set; } + @Test(timeout = 20000) + public void testRMRestartNodeMapping() throws Exception { + // Initial FS node label store root dir to a random tmp dir + File nodeLabelFsStoreDir = new File("target", + this.getClass().getSimpleName() + "-testRMRestartNodeMapping"); + if (nodeLabelFsStoreDir.exists()) { + FileUtils.deleteDirectory(nodeLabelFsStoreDir); + } + nodeLabelFsStoreDir.deleteOnExit(); + String nodeLabelFsStoreDirURI = nodeLabelFsStoreDir.toURI().toString(); + conf.set(YarnConfiguration.FS_NODE_LABELS_STORE_ROOT_DIR, + nodeLabelFsStoreDirURI); + + MemoryRMStateStore memStore = new MemoryRMStateStore(); + memStore.init(conf); + conf.setBoolean(YarnConfiguration.NODE_LABELS_ENABLED, true); + MockRM rm1 = new MockRM(conf, memStore) { + @Override + protected RMNodeLabelsManager createNodeLabelManager() { + RMNodeLabelsManager mgr = new RMNodeLabelsManager(); + mgr.init(getConfig()); + return mgr; + } + }; + rm1.init(conf); + rm1.start(); + RMNodeLabelsManager nodeLabelManager = + rm1.getRMContext().getNodeLabelManager(); + + Set<String> clusterNodeLabels = new HashSet<String>(); + clusterNodeLabels.add("x"); + clusterNodeLabels.add("y"); + nodeLabelManager + .addToCluserNodeLabelsWithDefaultExclusivity(clusterNodeLabels); + // Add node Label to Node h1->x + NodeId n1 = NodeId.newInstance("h1", 1234); + NodeId n2 = NodeId.newInstance("h1", 1235); + NodeId nihost = NodeId.newInstance("h1", 0); + nodeLabelManager.replaceLabelsOnNode(ImmutableMap.of(n1, toSet("x"))); + nodeLabelManager.replaceLabelsOnNode(ImmutableMap.of(n2, toSet("x"))); + nodeLabelManager.replaceLabelsOnNode(ImmutableMap.of(nihost, toSet("y"))); + nodeLabelManager.replaceLabelsOnNode(ImmutableMap.of(n1, toSet("x"))); + MockRM rm2 = null; + for (int i = 0; i < 2; i++) { + rm2 = new MockRM(conf, memStore) { + @Override + protected RMNodeLabelsManager createNodeLabelManager() { + RMNodeLabelsManager mgr = new RMNodeLabelsManager(); + mgr.init(getConfig()); + return mgr; + } + }; + rm2.init(conf); + rm2.start(); + + nodeLabelManager = rm2.getRMContext().getNodeLabelManager(); + Map<String, Set<NodeId>> labelsToNodes = + nodeLabelManager.getLabelsToNodes(toSet("x")); + Assert.assertEquals(1, + null == labelsToNodes.get("x") ? 0 : labelsToNodes.get("x").size()); + } + rm1.stop(); + rm2.stop(); + } + }
