YARN-3971. Skip RMNodeLabelsManager#checkRemoveFromClusterNodeLabelsOfQueue on nodelabel recovery. (addendum patch). Contributed by Bibin A Chundatt.
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/b4078bd1 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/b4078bd1 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/b4078bd1 Branch: refs/heads/HDFS-1312 Commit: b4078bd17b41fbfff5a5c0bca5bf903a327826a7 Parents: 4b0f55b Author: Naganarasimha <naganarasimha...@apache.org> Authored: Tue May 24 08:06:53 2016 +0800 Committer: Naganarasimha <naganarasimha...@apache.org> Committed: Tue May 24 08:06:53 2016 +0800 ---------------------------------------------------------------------- .../nodelabels/CommonNodeLabelsManager.java | 18 +++++- .../nodelabels/RMNodeLabelsManager.java | 9 ++- .../nodelabels/TestRMNodeLabelsManager.java | 67 ++++++++++++++------ 3 files changed, 70 insertions(+), 24 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/b4078bd1/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/nodelabels/CommonNodeLabelsManager.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/nodelabels/CommonNodeLabelsManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/nodelabels/CommonNodeLabelsManager.java index 172a737..1a83632 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/nodelabels/CommonNodeLabelsManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/nodelabels/CommonNodeLabelsManager.java @@ -75,7 +75,9 @@ public class CommonNodeLabelsManager extends AbstractService { private static final Pattern LABEL_PATTERN = Pattern .compile("^[0-9a-zA-Z][0-9a-zA-Z-_]*"); public static final int WILDCARD_PORT = 0; - + // Flag to identify startup for 
removelabel + private boolean initNodeLabelStoreInProgress = false; + /** * Error messages */ @@ -226,6 +228,13 @@ public class CommonNodeLabelsManager extends AbstractService { labelCollections.put(NO_LABEL, new RMNodeLabel(NO_LABEL)); } + /** + * @return the isStartup + */ + protected boolean isInitNodeLabelStoreInProgress() { + return initNodeLabelStoreInProgress; + } + boolean isCentralizedConfiguration() { return isCentralizedNodeLabelConfiguration; } @@ -252,7 +261,9 @@ public class CommonNodeLabelsManager extends AbstractService { @Override protected void serviceStart() throws Exception { if (nodeLabelsEnabled) { + setInitNodeLabelStoreInProgress(true); initNodeLabelStore(getConfig()); + setInitNodeLabelStoreInProgress(false); } // init dispatcher only when service start, because recover will happen in @@ -1083,4 +1094,9 @@ public class CommonNodeLabelsManager extends AbstractService { } return newMap; } + + public void setInitNodeLabelStoreInProgress( + boolean initNodeLabelStoreInProgress) { + this.initNodeLabelStoreInProgress = initNodeLabelStoreInProgress; + } } http://git-wip-us.apache.org/repos/asf/hadoop/blob/b4078bd1/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/nodelabels/RMNodeLabelsManager.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/nodelabels/RMNodeLabelsManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/nodelabels/RMNodeLabelsManager.java index 62922ad..5dc8392 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/nodelabels/RMNodeLabelsManager.java 
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/nodelabels/RMNodeLabelsManager.java @@ -33,7 +33,6 @@ import java.util.concurrent.ConcurrentMap; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hadoop.service.Service; import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.nodelabels.CommonNodeLabelsManager; @@ -114,13 +113,13 @@ public class RMNodeLabelsManager extends CommonNodeLabelsManager { throws IOException { try { writeLock.lock(); - if (getServiceState() == Service.STATE.STARTED) { + if (!isInitNodeLabelStoreInProgress()) { // We cannot remove node labels from collection when some queue(s) are // using any of them. - // We will only do this check when service starting finished. Before + // We will not do this check when recovery is in progress. During // service starting, we will replay edit logs and recover state. It is - // possible that a history operation removed some labels which were being - // used by some queues in the past but not used by current queues. + // possible that a history operation removed some labels which were not + // used by some queues in the past but are used by current queues. 
checkRemoveFromClusterNodeLabelsOfQueue(labelsToRemove); } // copy before NMs http://git-wip-us.apache.org/repos/asf/hadoop/blob/b4078bd1/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/nodelabels/TestRMNodeLabelsManager.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/nodelabels/TestRMNodeLabelsManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/nodelabels/TestRMNodeLabelsManager.java index 43fd588..4a6abbe 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/nodelabels/TestRMNodeLabelsManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/nodelabels/TestRMNodeLabelsManager.java @@ -33,7 +33,9 @@ import java.util.Map; import java.util.Set; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.service.Service; import org.apache.hadoop.yarn.api.records.NodeId; +import org.apache.hadoop.yarn.api.records.NodeLabel; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.event.Dispatcher; @@ -44,6 +46,7 @@ import org.apache.hadoop.yarn.nodelabels.NodeLabelTestBase; import org.apache.hadoop.yarn.nodelabels.RMNodeLabel; import org.apache.hadoop.yarn.server.resourcemanager.MockRM; import org.apache.hadoop.yarn.server.resourcemanager.RMContext; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration; import 
org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeLabelsUpdateSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEventType; @@ -606,6 +609,23 @@ public class TestRMNodeLabelsManager extends NodeLabelTestBase { @Test(timeout = 60000) public void testcheckRemoveFromClusterNodeLabelsOfQueue() throws Exception { + lmgr = new RMNodeLabelsManager(); + Configuration conf = new Configuration(); + File tempDir = File.createTempFile("nlb", ".tmp"); + tempDir.delete(); + tempDir.mkdirs(); + tempDir.deleteOnExit(); + conf.set(YarnConfiguration.FS_NODE_LABELS_STORE_ROOT_DIR, + tempDir.getAbsolutePath()); + conf.setBoolean(YarnConfiguration.NODE_LABELS_ENABLED, true); + conf.set(YarnConfiguration.RM_SCHEDULER, + "org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler"); + Configuration withQueueLabels = getConfigurationWithQueueLabels(conf); + MockRM rm = initRM(conf); + lmgr.addToCluserNodeLabels(toSet(NodeLabel.newInstance("x", false))); + lmgr.removeFromClusterNodeLabels(Arrays.asList(new String[] { "x" })); + lmgr.addToCluserNodeLabelsWithDefaultExclusivity(toSet("x")); + rm.stop(); class TestRMLabelManger extends RMNodeLabelsManager { @Override protected void checkRemoveFromClusterNodeLabelsOfQueue( @@ -613,34 +633,45 @@ public class TestRMNodeLabelsManager extends NodeLabelTestBase { checkQueueCall = true; // Do nothing } - } lmgr = new TestRMLabelManger(); - Configuration conf = new Configuration(); - File tempDir = File.createTempFile("nlb", ".tmp"); - tempDir.delete(); - tempDir.mkdirs(); - tempDir.deleteOnExit(); - conf.set(YarnConfiguration.FS_NODE_LABELS_STORE_ROOT_DIR, - tempDir.getAbsolutePath()); - conf.setBoolean(YarnConfiguration.NODE_LABELS_ENABLED, true); + MockRM rm2 = initRM(withQueueLabels); + Assert.assertFalse( + "checkRemoveFromClusterNodeLabelsOfQueue should not be called" + + "on 
recovery", + checkQueueCall); + lmgr.removeFromClusterNodeLabels(Arrays.asList(new String[] { "x" })); + Assert + .assertTrue("checkRemoveFromClusterNodeLabelsOfQueue should be called " + + "since its not recovery", checkQueueCall); + rm2.stop(); + } + + private MockRM initRM(Configuration conf) { MockRM rm = new MockRM(conf) { @Override public RMNodeLabelsManager createNodeLabelManager() { return lmgr; } }; - lmgr.addToCluserNodeLabelsWithDefaultExclusivity(toSet("a")); - lmgr.removeFromClusterNodeLabels(Arrays.asList(new String[] { "a" })); rm.getRMContext().setNodeLabelManager(lmgr); rm.start(); - lmgr.addToCluserNodeLabelsWithDefaultExclusivity(toSet("a")); - Assert.assertEquals(false, checkQueueCall); - lmgr.removeFromClusterNodeLabels(Arrays.asList(new String[] { "a" })); - Assert.assertEquals(true, checkQueueCall); - lmgr.stop(); - lmgr.close(); - rm.stop(); + Assert.assertEquals(Service.STATE.STARTED, rm.getServiceState()); + return rm; + } + + private Configuration getConfigurationWithQueueLabels(Configuration config) { + CapacitySchedulerConfiguration conf = + new CapacitySchedulerConfiguration(config); + // Define top-level queues + conf.setQueues(CapacitySchedulerConfiguration.ROOT, new String[] { "a" }); + conf.setCapacityByLabel(CapacitySchedulerConfiguration.ROOT, "x", 100); + + final String A = CapacitySchedulerConfiguration.ROOT + ".a"; + conf.setCapacity(A, 100); + conf.setAccessibleNodeLabels(A, ImmutableSet.of("x")); + conf.setCapacityByLabel(A, "x", 100); + return conf; } @Test(timeout = 5000) --------------------------------------------------------------------- To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-commits-h...@hadoop.apache.org