YARN-8771. CapacityScheduler fails to unreserve when cluster resource contains empty resource type. Contributed by Tao Yang.
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/0712537e Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/0712537e Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/0712537e Branch: refs/heads/YARN-7402 Commit: 0712537e799bc03855d548d1f4bd690dd478b871 Parents: 28ceb34 Author: Weiwei Yang <w...@apache.org> Authored: Wed Sep 19 19:31:07 2018 +0800 Committer: Weiwei Yang <w...@apache.org> Committed: Wed Sep 19 19:31:07 2018 +0800 ---------------------------------------------------------------------- .../allocator/RegularContainerAllocator.java | 3 +- .../capacity/TestContainerAllocation.java | 68 ++++++++++++++++++++ 2 files changed, 69 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/0712537e/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/RegularContainerAllocator.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/RegularContainerAllocator.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/RegularContainerAllocator.java index 3e337ef..c0a11a0 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/RegularContainerAllocator.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/RegularContainerAllocator.java @@ -543,8 +543,7 @@ public class RegularContainerAllocator extends AbstractContainerAllocator { currentResoureLimits.getAmountNeededUnreserve()); boolean needToUnreserve = - Resources.greaterThan(rc, clusterResource, - resourceNeedToUnReserve, Resources.none()); + rc.isAnyMajorResourceAboveZero(resourceNeedToUnReserve); RMContainer unreservedContainer = null; boolean reservationsContinueLooking = http://git-wip-us.apache.org/repos/asf/hadoop/blob/0712537e/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestContainerAllocation.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestContainerAllocation.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestContainerAllocation.java index b9bfc2a..3d028ee 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestContainerAllocation.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestContainerAllocation.java @@ -47,6 +47,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.RMContextImpl; import org.apache.hadoop.yarn.server.resourcemanager.RMSecretManagerService; import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.NullRMNodeLabelsManager; import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager; +import org.apache.hadoop.yarn.server.resourcemanager.resource.TestResourceProfiles; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState; @@ -59,7 +60,10 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeRemoved import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.security.RMContainerTokenSecretManager; import org.apache.hadoop.yarn.server.utils.BuilderUtils; +import org.apache.hadoop.yarn.util.resource.DominantResourceCalculator; +import org.apache.hadoop.yarn.util.resource.ResourceCalculator; import org.apache.hadoop.yarn.util.resource.Resources; +import org.apache.hadoop.yarn.util.resource.TestResourceUtils; import org.junit.Assert; import org.junit.Before; import org.junit.Test; @@ -984,4 +988,68 @@ public class TestContainerAllocation { Assert.assertEquals(2, lq.getMetrics().getAppsPending()); rm1.close(); } + + @Test(timeout = 60000) + public void testUnreserveWhenClusterResourceHasEmptyResourceType() + throws Exception { + /** + * Test case: + * Create a cluster with two nodes whose node resource both are + * <8GB, 8core, 0>, create queue "a" whose max-resource is <8GB, 8 core, 0>, + * submit app1 to queue "a" whose am use <1GB, 1 core, 0> and launch on nm1, + * submit app2 to queue "b" whose am use <1GB, 1 core, 0> and launch on nm1, + * app1 asks two <7GB, 1core> containers and nm1 do 1 heartbeat, + * then scheduler reserves one container on nm1. + * + * After nm2 do next node heartbeat, scheduler should unreserve the reserved + * container on nm1 then allocate a container on nm2. + */ + TestResourceUtils.addNewTypesToResources("resource1"); + CapacitySchedulerConfiguration newConf = + (CapacitySchedulerConfiguration) TestUtils + .getConfigurationWithMultipleQueues(conf); + newConf.setClass(CapacitySchedulerConfiguration.RESOURCE_CALCULATOR_CLASS, + DominantResourceCalculator.class, ResourceCalculator.class); + newConf + .setBoolean(TestResourceProfiles.TEST_CONF_RESET_RESOURCE_TYPES, false); + // Set maximum capacity of queue "a" to 50 + newConf.setMaximumCapacity(CapacitySchedulerConfiguration.ROOT + ".a", 50); + MockRM rm1 = new MockRM(newConf); + + RMNodeLabelsManager nodeLabelsManager = new NullRMNodeLabelsManager(); + nodeLabelsManager.init(newConf); + rm1.getRMContext().setNodeLabelManager(nodeLabelsManager); + rm1.start(); + MockNM nm1 = rm1.registerNode("h1:1234", 8 * GB); + MockNM nm2 = rm1.registerNode("h2:1234", 8 * GB); + + // launch an app to queue "a", AM container should be launched on nm1 + RMApp app1 = rm1.submitApp(1 * GB, "app", "user", null, "a"); + MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1); + + // launch another app to queue "b", AM container should be launched on nm1 + RMApp app2 = rm1.submitApp(1 * GB, "app", "user", null, "b"); + MockRM.launchAndRegisterAM(app2, rm1, nm1); + + am1.allocate("*", 7 * GB, 2, new ArrayList<ContainerId>()); + + CapacityScheduler cs = (CapacityScheduler) rm1.getResourceScheduler(); + RMNode rmNode1 = rm1.getRMContext().getRMNodes().get(nm1.getNodeId()); + RMNode rmNode2 = rm1.getRMContext().getRMNodes().get(nm2.getNodeId()); + FiCaSchedulerApp schedulerApp1 = + cs.getApplicationAttempt(am1.getApplicationAttemptId()); + + // Do nm1 heartbeats 1 times, will reserve a container on nm1 for app1 + cs.handle(new NodeUpdateSchedulerEvent(rmNode1)); + Assert.assertEquals(1, schedulerApp1.getLiveContainers().size()); + Assert.assertEquals(1, schedulerApp1.getReservedContainers().size()); + + // Do nm2 heartbeats 1 times, will unreserve a container on nm1 + // and allocate a container on nm2 for app1 + cs.handle(new NodeUpdateSchedulerEvent(rmNode2)); + Assert.assertEquals(2, schedulerApp1.getLiveContainers().size()); + Assert.assertEquals(0, schedulerApp1.getReservedContainers().size()); + + rm1.close(); + } } --------------------------------------------------------------------- To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-commits-h...@hadoop.apache.org