Repository: hadoop
Updated Branches:
  refs/heads/branch-2.8 2a9d8da6d -> d84958405


YARN-8774. Memory leak when CapacityScheduler allocates from reserved container 
with non-default label. Contributed by Tao Yang.


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/d8495840
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/d8495840
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/d8495840

Branch: refs/heads/branch-2.8
Commit: d8495840506e51153633a57aae88c89ed8ec7508
Parents: 2a9d8da
Author: Eric E Payne <[email protected]>
Authored: Fri Sep 28 16:24:07 2018 +0000
Committer: Eric E Payne <[email protected]>
Committed: Fri Sep 28 16:24:07 2018 +0000

----------------------------------------------------------------------
 .../scheduler/SchedulerApplicationAttempt.java  |  7 ++-
 .../TestNodeLabelContainerAllocation.java       | 63 ++++++++++++++++++++
 2 files changed, 69 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/d8495840/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java
index 28067be..7a957eb 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java
@@ -398,9 +398,14 @@ public class SchedulerApplicationAttempt implements SchedulableEntity {
       Priority priority, RMContainer rmContainer, Container container) {
     // Create RMContainer if necessary
     if (rmContainer == null) {
+      ResourceRequest anyRequest =
+          getResourceRequest(priority, ResourceRequest.ANY);
       rmContainer =
           new RMContainerImpl(container, getApplicationAttemptId(),
-              node.getNodeID(), appSchedulingInfo.getUser(), rmContext);
+              node.getNodeID(), appSchedulingInfo.getUser(), rmContext,
+              anyRequest != null ?
+                  anyRequest.getNodeLabelExpression() :
+                  CommonNodeLabelsManager.NO_LABEL);
       attemptResourceUsage.incReserved(node.getPartition(),
           container.getResource());
       ((RMContainerImpl)rmContainer).setQueueName(this.getQueueName());

http://git-wip-us.apache.org/repos/asf/hadoop/blob/d8495840/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestNodeLabelContainerAllocation.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestNodeLabelContainerAllocation.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestNodeLabelContainerAllocation.java
index 671a003..808cac4 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestNodeLabelContainerAllocation.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestNodeLabelContainerAllocation.java
@@ -43,6 +43,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.NullRMNodeLabels
 import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager;
 import org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
+import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
 import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
 import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerState;
@@ -543,6 +544,68 @@ public class TestNodeLabelContainerAllocation {
     rm1.close();
   }
 
+  @Test (timeout = 120000)
+  public void testRMContainerLeakInLeafQueue() throws Exception {
+    // set node -> label
+    mgr.addToCluserNodeLabelsWithDefaultExclusivity(ImmutableSet.of("x"));
+    mgr.addLabelsToNode(ImmutableMap.of(NodeId.newInstance("h1", 0), toSet("x"),
+        NodeId.newInstance("h2", 0), toSet("x")));
+
+    // inject node label manager
+    MockRM rm1 =
+        new MockRM(TestUtils.getConfigurationWithDefaultQueueLabels(conf)) {
+          @Override public RMNodeLabelsManager createNodeLabelManager() {
+            return mgr;
+          }
+        };
+
+    rm1.getRMContext().setNodeLabelManager(mgr);
+    rm1.start();
+    MockNM nm1 = rm1.registerNode("h1:1234", 8 * GB); // label = x
+    rm1.registerNode("h2:1234", 8 * GB); // label = x
+
+    // launch an app to queue a1 (label = x), and check all container will
+    // be allocated in h1
+    RMApp app1 = rm1.submitApp(1 * GB, "app1", "user", null, "a1");
+    MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
+
+    RMApp app2 = rm1.submitApp(1 * GB, "app2", "user", null, "a1");
+    MockRM.launchAndRegisterAM(app2, rm1, nm1);
+
+    // request a container.
+    am1.allocate("*", 7 * GB, 2, new ArrayList<ContainerId>());
+
+    CapacityScheduler cs = (CapacityScheduler) rm1.getResourceScheduler();
+    RMNode rmNode1 = rm1.getRMContext().getRMNodes().get(nm1.getNodeId());
+    LeafQueue leafQueue = (LeafQueue) cs.getQueue("a1");
+
+    // Do node heartbeats 1 time
+    // scheduler will reserve a container for app1
+    cs.handle(new NodeUpdateSchedulerEvent(rmNode1));
+
+    // Check if a 4G container allocated for app1, and 4G is reserved
+    FiCaSchedulerApp schedulerApp1 =
+        cs.getApplicationAttempt(am1.getApplicationAttemptId());
+    Assert.assertEquals(1, schedulerApp1.getLiveContainers().size());
+    Assert.assertEquals(1, schedulerApp1.getReservedContainers().size());
+
+    // kill app2 then do node heartbeat 1 time
+    // scheduler will allocate a container from the reserved container on nm1
+    rm1.killApp(app2.getApplicationId());
+    rm1.waitForState(app2.getApplicationId(), RMAppState.KILLED);
+    cs.handle(new NodeUpdateSchedulerEvent(rmNode1));
+    Assert.assertEquals(2, schedulerApp1.getLiveContainers().size());
+    Assert.assertEquals(0, schedulerApp1.getReservedContainers().size());
+
+    // After kill app1, LeafQueue#ignorePartitionExclusivityRMContainers should
+    // be clean, otherwise resource leak happened
+    rm1.killApp(app1.getApplicationId());
+    rm1.waitForState(app1.getApplicationId(), RMAppState.KILLED);
+    Assert.assertEquals(0, leafQueue.getIgnoreExclusivityRMContainers().size());
+
+    rm1.close();
+  }
+
   private void checkPendingResource(MockRM rm, int priority,
       ApplicationAttemptId attemptId, int memory) {
     CapacityScheduler cs = (CapacityScheduler) rm.getRMContext().getScheduler();


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to