YARN-6920. Fix resource leak that happens during container re-initialization. (asuresh)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/8d3fd819
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/8d3fd819
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/8d3fd819

Branch: refs/heads/HADOOP-13345
Commit: 8d3fd81980275fa81e7a5539b1751f38a63b6911
Parents: c61f2c4
Author: Arun Suresh <asur...@apache.org>
Authored: Mon Aug 7 18:59:25 2017 -0700
Committer: Arun Suresh <asur...@apache.org>
Committed: Mon Aug 7 18:59:25 2017 -0700

----------------------------------------------------------------------
 .../yarn/client/api/impl/TestNMClient.java      | 37 +++++++++-----------
 .../container/ContainerImpl.java                |  4 +++
 .../scheduler/ContainerScheduler.java           |  4 +++
 .../containermanager/TestContainerManager.java  |  9 +++++
 4 files changed, 34 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/8d3fd819/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestNMClient.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestNMClient.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestNMClient.java
index 1034f7e..6bd0816 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestNMClient.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestNMClient.java
@@ -398,6 +398,8 @@ public class TestNMClient {
               "will be Rolled-back", Arrays.asList(new Integer[] {-1000}));
           testCommitContainer(container.getId(), true);
           testReInitializeContainer(container.getId(), clc, false);
+          testGetContainerStatus(container, i, ContainerState.RUNNING,
+              "will be Re-initialized", Arrays.asList(new Integer[] {-1000}));
           testCommitContainer(container.getId(), false);
         } else {
           testReInitializeContainer(container.getId(), clc, true);
@@ -449,24 +451,21 @@ public class TestNMClient {
       ContainerState state, String diagnostics, List<Integer> exitStatuses)
           throws YarnException, IOException {
     while (true) {
-      try {
-        ContainerStatus status = nmClient.getContainerStatus(
-            container.getId(), container.getNodeId());
-        // NodeManager may still need some time to get the stable
-        // container status
-        if (status.getState() == state) {
-          assertEquals(container.getId(), status.getContainerId());
-          assertTrue("" + index + ": " + status.getDiagnostics(),
-              status.getDiagnostics().contains(diagnostics));
-          
-          assertTrue("Exit Statuses are supposed to be in: " + exitStatuses +
-              ", but the actual exit status code is: " + status.getExitStatus(),
-              exitStatuses.contains(status.getExitStatus()));
-          break;
-        }
-        Thread.sleep(100);
-      } catch (InterruptedException e) {
-        e.printStackTrace();
+      sleep(250);
+      ContainerStatus status = nmClient.getContainerStatus(
+          container.getId(), container.getNodeId());
+      // NodeManager may still need some time to get the stable
+      // container status
+      if (status.getState() == state) {
+        assertEquals(container.getId(), status.getContainerId());
+        assertTrue("" + index + ": " + status.getDiagnostics(),
+            status.getDiagnostics().contains(diagnostics));
+
+        assertTrue("Exit Statuses are supposed to be in: " + exitStatuses +
+                ", but the actual exit status code is: " +
+                status.getExitStatus(),
+            exitStatuses.contains(status.getExitStatus()));
+        break;
       }
     }
   }
@@ -559,9 +558,7 @@ public class TestNMClient {
       ContainerLaunchContext clc, boolean autoCommit)
       throws YarnException, IOException {
     try {
-      sleep(250);
       nmClient.reInitializeContainer(containerId, clc, autoCommit);
-      sleep(250);
     } catch (YarnException e) {
       // NM container will only be in SCHEDULED state, so expect the increase
       // action to fail.
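
For readers following the test change above: the fixed sleep(250) calls around
reInitializeContainer are gone, and the test now relies on the sleep-then-check
polling loop in testGetContainerStatus to wait for a stable container status.
A condensed, illustrative sketch of that idiom (the waitForState helper and its
BooleanSupplier condition are ours, not part of the patch):

    // Illustrative helper only (not in the patch): sleep, then re-check,
    // until the NodeManager reports the expected state.
    private static void waitForState(java.util.function.BooleanSupplier reachedState,
        long intervalMs) throws InterruptedException {
      do {
        Thread.sleep(intervalMs); // give the NM time to reach a stable status
      } while (!reachedState.getAsBoolean());
    }

testGetContainerStatus effectively inlines this loop, asserting on the
diagnostics and exit status once the reported state matches.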

http://git-wip-us.apache.org/repos/asf/hadoop/blob/8d3fd819/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java
index 46f8fa0..c0aa6b0 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java
@@ -1397,6 +1397,10 @@ public class ContainerImpl implements Container {
       container.resourceSet =
           container.reInitContext.mergedResourceSet(container.resourceSet);
       container.isMarkeForKilling = false;
+      // Ensure Resources are decremented.
+      container.dispatcher.getEventHandler().handle(
+          new ContainerSchedulerEvent(container,
+          ContainerSchedulerEventType.CONTAINER_COMPLETED));
       container.sendScheduleEvent();
     }
   }
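
The core of the fix is the extra CONTAINER_COMPLETED event dispatched just
before sendScheduleEvent(), so the ContainerScheduler releases the resources
it still tracks for the old run of the container before the re-initialized
container is scheduled again. A simplified sketch of the expected
scheduler-side reaction (the handler body and the releaseTrackedResources
helper are illustrative, not copied from ContainerScheduler):

    // Illustrative only: on CONTAINER_COMPLETED the scheduler is expected to
    // release the container's tracked utilization before the follow-up
    // schedule event admits the re-initialized container again.
    public void handle(ContainerSchedulerEvent event) {
      switch (event.getType()) {
      case CONTAINER_COMPLETED:
        releaseTrackedResources(event.getContainer()); // hypothetical helper
        break;
      default:
        break;
      }
    }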

http://git-wip-us.apache.org/repos/asf/hadoop/blob/8d3fd819/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/ContainerScheduler.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/ContainerScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/ContainerScheduler.java
index c119bf2..60d6213 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/ContainerScheduler.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/ContainerScheduler.java
@@ -466,4 +466,8 @@ public class ContainerScheduler extends AbstractService implements
     return this.context.getContainerManager().getContainersMonitor();
   }
 
+  @VisibleForTesting
+  public ResourceUtilization getCurrentUtilization() {
+    return this.utilizationTracker.getCurrentUtilization();
+  }
 }
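
The new getCurrentUtilization() accessor appears to hand back the tracker's
live utilization object, which is why the test below snapshots it with
ResourceUtilization.newInstance(...) before and after the upgrade rather than
holding the reference. A minimal usage sketch of that leak check (assuming a
test with a started containerManager, as in TestContainerManager):

    // Snapshot utilization around the operation under test and assert it is
    // unchanged afterwards; a difference would indicate leaked resources.
    ResourceUtilization before = ResourceUtilization.newInstance(
        containerManager.getContainerScheduler().getCurrentUtilization());
    // ... re-initialize / upgrade the container here ...
    ResourceUtilization after = ResourceUtilization.newInstance(
        containerManager.getContainerScheduler().getCurrentUtilization());
    Assert.assertEquals("Possible resource leak detected !!", before, after);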

http://git-wip-us.apache.org/repos/asf/hadoop/blob/8d3fd819/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java
index f2d2037..24d46b6 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java
@@ -74,6 +74,7 @@ import org.apache.hadoop.yarn.api.records.LocalResource;
 import org.apache.hadoop.yarn.api.records.LocalResourceType;
 import org.apache.hadoop.yarn.api.records.LocalResourceVisibility;
 import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.api.records.ResourceUtilization;
 import org.apache.hadoop.yarn.api.records.SerializedException;
 import org.apache.hadoop.yarn.api.records.SignalContainerCommand;
 import org.apache.hadoop.yarn.api.records.Token;
@@ -437,7 +438,15 @@ public class TestContainerManager extends BaseContainerManagerTest {
 
     File newStartFile = new File(tmpDir, "start_file_n.txt").getAbsoluteFile();
 
+    ResourceUtilization beforeUpgrade =
+        ResourceUtilization.newInstance(
+            containerManager.getContainerScheduler().getCurrentUtilization());
     prepareContainerUpgrade(autoCommit, false, false, cId, newStartFile);
+    ResourceUtilization afterUpgrade =
+        ResourceUtilization.newInstance(
+            containerManager.getContainerScheduler().getCurrentUtilization());
+    Assert.assertEquals("Possible resource leak detected !!",
+        beforeUpgrade, afterUpgrade);
 
     // Assert that the First process is not alive anymore
     Assert.assertFalse("Process is still alive!",

