This is an automated email from the ASF dual-hosted git repository.
ebadger pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/branch-3.1 by this push:
new 2255bcf YARN-4771. Some containers can be skipped during log
aggregation after NM restart. Contributed by Jason Lowe and Jim Brennan.
2255bcf is described below
commit 2255bcff87323c3f2513f59e9b016264724b20ea
Author: Eric Badger <[email protected]>
AuthorDate: Fri Jul 24 23:02:19 2020 +0000
YARN-4771. Some containers can be skipped during log aggregation after NM
restart. Contributed by Jason Lowe and Jim Brennan.
(cherry picked from commit ac5f21dbef0f0ad4210e4027f53877760fa606a5)
---
.../server/nodemanager/NodeStatusUpdaterImpl.java | 11 ++++++----
.../server/nodemanager/TestNodeStatusUpdater.java | 24 ++++++++++------------
2 files changed, 18 insertions(+), 17 deletions(-)
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java
index 8eb89dc..4c4bd47 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java
@@ -760,8 +760,13 @@ public class NodeStatusUpdaterImpl extends AbstractService implements
while (i.hasNext()) {
Entry<ContainerId, Long> mapEntry = i.next();
ContainerId cid = mapEntry.getKey();
- if (mapEntry.getValue() < currentTime) {
- if (!context.getContainers().containsKey(cid)) {
+ if (mapEntry.getValue() >= currentTime) {
+ break;
+ }
+ if (!context.getContainers().containsKey(cid)) {
+ ApplicationId appId =
+ cid.getApplicationAttemptId().getApplicationId();
+ if (isApplicationStopped(appId)) {
i.remove();
try {
context.getNMStateStore().removeContainer(cid);
@@ -769,8 +774,6 @@ public class NodeStatusUpdaterImpl extends AbstractService implements
LOG.error("Unable to remove container " + cid + " in store", e);
}
}
- } else {
- break;
}
}
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java
index 8435340..35c5969 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java
@@ -903,9 +903,8 @@ public class TestNodeStatusUpdater extends NodeManagerTestBase {
public void testRecentlyFinishedContainers() throws Exception {
NodeManager nm = new NodeManager();
YarnConfiguration conf = new YarnConfiguration();
- conf.set(
- NodeStatusUpdaterImpl.YARN_NODEMANAGER_DURATION_TO_TRACK_STOPPED_CONTAINERS,
- "10000");
+ conf.setInt(NodeStatusUpdaterImpl.
+ YARN_NODEMANAGER_DURATION_TO_TRACK_STOPPED_CONTAINERS, 1);
nm.init(conf);
NodeStatusUpdaterImpl nodeStatusUpdater =
(NodeStatusUpdaterImpl) nm.getNodeStatusUpdater();
@@ -920,18 +919,17 @@ public class TestNodeStatusUpdater extends NodeManagerTestBase {
nodeStatusUpdater.addCompletedContainer(cId);
Assert.assertTrue(nodeStatusUpdater.isContainerRecentlyStopped(cId));
+ // verify container remains even after expiration if app
+ // is still active
nm.getNMContext().getContainers().remove(cId);
- long time1 = System.currentTimeMillis();
- int waitInterval = 15;
- while (waitInterval-- > 0
- && nodeStatusUpdater.isContainerRecentlyStopped(cId)) {
- nodeStatusUpdater.removeVeryOldStoppedContainersFromCache();
- Thread.sleep(1000);
- }
- long time2 = System.currentTimeMillis();
- // By this time the container will be removed from cache. need to verify.
+ Thread.sleep(10);
+ nodeStatusUpdater.removeVeryOldStoppedContainersFromCache();
+ Assert.assertTrue(nodeStatusUpdater.isContainerRecentlyStopped(cId));
+
+ // complete the application and verify container is removed
+ nm.getNMContext().getApplications().remove(appId);
+ nodeStatusUpdater.removeVeryOldStoppedContainersFromCache();
Assert.assertFalse(nodeStatusUpdater.isContainerRecentlyStopped(cId));
- Assert.assertTrue((time2 - time1) >= 10000 && (time2 - time1) <= 250000);
}
@Test(timeout = 90000)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]