This is an automated email from the ASF dual-hosted git repository.
ebadger pushed a commit to branch branch-3.3
in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/branch-3.3 by this push:
new 7ec692a YARN-4771. Some containers can be skipped during log
aggregation after NM restart. Contributed by Jason Lowe and Jim Brennan.
7ec692a is described below
commit 7ec692aa832d74db3aba753c14cd798d956ff51f
Author: Eric Badger <[email protected]>
AuthorDate: Fri Jul 24 22:46:57 2020 +0000
YARN-4771. Some containers can be skipped during log aggregation after NM
restart. Contributed by Jason Lowe and Jim Brennan.
(cherry picked from commit ac5f21dbef0f0ad4210e4027f53877760fa606a5)
---
.../server/nodemanager/NodeStatusUpdaterImpl.java | 11 ++++++----
.../server/nodemanager/TestNodeStatusUpdater.java | 24 ++++++++++------------
2 files changed, 18 insertions(+), 17 deletions(-)
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java
index 5e3693a..fb2d918 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java
@@ -776,8 +776,13 @@ public class NodeStatusUpdaterImpl extends AbstractService implements
while (i.hasNext()) {
Entry<ContainerId, Long> mapEntry = i.next();
ContainerId cid = mapEntry.getKey();
- if (mapEntry.getValue() < currentTime) {
- if (!context.getContainers().containsKey(cid)) {
+ if (mapEntry.getValue() >= currentTime) {
+ break;
+ }
+ if (!context.getContainers().containsKey(cid)) {
+ ApplicationId appId =
+ cid.getApplicationAttemptId().getApplicationId();
+ if (isApplicationStopped(appId)) {
i.remove();
try {
context.getNMStateStore().removeContainer(cid);
@@ -785,8 +790,6 @@ public class NodeStatusUpdaterImpl extends AbstractService implements
LOG.error("Unable to remove container " + cid + " in store", e);
}
}
- } else {
- break;
}
}
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java
index c0831ee..2477af2 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java
@@ -931,9 +931,8 @@ public class TestNodeStatusUpdater extends NodeManagerTestBase {
public void testRecentlyFinishedContainers() throws Exception {
NodeManager nm = new NodeManager();
YarnConfiguration conf = new YarnConfiguration();
- conf.set(
- NodeStatusUpdaterImpl.YARN_NODEMANAGER_DURATION_TO_TRACK_STOPPED_CONTAINERS,
- "10000");
+ conf.setInt(NodeStatusUpdaterImpl.
+ YARN_NODEMANAGER_DURATION_TO_TRACK_STOPPED_CONTAINERS, 1);
nm.init(conf);
NodeStatusUpdaterImpl nodeStatusUpdater =
(NodeStatusUpdaterImpl) nm.getNodeStatusUpdater();
@@ -948,18 +947,17 @@ public class TestNodeStatusUpdater extends NodeManagerTestBase {
nodeStatusUpdater.addCompletedContainer(cId);
Assert.assertTrue(nodeStatusUpdater.isContainerRecentlyStopped(cId));
+ // verify container remains even after expiration if app
+ // is still active
nm.getNMContext().getContainers().remove(cId);
- long time1 = System.currentTimeMillis();
- int waitInterval = 15;
- while (waitInterval-- > 0
- && nodeStatusUpdater.isContainerRecentlyStopped(cId)) {
- nodeStatusUpdater.removeVeryOldStoppedContainersFromCache();
- Thread.sleep(1000);
- }
- long time2 = System.currentTimeMillis();
- // By this time the container will be removed from cache. need to verify.
+ Thread.sleep(10);
+ nodeStatusUpdater.removeVeryOldStoppedContainersFromCache();
+ Assert.assertTrue(nodeStatusUpdater.isContainerRecentlyStopped(cId));
+
+ // complete the application and verify container is removed
+ nm.getNMContext().getApplications().remove(appId);
+ nodeStatusUpdater.removeVeryOldStoppedContainersFromCache();
Assert.assertFalse(nodeStatusUpdater.isContainerRecentlyStopped(cId));
- Assert.assertTrue((time2 - time1) >= 10000 && (time2 - time1) <= 250000);
}
@Test(timeout = 90000)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]