YARN-3830. AbstractYarnScheduler.createReleaseCache may try to clean a null attempt. Contributed by nijel.
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/80a68d60 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/80a68d60 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/80a68d60 Branch: refs/heads/HDFS-7240 Commit: 80a68d60560e505b5f8e01969dc3c168a1e5a7f3 Parents: 7405c59 Author: Devaraj K <deva...@apache.org> Authored: Wed Jul 1 19:03:44 2015 +0530 Committer: Devaraj K <deva...@apache.org> Committed: Wed Jul 1 19:03:44 2015 +0530 ---------------------------------------------------------------------- hadoop-yarn-project/CHANGES.txt | 3 + .../scheduler/AbstractYarnScheduler.java | 34 +++++---- .../scheduler/TestAbstractYarnScheduler.java | 74 ++++++++++++++++++-- 3 files changed, 91 insertions(+), 20 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/80a68d60/hadoop-yarn-project/CHANGES.txt ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 8c07e48..4389e27 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -583,6 +583,9 @@ Release 2.8.0 - UNRELEASED YARN-3823. Fix mismatch in default values for yarn.scheduler.maximum-allocation-vcores property. (Ray Chiang via devaraj) + YARN-3830. AbstractYarnScheduler.createReleaseCache may try to clean a null + attempt. (nijel via devaraj) + Release 2.7.2 - UNRELEASED INCOMPATIBLE CHANGES http://git-wip-us.apache.org/repos/asf/hadoop/blob/80a68d60/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java index ae927f1..aad76fd 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java @@ -66,6 +66,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeCleanContainerEvent; import org.apache.hadoop.yarn.util.resource.Resources; +import com.google.common.annotations.VisibleForTesting; import com.google.common.util.concurrent.SettableFuture; @@ -451,25 +452,30 @@ public abstract class AbstractYarnScheduler new Timer().schedule(new TimerTask() { @Override public void run() { - for (SchedulerApplication<T> app : applications.values()) { + clearPendingContainerCache(); + LOG.info("Release request cache is cleaned up"); + } + }, nmExpireInterval); + } - T attempt = app.getCurrentAppAttempt(); - synchronized (attempt) { - for (ContainerId containerId : attempt.getPendingRelease()) { - RMAuditLogger.logFailure( - app.getUser(), + @VisibleForTesting + public void clearPendingContainerCache() { + for (SchedulerApplication<T> app : applications.values()) { + T attempt = app.getCurrentAppAttempt(); + if (attempt != null) { + synchronized (attempt) { + for (ContainerId containerId : attempt.getPendingRelease()) { + RMAuditLogger.logFailure(app.getUser(), AuditConstants.RELEASE_CONTAINER, - "Unauthorized access or invalid container", - "Scheduler", - "Trying to release container not owned by app or with invalid id.", - attempt.getApplicationId(), containerId); - } - attempt.getPendingRelease().clear(); + "Unauthorized access or invalid container", "Scheduler", + "Trying to release container not owned by app " + + "or with invalid id.", attempt.getApplicationId(), + containerId); } + attempt.getPendingRelease().clear(); } - LOG.info("Release request cache is cleaned up"); } - }, nmExpireInterval); + } } // clean up a completed container http://git-wip-us.apache.org/repos/asf/hadoop/blob/80a68d60/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestAbstractYarnScheduler.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestAbstractYarnScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestAbstractYarnScheduler.java index 48ce822..91dd249 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestAbstractYarnScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestAbstractYarnScheduler.java @@ -18,26 +18,36 @@ package org.apache.hadoop.yarn.server.resourcemanager.scheduler; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import java.io.IOException; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; + +import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.ResourceOption; import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.server.resourcemanager.MockAM; +import org.apache.hadoop.yarn.server.resourcemanager.MockNM; import org.apache.hadoop.yarn.server.resourcemanager.MockNodes; import org.apache.hadoop.yarn.server.resourcemanager.MockRM; import org.apache.hadoop.yarn.server.resourcemanager.ParameterizedSchedulerTestBase; +import org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.MockRMApp; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeRemovedSchedulerEvent; import org.apache.hadoop.yarn.util.resource.Resources; import org.junit.Assert; import org.junit.Test; -import java.io.IOException; -import java.util.HashMap; - -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.when; - @SuppressWarnings("unchecked") public class TestAbstractYarnScheduler extends ParameterizedSchedulerTestBase { @@ -341,6 +351,58 @@ public class TestAbstractYarnScheduler extends ParameterizedSchedulerTestBase { } } + /* + * This test case is to test the pending containers are cleared from the + * attempt even if one of the application in the list have current attempt as + * null (no attempt). + */ + @SuppressWarnings({ "rawtypes" }) + @Test(timeout = 10000) + public void testReleasedContainerIfAppAttemptisNull() throws Exception { + YarnConfiguration conf=getConf(); + conf.set(YarnConfiguration.RM_STORE, MemoryRMStateStore.class.getName()); + MemoryRMStateStore memStore = new MemoryRMStateStore(); + memStore.init(conf); + MockRM rm1 = new MockRM(conf, memStore); + try { + rm1.start(); + MockNM nm1 = + new MockNM("127.0.0.1:1234", 8192, rm1.getResourceTrackerService()); + nm1.registerNode(); + + AbstractYarnScheduler scheduler = + (AbstractYarnScheduler) rm1.getResourceScheduler(); + // Mock App without attempt + RMApp mockAPp = + new MockRMApp(125, System.currentTimeMillis(), RMAppState.NEW); + SchedulerApplication<FiCaSchedulerApp> application = + new SchedulerApplication<FiCaSchedulerApp>(null, mockAPp.getUser()); + + // Second app with one app attempt + RMApp app = rm1.submitApp(200); + MockAM am1 = MockRM.launchAndRegisterAM(app, rm1, nm1); + final ContainerId runningContainer = + ContainerId.newContainerId(am1.getApplicationAttemptId(), 2); + am1.allocate(null, Arrays.asList(runningContainer)); + + Map schedulerApplications = scheduler.getSchedulerApplications(); + SchedulerApplication schedulerApp = + (SchedulerApplication) scheduler.getSchedulerApplications().get( + app.getApplicationId()); + schedulerApplications.put(mockAPp.getApplicationId(), application); + + scheduler.clearPendingContainerCache(); + + Assert.assertEquals("Pending containers are not released " + + "when one of the application attempt is null !", schedulerApp + .getCurrentAppAttempt().getPendingRelease().size(), 0); + } finally { + if (rm1 != null) { + rm1.stop(); + } + } + } + private void verifyMaximumResourceCapability( Resource expectedMaximumResource, AbstractYarnScheduler scheduler) {