Repository: hadoop Updated Branches: refs/heads/branch-2.7 ae854076c -> 8e675d93d
YARN-6959. RM may allocate wrong AM Container for new attempt. Contributed by Yuqi Wang Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/8e675d93 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/8e675d93 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/8e675d93 Branch: refs/heads/branch-2.7 Commit: 8e675d93df4f5e28757ff9d5a84c52e919d2f7b9 Parents: ae85407 Author: Jian He <jia...@apache.org> Authored: Thu Aug 17 11:21:58 2017 -0700 Committer: Jian He <jia...@apache.org> Committed: Thu Aug 17 11:22:01 2017 -0700 ---------------------------------------------------------------------- .../scheduler/AbstractYarnScheduler.java | 1 + .../scheduler/capacity/CapacityScheduler.java | 15 ++++++- .../scheduler/fair/FairScheduler.java | 15 ++++++- .../scheduler/fifo/FifoScheduler.java | 15 ++++++- .../scheduler/fair/TestFairScheduler.java | 46 ++++++++++---------- 5 files changed, 63 insertions(+), 29 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/8e675d93/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java index e61587d..1e9a591 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java @@ -227,6 +227,7 @@ public abstract class AbstractYarnScheduler application.containerLaunchedOnNode(containerId, node.getNodeID()); } + // TODO: Rename it to getCurrentApplicationAttempt public T getApplicationAttempt(ApplicationAttemptId applicationAttemptId) { SchedulerApplication<T> app = applications.get(applicationAttemptId.getApplicationId()); http://git-wip-us.apache.org/repos/asf/hadoop/blob/8e675d93/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java index c8ea5a5..0a281ed 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java @@ -938,8 +938,19 @@ public class CapacityScheduler extends FiCaSchedulerApp application = getApplicationAttempt(applicationAttemptId); if (application == null) { - LOG.info("Calling allocate on removed " + - "or non existant application " + applicationAttemptId); + LOG.error("Calling allocate on removed or non existent application " + + applicationAttemptId.getApplicationId()); + return EMPTY_ALLOCATION; + } + + // The allocate may be the leftover from previous attempt, and it will + // impact current attempt, such as confuse the request and allocation for + // current attempt's AM container. + // Note outside precondition check for the attempt id may be + // outdated here, so double check it here is necessary. + if (!application.getApplicationAttemptId().equals(applicationAttemptId)) { + LOG.error("Calling allocate on previous or removed " + + "or non existent application attempt " + applicationAttemptId); return EMPTY_ALLOCATION; } http://git-wip-us.apache.org/repos/asf/hadoop/blob/8e675d93/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java index 4591f63..839bef5 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java @@ -889,8 +889,19 @@ public class FairScheduler extends // Make sure this application exists FSAppAttempt application = getSchedulerApp(appAttemptId); if (application == null) { - LOG.info("Calling allocate on removed " + - "or non existant application " + appAttemptId); + LOG.error("Calling allocate on removed or non existent application " + + appAttemptId.getApplicationId()); + return EMPTY_ALLOCATION; + } + + // The allocate may be the leftover from previous attempt, and it will + // impact current attempt, such as confuse the request and allocation for + // current attempt's AM container. + // Note outside precondition check for the attempt id may be + // outdated here, so double check it here is necessary. + if (!application.getApplicationAttemptId().equals(appAttemptId)) { + LOG.error("Calling allocate on previous or removed " + + "or non existent application attempt " + appAttemptId); return EMPTY_ALLOCATION; } http://git-wip-us.apache.org/repos/asf/hadoop/blob/8e675d93/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java index 694b061..0666d68 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java @@ -300,8 +300,19 @@ public class FifoScheduler extends List<ContainerId> release, List<String> blacklistAdditions, List<String> blacklistRemovals) { FiCaSchedulerApp application = getApplicationAttempt(applicationAttemptId); if (application == null) { - LOG.error("Calling allocate on removed " + - "or non existant application " + applicationAttemptId); + LOG.error("Calling allocate on removed or non existent application " + + applicationAttemptId.getApplicationId()); + return EMPTY_ALLOCATION; + } + + // The allocate may be the leftover from previous attempt, and it will + // impact current attempt, such as confuse the request and allocation for + // current attempt's AM container. + // Note outside precondition check for the attempt id may be + // outdated here, so double check it here is necessary. + if (!application.getApplicationAttemptId().equals(applicationAttemptId)) { + LOG.error("Calling allocate on previous or removed " + + "or non existent application attempt " + applicationAttemptId); return EMPTY_ALLOCATION; } http://git-wip-us.apache.org/repos/asf/hadoop/blob/8e675d93/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java index 7b3c17f..6f0de07 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java @@ -1387,45 +1387,45 @@ public class TestFairScheduler extends FairSchedulerTestBase { scheduler.init(conf); scheduler.start(); scheduler.reinitialize(conf, resourceManager.getRMContext()); + int minReqSize = + FairSchedulerConfiguration.DEFAULT_RM_SCHEDULER_INCREMENT_ALLOCATION_MB; + // First ask, queue1 requests 1 large (minReqSize * 2). ApplicationAttemptId id11 = createAppAttemptId(1, 1); createMockRMApp(id11); - scheduler.addApplication(id11.getApplicationId(), "root.queue1", "user1", false); + scheduler.addApplication(id11.getApplicationId(), + "root.queue1", "user1", false); scheduler.addApplicationAttempt(id11, false, false); - ApplicationAttemptId id21 = createAppAttemptId(2, 1); - createMockRMApp(id21); - scheduler.addApplication(id21.getApplicationId(), "root.queue2", "user1", false); - scheduler.addApplicationAttempt(id21, false, false); - ApplicationAttemptId id22 = createAppAttemptId(2, 2); - createMockRMApp(id22); - - scheduler.addApplication(id22.getApplicationId(), "root.queue2", "user1", false); - scheduler.addApplicationAttempt(id22, false, false); - - int minReqSize = - FairSchedulerConfiguration.DEFAULT_RM_SCHEDULER_INCREMENT_ALLOCATION_MB; - - // First ask, queue1 requests 1 large (minReqSize * 2). List<ResourceRequest> ask1 = new ArrayList<ResourceRequest>(); - ResourceRequest request1 = - createResourceRequest(minReqSize * 2, ResourceRequest.ANY, 1, 1, true); + ResourceRequest request1 = createResourceRequest(minReqSize * 2, + ResourceRequest.ANY, 1, 1, true); ask1.add(request1); scheduler.allocate(id11, ask1, new ArrayList<ContainerId>(), null, null); // Second ask, queue2 requests 1 large + (2 * minReqSize) + ApplicationAttemptId id21 = createAppAttemptId(2, 1); + createMockRMApp(id21); + scheduler.addApplication(id21.getApplicationId(), + "root.queue2", "user1", false); + scheduler.addApplicationAttempt(id21, false, false); List<ResourceRequest> ask2 = new ArrayList<ResourceRequest>(); - ResourceRequest request2 = createResourceRequest(2 * minReqSize, "foo", 1, 1, - false); - ResourceRequest request3 = createResourceRequest(minReqSize, "bar", 1, 2, - false); + ResourceRequest request2 = createResourceRequest(2 * minReqSize, + "foo", 1, 1, false); + ResourceRequest request3 = createResourceRequest(minReqSize, + ResourceRequest.ANY, 1, 2, false); ask2.add(request2); ask2.add(request3); scheduler.allocate(id21, ask2, new ArrayList<ContainerId>(), null, null); // Third ask, queue2 requests 1 large + ApplicationAttemptId id22 = createAppAttemptId(2, 2); + createMockRMApp(id22); + scheduler.addApplication(id22.getApplicationId(), + "root.queue2", "user1", false); + scheduler.addApplicationAttempt(id22, false, false); List<ResourceRequest> ask3 = new ArrayList<ResourceRequest>(); - ResourceRequest request4 = - createResourceRequest(2 * minReqSize, ResourceRequest.ANY, 1, 1, true); + ResourceRequest request4 = createResourceRequest(2 * minReqSize, + ResourceRequest.ANY, 1, 1, true); ask3.add(request4); scheduler.allocate(id22, ask3, new ArrayList<ContainerId>(), null, null); --------------------------------------------------------------------- To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-commits-h...@hadoop.apache.org