Repository: hadoop Updated Branches: refs/heads/branch-2 850bd0ed7 -> c27a5ba65
YARN-7591. NPE in async-scheduling mode of CapacityScheduler. (Tao Yang via wangda) Change-Id: I67e086d6d35c3166ec0f6305490ea6527bd6d799 (cherry picked from commit 41796ea60ab9dbd6a41dd8fd3c6ff50990bba2f5) Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/c27a5ba6 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/c27a5ba6 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/c27a5ba6 Branch: refs/heads/branch-2 Commit: c27a5ba65c115f72a83c3bef9599579e38926aac Parents: 850bd0e Author: Wangda Tan <[email protected]> Authored: Fri Dec 8 21:43:28 2017 -0800 Committer: Wangda Tan <[email protected]> Committed: Fri Dec 8 21:50:41 2017 -0800 ---------------------------------------------------------------------- .../scheduler/capacity/CapacityScheduler.java | 4 ++++ .../scheduler/capacity/LeafQueue.java | 21 +++++++++++++++++++- .../scheduler/common/fica/FiCaSchedulerApp.java | 5 +++++ 3 files changed, 29 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/c27a5ba6/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java index 408032a..e2e2c54 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java @@ -1258,6 +1258,10 @@ public class CapacityScheduler extends if (reservedContainer != null) { FiCaSchedulerApp reservedApplication = getCurrentAttemptForContainer( reservedContainer.getContainerId()); + if (reservedApplication == null) { + LOG.error("Trying to schedule for a finished app, please double check."); + return null; + } // Try to fulfill the reservation LOG.info( http://git-wip-us.apache.org/repos/asf/hadoop/blob/c27a5ba6/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java index e881495..1bbe903 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java @@ -1186,7 +1186,14 @@ public class LeafQueue extends AbstractCSQueue { allocation.getSchedulingMode(), null); // Deduct resources that we can release - Resource usedResource = Resources.clone(getUser(username).getUsed(p)); + User user = getUser(username); + if (user == null) { + if (LOG.isDebugEnabled()) { + LOG.debug("User " + username + " has been removed!"); + } + return false; + } + Resource usedResource = Resources.clone(user.getUsed(p)); Resources.subtractFrom(usedResource, request.getTotalReleasedResource()); @@ -1391,6 +1398,12 @@ public class LeafQueue extends AbstractCSQueue { SchedulingMode schedulingMode, Resource userLimit) { String user = application.getUser(); User queueUser = getUser(user); + if (queueUser == null) { + if (LOG.isDebugEnabled()) { + LOG.debug("User " + user + " has been removed!"); + } + return Resources.none(); + } // Compute user limit respect requested labels, // TODO, need consider headroom respect labels also @@ -1485,6 +1498,12 @@ public class LeafQueue extends AbstractCSQueue { try { readLock.lock(); User user = getUser(userName); + if (user == null) { + if (LOG.isDebugEnabled()) { + LOG.debug("User " + userName + " has been removed!"); + } + return false; + } currentResourceLimits.setAmountNeededUnreserve(Resources.none()); http://git-wip-us.apache.org/repos/asf/hadoop/blob/c27a5ba6/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java index 192bfa0..cb6ff31 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java @@ -321,6 +321,11 @@ public class FiCaSchedulerApp extends SchedulerApplicationAttempt { RMContainer reservedContainerOnNode = schedulerContainer.getSchedulerNode().getReservedContainer(); if (reservedContainerOnNode != null) { + // adding NP check as this proposal could not be allocated from reserved + // container in async-scheduling mode + if (allocation.getAllocateFromReservedContainer() == null) { + return false; + } RMContainer fromReservedContainer = allocation.getAllocateFromReservedContainer().getRmContainer(); --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
