YARN-4329. [YARN-5437] Allow fetching exact reason as to why a submitted app
is in ACCEPTED state in Fair Scheduler (Contributed by Yufei Gu)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/59ee8b7a
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/59ee8b7a
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/59ee8b7a

Branch: refs/heads/YARN-4752
Commit: 59ee8b7a88603e94b5661a8d5d088f7aa99fe049
Parents: 822ae88
Author: Daniel Templeton <templ...@apache.org>
Authored: Wed Nov 9 13:11:37 2016 -0800
Committer: Daniel Templeton <templ...@apache.org>
Committed: Wed Nov 9 13:11:37 2016 -0800

----------------------------------------------------------------------
 .../scheduler/fair/FSAppAttempt.java            | 71 +++++++++++++++-----
 .../scheduler/fair/FairScheduler.java           |  4 +-
 .../scheduler/fair/MaxRunningAppsEnforcer.java  | 50 ++++++++++++--
 .../fair/TestMaxRunningAppsEnforcer.java        |  2 +-
 4 files changed, 103 insertions(+), 24 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/59ee8b7a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java
----------------------------------------------------------------------
diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java
 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java
index 11922d9..df20117 100644
--- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java
+++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java
@@ -766,8 +766,18 @@ public class FSAppAttempt extends 
SchedulerApplicationAttempt
     // The desired container won't fit here, so reserve
     if (isReservable(capability) &&
         reserve(request, node, reservedContainer, type, schedulerKey)) {
+      if (isWaitingForAMContainer()) {
+        updateAMDiagnosticMsg(capability,
+            " exceed the available resources of the node and the request is"
+                + " reserved");
+      }
       return FairScheduler.CONTAINER_RESERVED;
     } else {
+      if (isWaitingForAMContainer()) {
+        updateAMDiagnosticMsg(capability,
+            " exceed the available resources of the node and the request 
cannot"
+                + " be reserved");
+      }
       if (LOG.isDebugEnabled()) {
         LOG.debug("Couldn't creating reservation for " +
             getName() + ",at priority " +  request.getPriority());
@@ -920,23 +930,31 @@ public class FSAppAttempt extends 
SchedulerApplicationAttempt
     ResourceRequest rackRequest = getResourceRequest(key, node.getRackName());
     ResourceRequest nodeRequest = getResourceRequest(key, node.getNodeName());
 
-    return
-        // There must be outstanding requests at the given priority:
+    boolean ret = true;
+    if (!(// There must be outstanding requests at the given priority:
         anyRequest != null && anyRequest.getNumContainers() > 0 &&
-            // If locality relaxation is turned off at *-level, there must be a
-            // non-zero request for the node's rack:
-            (anyRequest.getRelaxLocality() ||
-                (rackRequest != null && rackRequest.getNumContainers() > 0)) &&
-            // If locality relaxation is turned off at rack-level, there must 
be a
-            // non-zero request at the node:
-            (rackRequest == null || rackRequest.getRelaxLocality() ||
-                (nodeRequest != null && nodeRequest.getNumContainers() > 0)) &&
-            // The requested container must be able to fit on the node:
-            Resources.lessThanOrEqual(RESOURCE_CALCULATOR, null,
-                anyRequest.getCapability(),
-                node.getRMNode().getTotalCapability()) &&
-            // The requested container must fit in queue maximum share:
-            getQueue().fitsInMaxShare(anyRequest.getCapability());
+        // If locality relaxation is turned off at *-level, there must be a
+        // non-zero request for the node's rack:
+        (anyRequest.getRelaxLocality() ||
+        (rackRequest != null && rackRequest.getNumContainers() > 0)) &&
+        // If locality relaxation is turned off at rack-level, there must be a
+        // non-zero request at the node:
+        (rackRequest == null || rackRequest.getRelaxLocality() ||
+        (nodeRequest != null && nodeRequest.getNumContainers() > 0)) &&
+        // The requested container must be able to fit on the node:
+        Resources.lessThanOrEqual(RESOURCE_CALCULATOR, null,
+        anyRequest.getCapability(), node.getRMNode().getTotalCapability()))) {
+      ret = false;
+    } else if (!getQueue().fitsInMaxShare(anyRequest.getCapability())) {
+      // The requested container must fit in queue maximum share
+      if (isWaitingForAMContainer()) {
+        updateAMDiagnosticMsg(anyRequest.getCapability(),
+            " exceeds current queue or its parents maximum resource 
allowed).");
+      }
+      ret = false;
+    }
+
+    return ret;
   }
 
   private boolean isValidReservation(FSSchedulerNode node) {
@@ -1083,6 +1101,12 @@ public class FSAppAttempt extends 
SchedulerApplicationAttempt
   @Override
   public Resource assignContainer(FSSchedulerNode node) {
     if (isOverAMShareLimit()) {
+      if (isWaitingForAMContainer()) {
+        List<ResourceRequest> ask = appSchedulingInfo.getAllResourceRequests();
+        updateAMDiagnosticMsg(ask.get(0).getCapability(), " exceeds maximum "
+            + "AM resource allowed).");
+      }
+
       if (LOG.isDebugEnabled()) {
         LOG.debug("Skipping allocation because maxAMShare limit would " +
             "be exceeded");
@@ -1093,6 +1117,21 @@ public class FSAppAttempt extends 
SchedulerApplicationAttempt
   }
 
   /**
+   * Build the diagnostic message and update it.
+   *
+   * @param resource resource request
+   * @param reason the reason why AM doesn't get the resource
+   */
+  private void updateAMDiagnosticMsg(Resource resource, String reason) {
+    StringBuilder diagnosticMessageBldr = new StringBuilder();
+    diagnosticMessageBldr.append(" (Resource request: ");
+    diagnosticMessageBldr.append(resource);
+    diagnosticMessageBldr.append(reason);
+    updateAMContainerDiagnostics(AMState.INACTIVATED,
+        diagnosticMessageBldr.toString());
+  }
+
+  /**
    * Preempt a running container according to the priority
    */
   @Override

http://git-wip-us.apache.org/repos/asf/hadoop/blob/59ee8b7a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java
----------------------------------------------------------------------
diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java
 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java
index 94fdb7c..1d04710 100644
--- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java
+++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java
@@ -710,7 +710,7 @@ public class FairScheduler extends
       }
       application.setCurrentAppAttempt(attempt);
 
-      boolean runnable = maxRunningEnforcer.canAppBeRunnable(queue, user);
+      boolean runnable = maxRunningEnforcer.canAppBeRunnable(queue, attempt);
       queue.addApp(attempt, runnable);
       if (runnable) {
         maxRunningEnforcer.trackRunnableApp(attempt);
@@ -1714,7 +1714,7 @@ public class FairScheduler extends
     boolean wasRunnable = oldQueue.removeApp(attempt);
     // if app was not runnable before, it may be runnable now
     boolean nowRunnable = maxRunningEnforcer.canAppBeRunnable(newQueue,
-        attempt.getUser());
+        attempt);
     if (wasRunnable && !nowRunnable) {
       throw new IllegalStateException("Should have already verified that app "
           + attempt.getApplicationId() + " would be runnable in new queue");

http://git-wip-us.apache.org/repos/asf/hadoop/blob/59ee8b7a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/MaxRunningAppsEnforcer.java
----------------------------------------------------------------------
diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/MaxRunningAppsEnforcer.java
 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/MaxRunningAppsEnforcer.java
index 8592fa6..02e2d97 100644
--- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/MaxRunningAppsEnforcer.java
+++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/MaxRunningAppsEnforcer.java
@@ -30,6 +30,7 @@ import org.apache.commons.logging.LogFactory;
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.collect.ArrayListMultimap;
 import com.google.common.collect.ListMultimap;
+import 
org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt.AMState;
 
 /**
  * Handles tracking and enforcement for user and queue maxRunningApps
@@ -54,25 +55,64 @@ public class MaxRunningAppsEnforcer {
   /**
    * Checks whether making the application runnable would exceed any
    * maxRunningApps limits.
+   *
+   * @param queue the current queue
+   * @param attempt the app attempt being checked
+   * @return true if the application is runnable; false otherwise
+   */
+  public boolean canAppBeRunnable(FSQueue queue, FSAppAttempt attempt) {
+    boolean ret = true;
+    if (exceedUserMaxApps(attempt.getUser())) {
+      attempt.updateAMContainerDiagnostics(AMState.INACTIVATED,
+          "The user \"" + attempt.getUser() + "\" has reached the maximum 
limit"
+              + " of runnable applications.");
+      ret = false;
+    } else if (exceedQueueMaxRunningApps(queue)) {
+      attempt.updateAMContainerDiagnostics(AMState.INACTIVATED,
+          "The queue \"" + queue.getName() + "\" has reached the maximum limit"
+              + " of runnable applications.");
+      ret = false;
+    }
+
+    return ret;
+  }
+
+  /**
+   * Checks whether the number of user runnable apps exceeds the limitation.
+   *
+   * @param user the user name
+   * @return true if the number hits the limit; false otherwise
    */
-  public boolean canAppBeRunnable(FSQueue queue, String user) {
+  public boolean exceedUserMaxApps(String user) {
     AllocationConfiguration allocConf = scheduler.getAllocationConfiguration();
     Integer userNumRunnable = usersNumRunnableApps.get(user);
     if (userNumRunnable == null) {
       userNumRunnable = 0;
     }
     if (userNumRunnable >= allocConf.getUserMaxApps(user)) {
-      return false;
+      return true;
     }
+
+    return false;
+  }
+
+  /**
+   * Recursively checks whether the number of queue runnable apps exceeds the
+   * limitation.
+   *
+   * @param queue the current queue
+   * @return true if the number hits the limit; false otherwise
+   */
+  public boolean exceedQueueMaxRunningApps(FSQueue queue) {
     // Check queue and all parent queues
     while (queue != null) {
       if (queue.getNumRunnableApps() >= queue.getMaxRunningApps()) {
-        return false;
+        return true;
       }
       queue = queue.getParent();
     }
 
-    return true;
+    return false;
   }
 
   /**
@@ -198,7 +238,7 @@ public class MaxRunningAppsEnforcer {
         continue;
       }
 
-      if (canAppBeRunnable(next.getQueue(), next.getUser())) {
+      if (canAppBeRunnable(next.getQueue(), next)) {
         trackRunnableApp(next);
         FSAppAttempt appSched = next;
         next.getQueue().addApp(appSched, true);

http://git-wip-us.apache.org/repos/asf/hadoop/blob/59ee8b7a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestMaxRunningAppsEnforcer.java
----------------------------------------------------------------------
diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestMaxRunningAppsEnforcer.java
 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestMaxRunningAppsEnforcer.java
index 3f17081..f77df1e 100644
--- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestMaxRunningAppsEnforcer.java
+++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestMaxRunningAppsEnforcer.java
@@ -68,9 +68,9 @@ public class TestMaxRunningAppsEnforcer {
   private FSAppAttempt addApp(FSLeafQueue queue, String user) {
     ApplicationId appId = ApplicationId.newInstance(0l, appNum++);
     ApplicationAttemptId attId = ApplicationAttemptId.newInstance(appId, 0);
-    boolean runnable = maxAppsEnforcer.canAppBeRunnable(queue, user);
     FSAppAttempt app = new FSAppAttempt(scheduler, attId, user, queue, null,
         rmContext);
+    boolean runnable = maxAppsEnforcer.canAppBeRunnable(queue, app);
     queue.addApp(app, runnable);
     if (runnable) {
       maxAppsEnforcer.trackRunnableApp(app);


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-commits-h...@hadoop.apache.org

Reply via email to