Repository: hadoop
Updated Branches:
  refs/heads/branch-2 ccc834395 -> 2e99210e4
YARN-2392. Add more diags about app retry limits on AM failures. Contributed by Steve Loughran (cherry picked from commit 1970ca7cbcdb7efa160d0cedc2e3e22c1401fad6) Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/2e99210e Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/2e99210e Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/2e99210e Branch: refs/heads/branch-2 Commit: 2e99210e4e5163820af99f8376ea30f0ac3b2b92 Parents: ccc8343 Author: Jian He <[email protected]> Authored: Thu Jun 4 11:14:09 2015 -0700 Committer: Jian He <[email protected]> Committed: Thu Jun 4 11:14:36 2015 -0700 ---------------------------------------------------------------------- hadoop-yarn-project/CHANGES.txt | 3 +++ .../server/resourcemanager/rmapp/RMAppImpl.java | 16 +++++++++++++--- .../rmapp/attempt/RMAppAttemptImpl.java | 4 ++-- 3 files changed, 18 insertions(+), 5 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/2e99210e/hadoop-yarn-project/CHANGES.txt ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 71b68fe..f2198e3 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -241,6 +241,9 @@ Release 2.8.0 - UNRELEASED YARN-3467. Expose allocatedMB, allocatedVCores, and runningContainers metrics on running Applications in RM Web UI. (Anubhav Dhoot via kasha) + YARN-2392. Add more diags about app retry limits on AM failures. (Steve + Loughran via jianhe) + OPTIMIZATIONS YARN-3339. 
TestDockerContainerExecutor should pull a single image and not http://git-wip-us.apache.org/repos/asf/hadoop/blob/2e99210e/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java index f3dacd6..90e63c1 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java @@ -1014,9 +1014,19 @@ public class RMAppImpl implements RMApp, Recoverable { + " failed due to " + failedEvent.getDiagnostics() + ". Failing the application."; } else if (this.isNumAttemptsBeyondThreshold) { - msg = "Application " + this.getApplicationId() + " failed " - + this.maxAppAttempts + " times due to " - + failedEvent.getDiagnostics() + ". Failing the application."; + int globalLimit = conf.getInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, + YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS); + msg = String.format( + "Application %s failed %d times%s%s due to %s. Failing the application.", + getApplicationId(), + maxAppAttempts, + (attemptFailuresValidityInterval <= 0 ? "" + : (" in previous " + attemptFailuresValidityInterval + + " milliseconds")), + (globalLimit == maxAppAttempts) ? 
"" + : (" (global limit =" + globalLimit + + "; local limit is =" + maxAppAttempts + ")"), + failedEvent.getDiagnostics()); } return msg; } http://git-wip-us.apache.org/repos/asf/hadoop/blob/2e99210e/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java index 684dde8..5171bba 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java @@ -1459,9 +1459,9 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { .append(status.getDiagnostics()); if (this.getTrackingUrl() != null) { diagnosticsBuilder.append("For more detailed output,").append( - " check application tracking page: ").append( + " check the application tracking page: ").append( this.getTrackingUrl()).append( - " Then, click on links to logs of each attempt.\n"); + " Then click on links to logs of each attempt.\n"); } return diagnosticsBuilder.toString(); }
