Author: jianhe
Date: Wed Jul 30 03:58:59 2014
New Revision: 1614538
URL: http://svn.apache.org/r1614538
Log:
YARN-2354. DistributedShell may allocate more containers than client specified
after AM restarts. Contributed by Li Lu
Modified:
hadoop/common/trunk/hadoop-yarn-project/CHANGES.txt
hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java
hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDSFailedAppMaster.java
Modified: hadoop/common/trunk/hadoop-yarn-project/CHANGES.txt
URL:
http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-yarn-project/CHANGES.txt?rev=1614538&r1=1614537&r2=1614538&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-yarn-project/CHANGES.txt (original)
+++ hadoop/common/trunk/hadoop-yarn-project/CHANGES.txt Wed Jul 30 03:58:59 2014
@@ -111,6 +111,9 @@ Release 2.6.0 - UNRELEASED
YARN-1796. container-executor shouldn't require o-r permissions (atm)
+ YARN-2354. DistributedShell may allocate more containers than client
+ specified after AM restarts. (Li Lu via jianhe)
+
Release 2.5.0 - UNRELEASED
INCOMPATIBLE CHANGES
Modified:
hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java
URL:
http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java?rev=1614538&r1=1614537&r2=1614538&view=diff
==============================================================================
---
hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java
(original)
+++
hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java
Wed Jul 30 03:58:59 2014
@@ -208,7 +208,8 @@ public class ApplicationMaster {
// App Master configuration
// No. of containers to run shell command on
- private int numTotalContainers = 1;
+ @VisibleForTesting
+ protected int numTotalContainers = 1;
// Memory to request for the container on which the shell command will run
private int containerMemory = 10;
// VirtualCores to request for the container on which the shell command will
run
@@ -594,8 +595,8 @@ public class ApplicationMaster {
List<Container> previousAMRunningContainers =
response.getContainersFromPreviousAttempts();
- LOG.info("Received " + previousAMRunningContainers.size()
- + " previous AM's running containers on AM registration.");
+ LOG.info(appAttemptID + " received " + previousAMRunningContainers.size()
+ + " previous attempts' running containers on AM registration.");
numAllocatedContainers.addAndGet(previousAMRunningContainers.size());
int numTotalContainersToRequest =
@@ -610,7 +611,7 @@ public class ApplicationMaster {
ContainerRequest containerAsk = setupContainerAskForRM();
amRMClient.addContainerRequest(containerAsk);
}
- numRequestedContainers.set(numTotalContainersToRequest);
+ numRequestedContainers.set(numTotalContainers);
try {
publishApplicationAttemptEvent(timelineClient, appAttemptID.toString(),
DSEvent.DS_APP_ATTEMPT_END);
@@ -689,7 +690,7 @@ public class ApplicationMaster {
LOG.info("Got response from RM for container ask, completedCnt="
+ completedContainers.size());
for (ContainerStatus containerStatus : completedContainers) {
- LOG.info("Got container status for containerID="
+ LOG.info(appAttemptID + " got container status for containerID="
+ containerStatus.getContainerId() + ", state="
+ containerStatus.getState() + ", exitStatus="
+ containerStatus.getExitStatus() + ", diagnostics="
Modified:
hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDSFailedAppMaster.java
URL:
http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDSFailedAppMaster.java?rev=1614538&r1=1614537&r2=1614538&view=diff
==============================================================================
---
hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDSFailedAppMaster.java
(original)
+++
hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDSFailedAppMaster.java
Wed Jul 30 03:58:59 2014
@@ -36,9 +36,11 @@ public class TestDSFailedAppMaster exten
if (appAttemptID.getAttemptId() == 2) {
// should reuse the earlier running container, so numAllocatedContainers
// should be set to 1. And should ask no more containers, so
- // numRequestedContainers should be set to 0.
+ // numRequestedContainers should be the same as numTotalContainers.
+ // The only container is the container requested by the AM in the first
+ // attempt.
if (numAllocatedContainers.get() != 1
- || numRequestedContainers.get() != 0) {
+ || numRequestedContainers.get() != numTotalContainers) {
LOG.info("NumAllocatedContainers is " + numAllocatedContainers.get()
+ " and NumRequestedContainers is " + numAllocatedContainers.get()
+ ".Application Master failed. exiting");