Author: vinodkv
Date: Sat Oct 5 06:11:47 2013
New Revision: 1529391
URL: http://svn.apache.org/r1529391
Log:
YARN-1273. Fixed Distributed-shell to account for containers that failed to
start. Contributed by Hitesh Shah.
svn merge --ignore-ancestry -c 1529389 ../../trunk
Added:
hadoop/common/branches/branch-2.1-beta/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/ContainerLaunchFailAppMaster.java
- copied unchanged from r1529389,
hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/ContainerLaunchFailAppMaster.java
Modified:
hadoop/common/branches/branch-2.1-beta/hadoop-yarn-project/CHANGES.txt
hadoop/common/branches/branch-2.1-beta/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java
hadoop/common/branches/branch-2.1-beta/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/Client.java
hadoop/common/branches/branch-2.1-beta/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDistributedShell.java
Modified: hadoop/common/branches/branch-2.1-beta/hadoop-yarn-project/CHANGES.txt
URL:
http://svn.apache.org/viewvc/hadoop/common/branches/branch-2.1-beta/hadoop-yarn-project/CHANGES.txt?rev=1529391&r1=1529390&r2=1529391&view=diff
==============================================================================
--- hadoop/common/branches/branch-2.1-beta/hadoop-yarn-project/CHANGES.txt (original)
+++ hadoop/common/branches/branch-2.1-beta/hadoop-yarn-project/CHANGES.txt Sat Oct 5 06:11:47 2013
@@ -110,6 +110,9 @@ Release 2.1.2 - UNRELEASED
YARN-1254. Fixed NodeManager to not pollute container's credentials. (Omkar
Vinit Joshi via vinodkv)
+ YARN-1273. Fixed Distributed-shell to account for containers that failed
+ to start. (Hitesh Shah via vinodkv)
+
Release 2.1.1-beta - 2013-09-23
INCOMPATIBLE CHANGES
Modified:
hadoop/common/branches/branch-2.1-beta/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java
URL:
http://svn.apache.org/viewvc/hadoop/common/branches/branch-2.1-beta/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java?rev=1529391&r1=1529390&r2=1529391&view=diff
==============================================================================
--- hadoop/common/branches/branch-2.1-beta/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java (original)
+++ hadoop/common/branches/branch-2.1-beta/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java Sat Oct 5 06:11:47 2013
@@ -34,6 +34,7 @@ import java.util.concurrent.ConcurrentHa
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.atomic.AtomicInteger;
+import com.google.common.annotations.VisibleForTesting;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
@@ -281,8 +282,8 @@ public class ApplicationMaster {
}
}
- public ApplicationMaster() throws Exception {
- // Set up the configuration and RPC
+ public ApplicationMaster() {
+ // Set up the configuration
conf = new YarnConfiguration();
}
@@ -470,7 +471,7 @@ public class ApplicationMaster {
amRMClient.init(conf);
amRMClient.start();
- containerListener = new NMCallbackHandler();
+ containerListener = createNMCallbackHandler();
nmClientAsync = new NMClientAsyncImpl(containerListener);
nmClientAsync.init(conf);
nmClientAsync.start();
@@ -500,7 +501,6 @@ public class ApplicationMaster {
containerMemory = maxMem;
}
-
// Setup ask for containers from RM
// Send request for containers to RM
// Until we get our fully allocated quota, we keep on polling RM for
@@ -513,7 +513,8 @@ public class ApplicationMaster {
}
numRequestedContainers.set(numTotalContainers);
- while (!done) {
+ while (!done
+ && (numCompletedContainers.get() != numTotalContainers)) {
try {
Thread.sleep(200);
} catch (InterruptedException ex) {}
@@ -522,7 +523,12 @@ public class ApplicationMaster {
return success;
}
-
+
+ @VisibleForTesting
+ NMCallbackHandler createNMCallbackHandler() {
+ return new NMCallbackHandler(this);
+ }
+
private void finish() {
// Join all launched threads
// needed for when we time out
@@ -566,7 +572,6 @@ public class ApplicationMaster {
LOG.error("Failed to unregister application", e);
}
- done = true;
amRMClient.stop();
}
@@ -679,10 +684,17 @@ public class ApplicationMaster {
}
}
- private class NMCallbackHandler implements NMClientAsync.CallbackHandler {
+ @VisibleForTesting
+ static class NMCallbackHandler
+ implements NMClientAsync.CallbackHandler {
private ConcurrentMap<ContainerId, Container> containers =
new ConcurrentHashMap<ContainerId, Container>();
+ private final ApplicationMaster applicationMaster;
+
+ public NMCallbackHandler(ApplicationMaster applicationMaster) {
+ this.applicationMaster = applicationMaster;
+ }
public void addContainer(ContainerId containerId, Container container) {
containers.putIfAbsent(containerId, container);
@@ -713,7 +725,7 @@ public class ApplicationMaster {
}
Container container = containers.get(containerId);
if (container != null) {
- nmClientAsync.getContainerStatusAsync(containerId, container.getNodeId());
+ applicationMaster.nmClientAsync.getContainerStatusAsync(containerId, container.getNodeId());
}
}
@@ -721,6 +733,8 @@ public class ApplicationMaster {
public void onStartContainerError(ContainerId containerId, Throwable t) {
LOG.error("Failed to start Container " + containerId);
containers.remove(containerId);
+ applicationMaster.numCompletedContainers.incrementAndGet();
+ applicationMaster.numFailedContainers.incrementAndGet();
}
@Override
@@ -847,7 +861,6 @@ public class ApplicationMaster {
/**
* Setup the request that will be sent to the RM for the container ask.
*
- * @param numContainers Containers to ask for from RM
* @return the setup ResourceRequest to be sent to RM
*/
private ContainerRequest setupContainerAskForRM() {
Modified:
hadoop/common/branches/branch-2.1-beta/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/Client.java
URL:
http://svn.apache.org/viewvc/hadoop/common/branches/branch-2.1-beta/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/Client.java?rev=1529391&r1=1529390&r2=1529391&view=diff
==============================================================================
--- hadoop/common/branches/branch-2.1-beta/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/Client.java (original)
+++ hadoop/common/branches/branch-2.1-beta/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/Client.java Sat Oct 5 06:11:47 2013
@@ -125,8 +125,7 @@ public class Client {
// Application master jar file
private String appMasterJar = "";
// Main class to invoke application master
- private final String appMasterMainClass =
- "org.apache.hadoop.yarn.applications.distributedshell.ApplicationMaster";
+ private final String appMasterMainClass;
// Shell command to be executed
private String shellCommand = "";
@@ -193,8 +192,14 @@ public class Client {
/**
*/
public Client(Configuration conf) throws Exception {
-
+ this(
+ "org.apache.hadoop.yarn.applications.distributedshell.ApplicationMaster",
+ conf);
+ }
+
+ Client(String appMasterMainClass, Configuration conf) {
this.conf = conf;
+ this.appMasterMainClass = appMasterMainClass;
yarnClient = YarnClient.createYarnClient();
yarnClient.init(conf);
opts = new Options();
@@ -214,6 +219,7 @@ public class Client {
opts.addOption("log_properties", true, "log4j.properties file");
opts.addOption("debug", false, "Dump out debug information");
opts.addOption("help", false, "Print usage");
+
}
/**
Modified:
hadoop/common/branches/branch-2.1-beta/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDistributedShell.java
URL:
http://svn.apache.org/viewvc/hadoop/common/branches/branch-2.1-beta/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDistributedShell.java?rev=1529391&r1=1529390&r2=1529391&view=diff
==============================================================================
--- hadoop/common/branches/branch-2.1-beta/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDistributedShell.java (original)
+++ hadoop/common/branches/branch-2.1-beta/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDistributedShell.java Sat Oct 5 06:11:47 2013
@@ -59,7 +59,7 @@ public class TestDistributedShell {
protected static String APPMASTER_JAR =
JarFinder.getJar(ApplicationMaster.class);
@BeforeClass
- public static void setup() throws InterruptedException, Exception {
+ public static void setup() throws Exception {
LOG.info("Starting up YARN cluster");
conf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, 128);
conf.setClass(YarnConfiguration.RM_SCHEDULER,
@@ -135,7 +135,7 @@ public class TestDistributedShell {
} catch (Exception e) {
throw new RuntimeException(e);
}
- };
+ }
};
t.start();
@@ -248,5 +248,34 @@ public class TestDistributedShell {
Thread.sleep(2000);
}
}
+
+ @Test(timeout=90000)
+ public void testContainerLaunchFailureHandling() throws Exception {
+ String[] args = {
+ "--jar",
+ APPMASTER_JAR,
+ "--num_containers",
+ "2",
+ "--shell_command",
+ Shell.WINDOWS ? "dir" : "ls",
+ "--master_memory",
+ "512",
+ "--container_memory",
+ "128"
+ };
+
+ LOG.info("Initializing DS Client");
+ Client client = new Client(ContainerLaunchFailAppMaster.class.getName(),
+ new Configuration(yarnCluster.getConfig()));
+ boolean initSuccess = client.init(args);
+ Assert.assertTrue(initSuccess);
+ LOG.info("Running DS Client");
+ boolean result = client.run();
+
+ LOG.info("Client run completed. Result=" + result);
+ Assert.assertFalse(result);
+
+ }
+
}