YARN-6302. Fail the node if Linux Container Executor is not configured properly (Contributed by Miklos Szegedi via Daniel Templeton)
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/46940d92
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/46940d92
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/46940d92

Branch: refs/heads/YARN-5355
Commit: 46940d92e2b17c627eb17a9d8fc6cec9c3715592
Parents: 394589f
Author: Daniel Templeton <templ...@apache.org>
Authored: Wed Apr 19 12:18:38 2017 -0700
Committer: Daniel Templeton <templ...@apache.org>
Committed: Wed Apr 19 12:23:49 2017 -0700

----------------------------------------------------------------------
 .../yarn/exceptions/ConfigurationException.java | 45 ++++
 .../server/nodemanager/ContainerExecutor.java | 4 +-
 .../nodemanager/DefaultContainerExecutor.java | 7 +-
 .../nodemanager/LinuxContainerExecutor.java | 79 ++++++-
 .../nodemanager/NodeHealthCheckerService.java | 54 +++--
 .../server/nodemanager/NodeStatusUpdater.java | 6 +
 .../nodemanager/NodeStatusUpdaterImpl.java | 6 +
 .../launcher/ContainerLaunch.java | 38 ++--
 .../launcher/ContainerRelaunch.java | 9 +
 .../impl/container-executor.c | 40 ++--
 .../impl/container-executor.h | 22 +-
 .../TestDefaultContainerExecutor.java | 3 +-
 .../nodemanager/TestLinuxContainerExecutor.java | 9 +-
 .../TestLinuxContainerExecutorWithMocks.java | 217 +++++++++++--------
 .../nodemanager/TestNodeHealthService.java | 11 +-
 .../launcher/TestContainerLaunch.java | 65 +++++-
 .../TestContainersMonitorResourceChange.java | 3 +-
 .../TestContainerSchedulerQueuing.java | 4 +-
 ...-container-executer-with-configuration-error | 20 ++
 19 files changed, 466 insertions(+), 176 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/hadoop/blob/46940d92/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/exceptions/ConfigurationException.java
----------------------------------------------------------------------
diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/exceptions/ConfigurationException.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/exceptions/ConfigurationException.java new file mode 100644 index 0000000..ed5e42e --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/exceptions/ConfigurationException.java @@ -0,0 +1,45 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.exceptions; + +import org.apache.hadoop.classification.InterfaceAudience.Public; +import org.apache.hadoop.classification.InterfaceStability.Evolving; + +/** + * This exception is thrown on unrecoverable configuration errors. + * An example is container launch error due to configuration. 
+ */ +@Public +@Evolving +public class ConfigurationException extends YarnException { + + private static final long serialVersionUID = 0x9801a7a0f8e3L; + + public ConfigurationException(Throwable cause) { + super(cause); + } + + public ConfigurationException(String message) { + super(message); + } + + public ConfigurationException(String message, Throwable cause) { + super(message, cause); + } +} http://git-wip-us.apache.org/repos/asf/hadoop/blob/46940d92/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java index 64d2af9..0581878 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java @@ -47,6 +47,7 @@ import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.exceptions.ConfigurationException; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerDiagnosticsUpdateEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch; @@ -164,9 +165,10 @@ 
public abstract class ContainerExecutor implements Configurable { * @param ctx Encapsulates information necessary for launching containers. * @return the return status of the launch * @throws IOException if the container launch fails + * @throws ConfigurationException if config error was found */ public abstract int launchContainer(ContainerStartContext ctx) throws - IOException; + IOException, ConfigurationException; /** * Signal container with the specified signal. http://git-wip-us.apache.org/repos/asf/hadoop/blob/46940d92/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DefaultContainerExecutor.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DefaultContainerExecutor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DefaultContainerExecutor.java index 420035d..18604df 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DefaultContainerExecutor.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DefaultContainerExecutor.java @@ -51,6 +51,7 @@ import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.exceptions.ConfigurationException; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; import 
org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerDiagnosticsUpdateEvent; @@ -209,7 +210,8 @@ public class DefaultContainerExecutor extends ContainerExecutor { } @Override - public int launchContainer(ContainerStartContext ctx) throws IOException { + public int launchContainer(ContainerStartContext ctx) + throws IOException, ConfigurationException { Container container = ctx.getContainer(); Path nmPrivateContainerScriptPath = ctx.getNmPrivateContainerScriptPath(); Path nmPrivateTokensPath = ctx.getNmPrivateTokensPath(); @@ -291,8 +293,7 @@ public class DefaultContainerExecutor extends ContainerExecutor { if (isContainerActive(containerId)) { shExec.execute(); - } - else { + } else { LOG.info("Container " + containerIdStr + " was marked as inactive. Returning terminated error"); return ExitCode.TERMINATED.getExitCode(); http://git-wip-us.apache.org/repos/asf/hadoop/blob/46940d92/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java index 46b6dfb..cb1d53d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java @@ -30,6 +30,7 @@ import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.yarn.api.ApplicationConstants; 
import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.exceptions.ConfigurationException; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerDiagnosticsUpdateEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperation; @@ -111,6 +112,58 @@ public class LinuxContainerExecutor extends ContainerExecutor { private LinuxContainerRuntime linuxContainerRuntime; /** + * The container exit code. + */ + public enum ExitCode { + SUCCESS(0), + INVALID_ARGUMENT_NUMBER(1), + INVALID_COMMAND_PROVIDED(3), + INVALID_NM_ROOT_DIRS(5), + SETUID_OPER_FAILED(6), + UNABLE_TO_EXECUTE_CONTAINER_SCRIPT(7), + UNABLE_TO_SIGNAL_CONTAINER(8), + INVALID_CONTAINER_PID(9), + OUT_OF_MEMORY(18), + INITIALIZE_USER_FAILED(20), + PATH_TO_DELETE_IS_NULL(21), + INVALID_CONTAINER_EXEC_PERMISSIONS(22), + INVALID_CONFIG_FILE(24), + SETSID_OPER_FAILED(25), + WRITE_PIDFILE_FAILED(26), + WRITE_CGROUP_FAILED(27), + TRAFFIC_CONTROL_EXECUTION_FAILED(28), + DOCKER_RUN_FAILED(29), + ERROR_OPENING_DOCKER_FILE(30), + ERROR_READING_DOCKER_FILE(31), + FEATURE_DISABLED(32), + COULD_NOT_CREATE_SCRIPT_COPY(33), + COULD_NOT_CREATE_CREDENTIALS_FILE(34), + COULD_NOT_CREATE_WORK_DIRECTORIES(35), + COULD_NOT_CREATE_APP_LOG_DIRECTORIES(36), + COULD_NOT_CREATE_TMP_DIRECTORIES(37), + ERROR_CREATE_CONTAINER_DIRECTORIES_ARGUMENTS(38); + + private final int code; + + ExitCode(int exitCode) { + this.code = exitCode; + } + + /** + * Get the exit code as an int. + * @return the exit code as an int + */ + public int getExitCode() { + return code; + } + + @Override + public String toString() { + return String.valueOf(code); + } + } + + /** * Default constructor to allow for creation through reflection. 
*/ public LinuxContainerExecutor() { @@ -386,7 +439,8 @@ public class LinuxContainerExecutor extends ContainerExecutor { } @Override - public int launchContainer(ContainerStartContext ctx) throws IOException { + public int launchContainer(ContainerStartContext ctx) + throws IOException, ConfigurationException { Container container = ctx.getContainer(); Path nmPrivateContainerScriptPath = ctx.getNmPrivateContainerScriptPath(); Path nmPrivateTokensPath = ctx.getNmPrivateTokensPath(); @@ -496,7 +550,7 @@ public class LinuxContainerExecutor extends ContainerExecutor { } else { LOG.info( "Container was marked as inactive. Returning terminated error"); - return ExitCode.TERMINATED.getExitCode(); + return ContainerExecutor.ExitCode.TERMINATED.getExitCode(); } } catch (ContainerExecutionException e) { int exitCode = e.getExitCode(); @@ -504,8 +558,8 @@ public class LinuxContainerExecutor extends ContainerExecutor { // 143 (SIGTERM) and 137 (SIGKILL) exit codes means the container was // terminated/killed forcefully. 
In all other cases, log the // output - if (exitCode != ExitCode.FORCE_KILLED.getExitCode() - && exitCode != ExitCode.TERMINATED.getExitCode()) { + if (exitCode != ContainerExecutor.ExitCode.FORCE_KILLED.getExitCode() + && exitCode != ContainerExecutor.ExitCode.TERMINATED.getExitCode()) { LOG.warn("Exception from container-launch with container ID: " + containerId + " and exit code: " + exitCode, e); @@ -525,6 +579,23 @@ public class LinuxContainerExecutor extends ContainerExecutor { logOutput(diagnostics); container.handle(new ContainerDiagnosticsUpdateEvent(containerId, diagnostics)); + if (exitCode == + ExitCode.INVALID_CONTAINER_EXEC_PERMISSIONS.getExitCode() || + exitCode == + ExitCode.INVALID_CONFIG_FILE.getExitCode() || + exitCode == + ExitCode.COULD_NOT_CREATE_SCRIPT_COPY.getExitCode() || + exitCode == + ExitCode.COULD_NOT_CREATE_CREDENTIALS_FILE.getExitCode() || + exitCode == + ExitCode.COULD_NOT_CREATE_WORK_DIRECTORIES.getExitCode() || + exitCode == + ExitCode.COULD_NOT_CREATE_APP_LOG_DIRECTORIES.getExitCode() || + exitCode == + ExitCode.COULD_NOT_CREATE_TMP_DIRECTORIES.getExitCode()) { + throw new ConfigurationException( + "Linux Container Executor reached unrecoverable exception", e); + } } else { container.handle(new ContainerDiagnosticsUpdateEvent(containerId, "Container killed on request. 
Exit code is " + exitCode)); http://git-wip-us.apache.org/repos/asf/hadoop/blob/46940d92/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeHealthCheckerService.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeHealthCheckerService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeHealthCheckerService.java index c1a159a..7e2fc7e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeHealthCheckerService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeHealthCheckerService.java @@ -18,10 +18,15 @@ package org.apache.hadoop.yarn.server.nodemanager; +import com.google.common.base.Joiner; +import com.google.common.base.Strings; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.service.CompositeService; import org.apache.hadoop.util.NodeHealthScriptRunner; +import java.util.Arrays; +import java.util.Collections; + /** * The class which provides functionality of checking the health of the node and * reporting back to the service for which the health checker has been asked to @@ -31,6 +36,8 @@ public class NodeHealthCheckerService extends CompositeService { private NodeHealthScriptRunner nodeHealthScriptRunner; private LocalDirsHandlerService dirsHandler; + private Exception nodeHealthException; + private long nodeHealthExceptionReportTime; static final String SEPARATOR = ";"; @@ -39,6 +46,8 @@ public class NodeHealthCheckerService extends CompositeService { 
super(NodeHealthCheckerService.class.getName()); nodeHealthScriptRunner = scriptRunner; dirsHandler = dirHandlerService; + nodeHealthException = null; + nodeHealthExceptionReportTime = 0; } @Override @@ -54,33 +63,39 @@ public class NodeHealthCheckerService extends CompositeService { * @return the reporting string of health of the node */ String getHealthReport() { - String scriptReport = (nodeHealthScriptRunner == null) ? "" - : nodeHealthScriptRunner.getHealthReport(); - if (scriptReport.equals("")) { - return dirsHandler.getDisksHealthReport(false); - } else { - return scriptReport.concat(SEPARATOR + dirsHandler.getDisksHealthReport(false)); - } + String scriptReport = Strings.emptyToNull( + nodeHealthScriptRunner == null ? null : + nodeHealthScriptRunner.getHealthReport()); + String discReport = + Strings.emptyToNull( + dirsHandler.getDisksHealthReport(false)); + String exceptionReport = Strings.emptyToNull( + nodeHealthException == null ? null : + nodeHealthException.getMessage()); + + return Joiner.on(SEPARATOR).skipNulls() + .join(scriptReport, discReport, exceptionReport); } /** * @return <em>true</em> if the node is healthy */ boolean isHealthy() { - boolean scriptHealthStatus = (nodeHealthScriptRunner == null) ? true - : nodeHealthScriptRunner.isHealthy(); - return scriptHealthStatus && dirsHandler.areDisksHealthy(); + boolean scriptHealthy = nodeHealthScriptRunner == null || + nodeHealthScriptRunner.isHealthy(); + return nodeHealthException == null && + scriptHealthy && dirsHandler.areDisksHealthy(); } /** * @return when the last time the node health status is reported */ long getLastHealthReportTime() { - long diskCheckTime = dirsHandler.getLastDisksCheckTime(); - long lastReportTime = (nodeHealthScriptRunner == null) - ? diskCheckTime - : Math.max(nodeHealthScriptRunner.getLastReportedTime(), diskCheckTime); - return lastReportTime; + return Collections.max(Arrays.asList( + dirsHandler.getLastDisksCheckTime(), + nodeHealthScriptRunner == null ? 
0 : + nodeHealthScriptRunner.getLastReportedTime(), + nodeHealthExceptionReportTime)); } /** @@ -96,4 +111,13 @@ public class NodeHealthCheckerService extends CompositeService { NodeHealthScriptRunner getNodeHealthScriptRunner() { return nodeHealthScriptRunner; } + + /** + * Report an exception to mark the node as unhealthy. + * @param ex the exception that makes the node unhealthy + */ + void reportException(Exception ex) { + nodeHealthException = ex; + nodeHealthExceptionReportTime = System.currentTimeMillis(); + } } http://git-wip-us.apache.org/repos/asf/hadoop/blob/46940d92/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdater.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdater.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdater.java index f0b99ec..08892d2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdater.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdater.java @@ -53,4 +53,10 @@ public interface NodeStatusUpdater extends Service { * Clear the list of recently completed containers */ public void clearFinishedContainersFromCache(); + + /** + * Report an unrecoverable exception. 
+ * @param ex exception that makes the node unhealthy + */ + void reportException(Exception ex); } http://git-wip-us.apache.org/repos/asf/hadoop/blob/46940d92/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java index 23395f8..4914dc7 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java @@ -990,6 +990,12 @@ public class NodeStatusUpdaterImpl extends AbstractService implements return false; } + @Override + public void reportException(Exception ex) { + healthChecker.reportException(ex); + sendOutofBandHeartBeat(); + } + private List<LogAggregationReport> getLogAggregationReportsForApps( ConcurrentLinkedQueue<LogAggregationReport> lastestLogAggregationStatus) { LogAggregationReport status; http://git-wip-us.apache.org/repos/asf/hadoop/blob/46940d92/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java ---------------------------------------------------------------------- diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java index a1c407f..1fcccde 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java @@ -58,6 +58,7 @@ import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; import org.apache.hadoop.yarn.api.records.SignalContainerCommand; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.event.Dispatcher; +import org.apache.hadoop.yarn.exceptions.ConfigurationException; import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.ipc.RPCUtil; import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor; @@ -200,23 +201,23 @@ public class ContainerLaunch implements Callable<Integer> { FileContext lfs = FileContext.getLocalFSFileContext(); Path nmPrivateContainerScriptPath = dirsHandler.getLocalPathForWrite( - getContainerPrivateDir(appIdStr, containerIdStr) + Path.SEPARATOR - + CONTAINER_SCRIPT); + getContainerPrivateDir(appIdStr, containerIdStr) + Path.SEPARATOR + + CONTAINER_SCRIPT); Path nmPrivateTokensPath = dirsHandler.getLocalPathForWrite( - getContainerPrivateDir(appIdStr, containerIdStr) + Path.SEPARATOR - + String.format(ContainerLocalizer.TOKEN_FILE_NAME_FMT, - containerIdStr)); + getContainerPrivateDir(appIdStr, containerIdStr) + Path.SEPARATOR + + String.format(ContainerLocalizer.TOKEN_FILE_NAME_FMT, + 
containerIdStr)); Path nmPrivateClasspathJarDir = dirsHandler.getLocalPathForWrite( - getContainerPrivateDir(appIdStr, containerIdStr)); + getContainerPrivateDir(appIdStr, containerIdStr)); DataOutputStream containerScriptOutStream = null; DataOutputStream tokensOutStream = null; // Select the working directory for the container Path containerWorkDir = dirsHandler.getLocalPathForWrite(ContainerLocalizer.USERCACHE - + Path.SEPARATOR + user + Path.SEPARATOR - + ContainerLocalizer.APPCACHE + Path.SEPARATOR + appIdStr - + Path.SEPARATOR + containerIdStr, + + Path.SEPARATOR + user + Path.SEPARATOR + + ContainerLocalizer.APPCACHE + Path.SEPARATOR + appIdStr + + Path.SEPARATOR + containerIdStr, LocalDirAllocator.SIZE_UNKNOWN, false); recordContainerWorkDir(containerID, containerWorkDir.toString()); @@ -246,12 +247,12 @@ public class ContainerLaunch implements Callable<Integer> { appDirs.add(new Path(appsdir, appIdStr)); } containerScriptOutStream = - lfs.create(nmPrivateContainerScriptPath, - EnumSet.of(CREATE, OVERWRITE)); + lfs.create(nmPrivateContainerScriptPath, + EnumSet.of(CREATE, OVERWRITE)); // Set the token location too. 
environment.put( - ApplicationConstants.CONTAINER_TOKEN_FILE_ENV_NAME, + ApplicationConstants.CONTAINER_TOKEN_FILE_ENV_NAME, new Path(containerWorkDir, FINAL_CONTAINER_TOKENS_FILE).toUri().getPath()); // Sanitize the container's environment @@ -266,7 +267,7 @@ public class ContainerLaunch implements Callable<Integer> { .setCommands(launchContext.getCommands()).build()); // Write out the environment exec.writeLaunchEnv(containerScriptOutStream, environment, - localResources, launchContext.getCommands(), + localResources, launchContext.getCommands(), new Path(containerLogDirs.get(0)), user); // /////////// End of writing out container-script @@ -294,6 +295,14 @@ public class ContainerLaunch implements Callable<Integer> { .setUserLocalDirs(userLocalDirs) .setContainerLocalDirs(containerLocalDirs) .setContainerLogDirs(containerLogDirs).build()); + } catch (ConfigurationException e) { + LOG.error("Failed to launch container due to configuration error.", e); + dispatcher.getEventHandler().handle(new ContainerExitEvent( + containerID, ContainerEventType.CONTAINER_EXITED_WITH_FAILURE, ret, + e.getMessage())); + // Mark the node as unhealthy + context.getNodeStatusUpdater().reportException(e); + return ret; } catch (Throwable e) { LOG.warn("Failed to launch container.", e); dispatcher.getEventHandler().handle(new ContainerExitEvent( @@ -396,7 +405,8 @@ public class ContainerLaunch implements Callable<Integer> { } @SuppressWarnings("unchecked") - protected int launchContainer(ContainerStartContext ctx) throws IOException { + protected int launchContainer(ContainerStartContext ctx) + throws IOException, ConfigurationException { ContainerId containerId = container.getContainerId(); if (container.isMarkedForKilling()) { LOG.info("Container " + containerId + " not launched as it has already " 
http://git-wip-us.apache.org/repos/asf/hadoop/blob/46940d92/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerRelaunch.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerRelaunch.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerRelaunch.java index 1292df6..ac20fa8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerRelaunch.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerRelaunch.java @@ -25,6 +25,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.yarn.api.records.ContainerExitStatus; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.event.Dispatcher; +import org.apache.hadoop.yarn.exceptions.ConfigurationException; import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor; import org.apache.hadoop.yarn.server.nodemanager.Context; import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService; @@ -115,6 +116,14 @@ public class ContainerRelaunch extends ContainerLaunch { .setContainerLocalDirs(containerLocalDirs) .setContainerLogDirs(containerLogDirs) .build()); + } catch (ConfigurationException e) { + LOG.error("Failed to launch container due to configuration error.", e); + dispatcher.getEventHandler().handle(new ContainerExitEvent( + containerId, 
ContainerEventType.CONTAINER_EXITED_WITH_FAILURE, ret, + e.getMessage())); + // Mark the node as unhealthy + getContext().getNodeStatusUpdater().reportException(e); + return ret; } catch (Throwable e) { LOG.warn("Failed to relaunch container.", e); dispatcher.getEventHandler().handle(new ContainerExitEvent( http://git-wip-us.apache.org/repos/asf/hadoop/blob/46940d92/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c index 9be8cf4..bf08035 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c @@ -649,19 +649,21 @@ static int create_container_directories(const char* user, const char *app_id, const char *container_id, char* const* local_dir, char* const* log_dir, const char *work_dir) { // create dirs as 0750 const mode_t perms = S_IRWXU | S_IRGRP | S_IXGRP; - if (app_id == NULL || container_id == NULL || user == NULL || user_detail == NULL || user_detail->pw_name == NULL) { + if (user == NULL || app_id == NULL || container_id == NULL || + local_dir == NULL || log_dir == NULL || work_dir == NULL || + user_detail == NULL || user_detail->pw_name == NULL) { fprintf(LOGFILE, "Either app_id, container_id or the user passed is null.\n"); - return -1; + return ERROR_CREATE_CONTAINER_DIRECTORIES_ARGUMENTS; } - int result = -1; + int result = 
COULD_NOT_CREATE_WORK_DIRECTORIES; char* const* local_dir_ptr; for(local_dir_ptr = local_dir; *local_dir_ptr != NULL; ++local_dir_ptr) { char *container_dir = get_container_work_directory(*local_dir_ptr, user, app_id, container_id); if (container_dir == NULL) { - return -1; + return OUT_OF_MEMORY; } if (mkdirs(container_dir, perms) == 0) { result = 0; @@ -674,12 +676,12 @@ static int create_container_directories(const char* user, const char *app_id, return result; } - result = -1; + result = COULD_NOT_CREATE_APP_LOG_DIRECTORIES; // also make the directory for the container logs char *combined_name = malloc(strlen(app_id) + strlen(container_id) + 2); if (combined_name == NULL) { fprintf(LOGFILE, "Malloc of combined name failed\n"); - result = -1; + result = OUT_OF_MEMORY; } else { sprintf(combined_name, "%s/%s", app_id, container_id); @@ -688,7 +690,7 @@ static int create_container_directories(const char* user, const char *app_id, char *container_log_dir = get_app_log_directory(*log_dir_ptr, combined_name); if (container_log_dir == NULL) { free(combined_name); - return -1; + return OUT_OF_MEMORY; } else if (mkdirs(container_log_dir, perms) != 0) { free(container_log_dir); } else { @@ -703,12 +705,12 @@ static int create_container_directories(const char* user, const char *app_id, return result; } - result = -1; + result = COULD_NOT_CREATE_TMP_DIRECTORIES; // also make the tmp directory char *tmp_dir = get_tmp_directory(work_dir); if (tmp_dir == NULL) { - return -1; + return OUT_OF_MEMORY; } if (mkdirs(tmp_dir, perms) == 0) { result = 0; @@ -1149,12 +1151,12 @@ char* parse_docker_command_file(const char* command_file) { fprintf(ERRORFILE, "Cannot open file %s - %s", command_file, strerror(errno)); fflush(ERRORFILE); - exit(ERROR_OPENING_FILE); + exit(ERROR_OPENING_DOCKER_FILE); } if ((read = getline(&line, &len, stream)) == -1) { fprintf(ERRORFILE, "Error reading command_file %s\n", command_file); fflush(ERRORFILE); - exit(ERROR_READING_FILE); + 
exit(ERROR_READING_DOCKER_FILE); } fclose(stream); @@ -1267,25 +1269,27 @@ int create_local_dirs(const char * user, const char *app_id, // Create container specific directories as user. If there are no resources // to localize for this container, app-directories and log-directories are // also created automatically as part of this call. - if (create_container_directories(user, app_id, container_id, local_dirs, - log_dirs, work_dir) != 0) { + int directory_create_result = create_container_directories(user, app_id, + container_id, local_dirs, log_dirs, work_dir); + if (directory_create_result != 0) { fprintf(ERRORFILE, "Could not create container dirs"); fflush(ERRORFILE); + exit_code = directory_create_result; goto cleanup; } - // 700 + // Copy script file with permissions 700 if (copy_file(container_file_source, script_name, script_file_dest,S_IRWXU) != 0) { fprintf(ERRORFILE, "Could not create copy file %d %s\n", container_file_source, script_file_dest); fflush(ERRORFILE); - exit_code = INVALID_COMMAND_PROVIDED; + exit_code = COULD_NOT_CREATE_SCRIPT_COPY; goto cleanup; } - // 600 + // Copy credential file to permissions 600 if (copy_file(cred_file_source, cred_file, cred_file_dest, S_IRUSR | S_IWUSR) != 0) { - exit_code = UNABLE_TO_EXECUTE_CONTAINER_SCRIPT; + exit_code = COULD_NOT_CREATE_CREDENTIALS_FILE; fprintf(ERRORFILE, "Could not copy file"); fflush(ERRORFILE); goto cleanup; @@ -1870,7 +1874,7 @@ static int delete_path(const char *full_path, /* Return an error if the path is null. 
*/ if (full_path == NULL) { fprintf(LOGFILE, "Path is null\n"); - return UNABLE_TO_BUILD_PATH; + return PATH_TO_DELETE_IS_NULL; } ret = recursive_unlink_children(full_path); if (ret == ENOENT) { http://git-wip-us.apache.org/repos/asf/hadoop/blob/46940d92/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.h ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.h b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.h index aa1c4a1..826ff2c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.h +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.h @@ -37,8 +37,8 @@ enum command { enum errorcodes { INVALID_ARGUMENT_NUMBER = 1, - INVALID_USER_NAME, //2 - INVALID_COMMAND_PROVIDED, //3 + //INVALID_USER_NAME 2 + INVALID_COMMAND_PROVIDED = 3, // SUPER_USER_NOT_ALLOWED_TO_RUN_TASKS (NOT USED) 4 INVALID_NM_ROOT_DIRS = 5, SETUID_OPER_FAILED, //6 @@ -56,18 +56,24 @@ enum errorcodes { OUT_OF_MEMORY = 18, // INITIALIZE_DISTCACHEFILE_FAILED (NOT USED) 19 INITIALIZE_USER_FAILED = 20, - UNABLE_TO_BUILD_PATH, //21 + PATH_TO_DELETE_IS_NULL, //21 INVALID_CONTAINER_EXEC_PERMISSIONS, //22 // PREPARE_JOB_LOGS_FAILED (NOT USED) 23 - INVALID_CONFIG_FILE = 24, + INVALID_CONFIG_FILE = 24, SETSID_OPER_FAILED = 25, WRITE_PIDFILE_FAILED = 26, WRITE_CGROUP_FAILED = 27, TRAFFIC_CONTROL_EXECUTION_FAILED = 28, - DOCKER_RUN_FAILED=29, - ERROR_OPENING_FILE = 30, - ERROR_READING_FILE = 31, - FEATURE_DISABLED = 32 + DOCKER_RUN_FAILED = 29, + ERROR_OPENING_DOCKER_FILE 
= 30, + ERROR_READING_DOCKER_FILE = 31, + FEATURE_DISABLED = 32, + COULD_NOT_CREATE_SCRIPT_COPY = 33, + COULD_NOT_CREATE_CREDENTIALS_FILE = 34, + COULD_NOT_CREATE_WORK_DIRECTORIES = 35, + COULD_NOT_CREATE_APP_LOG_DIRECTORIES = 36, + COULD_NOT_CREATE_TMP_DIRECTORIES = 37, + ERROR_CREATE_CONTAINER_DIRECTORIES_ARGUMENTS = 38, }; enum operations { http://git-wip-us.apache.org/repos/asf/hadoop/blob/46940d92/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestDefaultContainerExecutor.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestDefaultContainerExecutor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestDefaultContainerExecutor.java index 215de91..2e9eff5 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestDefaultContainerExecutor.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestDefaultContainerExecutor.java @@ -61,6 +61,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.exceptions.ConfigurationException; import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.server.nodemanager.api.LocalizationProtocol; import org.apache.hadoop.yarn.server.nodemanager.api.ResourceLocalizationSpec; @@ -226,7 +227,7 @@ public class TestDefaultContainerExecutor { @Test public 
void testContainerLaunchError() - throws IOException, InterruptedException { + throws IOException, InterruptedException, ConfigurationException { if (Shell.WINDOWS) { BASE_TMP_PATH = http://git-wip-us.apache.org/repos/asf/hadoop/blob/46940d92/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutor.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutor.java index 88ebf8d..d67bd76 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutor.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutor.java @@ -56,6 +56,7 @@ import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.exceptions.ConfigurationException; import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.Signal; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ContainerLocalizer; @@ -305,11 +306,13 @@ public class TestLinuxContainerExecutor { return cId; } - private int runAndBlock(String... cmd) throws IOException { + private int runAndBlock(String... 
cmd) + throws IOException, ConfigurationException { return runAndBlock(getNextContainerId(), cmd); } - private int runAndBlock(ContainerId cId, String... cmd) throws IOException { + private int runAndBlock(ContainerId cId, String... cmd) + throws IOException, ConfigurationException { String appId = "APP_" + getNextId(); Container container = mock(Container.class); ContainerLaunchContext context = mock(ContainerLaunchContext.class); @@ -448,7 +451,7 @@ public class TestLinuxContainerExecutor { public void run() { try { runAndBlock(sleepId, "sleep", "100"); - } catch (IOException e) { + } catch (IOException|ConfigurationException e) { LOG.warn("Caught exception while running sleep", e); } }; http://git-wip-us.apache.org/repos/asf/hadoop/blob/46940d92/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutorWithMocks.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutorWithMocks.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutorWithMocks.java index 23f58bb..07134e8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutorWithMocks.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutorWithMocks.java @@ -20,6 +20,7 @@ package org.apache.hadoop.yarn.server.nodemanager; import static org.apache.hadoop.test.PlatformAssumptions.assumeNotWindows; import static org.junit.Assert.assertEquals; +import static 
org.junit.Assert.assertNotEquals; import static org.junit.Assert.assertTrue; import static org.mockito.Matchers.any; import static org.mockito.Mockito.doAnswer; @@ -49,6 +50,7 @@ import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.exceptions.ConfigurationException; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerDiagnosticsUpdateEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperation; @@ -78,6 +80,8 @@ public class TestLinuxContainerExecutorWithMocks { "./src/test/resources/mock-container-executor"; private static final String MOCK_EXECUTOR_WITH_ERROR = "./src/test/resources/mock-container-executer-with-error"; + private static final String MOCK_EXECUTOR_WITH_CONFIG_ERROR = + "./src/test/resources/mock-container-executer-with-configuration-error"; private String tmpMockExecutor; private LinuxContainerExecutor mockExec = null; @@ -147,7 +151,8 @@ public class TestLinuxContainerExecutorWithMocks { } @Test - public void testContainerLaunch() throws IOException { + public void testContainerLaunch() + throws IOException, ConfigurationException { String appSubmitter = "nobody"; String cmd = String.valueOf( PrivilegedOperation.RunAsUserCommand.LAUNCH_CONTAINER.getValue()); @@ -198,7 +203,8 @@ public class TestLinuxContainerExecutorWithMocks { } @Test (timeout = 5000) - public void testContainerLaunchWithPriority() throws IOException { + public void testContainerLaunchWithPriority() + throws IOException, ConfigurationException { // set the scheduler priority to make sure still works with nice -n prio Configuration conf = new Configuration(); @@ -272,102 +278,121 @@ public class 
TestLinuxContainerExecutorWithMocks { public void testContainerLaunchError() throws IOException, ContainerExecutionException { - // reinitialize executer - Configuration conf = new Configuration(); - setupMockExecutor(MOCK_EXECUTOR_WITH_ERROR, conf); - conf.set(YarnConfiguration.NM_LOCAL_DIRS, "file:///bin/echo"); - conf.set(YarnConfiguration.NM_LOG_DIRS, "file:///dev/null"); - - - LinuxContainerExecutor exec; - LinuxContainerRuntime linuxContainerRuntime = new - DefaultLinuxContainerRuntime(PrivilegedOperationExecutor.getInstance - (conf)); - - linuxContainerRuntime.initialize(conf); - exec = new LinuxContainerExecutor(linuxContainerRuntime); - - mockExec = spy(exec); - doAnswer( - new Answer() { - @Override - public Object answer(InvocationOnMock invocationOnMock) - throws Throwable { - String diagnostics = (String) invocationOnMock.getArguments()[0]; - assertTrue("Invalid Diagnostics message: " + diagnostics, - diagnostics.contains("badcommand")); - return null; + final String[] expecetedMessage = {"badcommand", "Exit code: 24"}; + final String[] executor = { + MOCK_EXECUTOR_WITH_ERROR, + MOCK_EXECUTOR_WITH_CONFIG_ERROR + }; + + for (int i = 0; i < expecetedMessage.length; ++i) { + final int j = i; + // reinitialize executer + Configuration conf = new Configuration(); + setupMockExecutor(executor[j], conf); + conf.set(YarnConfiguration.NM_LOCAL_DIRS, "file:///bin/echo"); + conf.set(YarnConfiguration.NM_LOG_DIRS, "file:///dev/null"); + + + LinuxContainerExecutor exec; + LinuxContainerRuntime linuxContainerRuntime = new + DefaultLinuxContainerRuntime(PrivilegedOperationExecutor.getInstance( + conf)); + + linuxContainerRuntime.initialize(conf); + exec = new LinuxContainerExecutor(linuxContainerRuntime); + + mockExec = spy(exec); + doAnswer( + new Answer() { + @Override + public Object answer(InvocationOnMock invocationOnMock) + throws Throwable { + String diagnostics = (String) invocationOnMock.getArguments()[0]; + assertTrue("Invalid Diagnostics message: " + 
diagnostics, + diagnostics.contains(expecetedMessage[j])); + return null; + } } - } - ).when(mockExec).logOutput(any(String.class)); - dirsHandler = new LocalDirsHandlerService(); - dirsHandler.init(conf); - mockExec.setConf(conf); - - String appSubmitter = "nobody"; - String cmd = String - .valueOf(PrivilegedOperation.RunAsUserCommand.LAUNCH_CONTAINER.getValue()); - String appId = "APP_ID"; - String containerId = "CONTAINER_ID"; - Container container = mock(Container.class); - ContainerId cId = mock(ContainerId.class); - ContainerLaunchContext context = mock(ContainerLaunchContext.class); - HashMap<String, String> env = new HashMap<String, String>(); - - when(container.getContainerId()).thenReturn(cId); - when(container.getLaunchContext()).thenReturn(context); - doAnswer( - new Answer() { - @Override - public Object answer(InvocationOnMock invocationOnMock) - throws Throwable { - ContainerDiagnosticsUpdateEvent event = - (ContainerDiagnosticsUpdateEvent) invocationOnMock - .getArguments()[0]; - assertTrue("Invalid Diagnostics message: " + - event.getDiagnosticsUpdate(), - event.getDiagnosticsUpdate().contains("badcommand")); - return null; + ).when(mockExec).logOutput(any(String.class)); + dirsHandler = new LocalDirsHandlerService(); + dirsHandler.init(conf); + mockExec.setConf(conf); + + String appSubmitter = "nobody"; + String cmd = String + .valueOf(PrivilegedOperation.RunAsUserCommand.LAUNCH_CONTAINER. 
+ getValue()); + String appId = "APP_ID"; + String containerId = "CONTAINER_ID"; + Container container = mock(Container.class); + ContainerId cId = mock(ContainerId.class); + ContainerLaunchContext context = mock(ContainerLaunchContext.class); + HashMap<String, String> env = new HashMap<String, String>(); + + when(container.getContainerId()).thenReturn(cId); + when(container.getLaunchContext()).thenReturn(context); + doAnswer( + new Answer() { + @Override + public Object answer(InvocationOnMock invocationOnMock) + throws Throwable { + ContainerDiagnosticsUpdateEvent event = + (ContainerDiagnosticsUpdateEvent) invocationOnMock + .getArguments()[0]; + assertTrue("Invalid Diagnostics message: " + + event.getDiagnosticsUpdate(), + event.getDiagnosticsUpdate().contains(expecetedMessage[j])); + return null; + } } - } - ).when(container).handle(any(ContainerDiagnosticsUpdateEvent.class)); - - when(cId.toString()).thenReturn(containerId); - - when(context.getEnvironment()).thenReturn(env); - - Path scriptPath = new Path("file:///bin/echo"); - Path tokensPath = new Path("file:///dev/null"); - Path workDir = new Path("/tmp"); - Path pidFile = new Path(workDir, "pid.txt"); - - mockExec.activateContainer(cId, pidFile); - - int ret = mockExec.launchContainer(new ContainerStartContext.Builder() - .setContainer(container) - .setNmPrivateContainerScriptPath(scriptPath) - .setNmPrivateTokensPath(tokensPath) - .setUser(appSubmitter) - .setAppId(appId) - .setContainerWorkDir(workDir) - .setLocalDirs(dirsHandler.getLocalDirs()) - .setLogDirs(dirsHandler.getLogDirs()) - .setFilecacheDirs(new ArrayList<>()) - .setUserLocalDirs(new ArrayList<>()) - .setContainerLocalDirs(new ArrayList<>()) - .setContainerLogDirs(new ArrayList<>()) - .build()); - - Assert.assertNotSame(0, ret); - assertEquals(Arrays.asList(YarnConfiguration.DEFAULT_NM_NONSECURE_MODE_LOCAL_USER, - appSubmitter, cmd, appId, containerId, - workDir.toString(), "/bin/echo", "/dev/null", pidFile.toString(), - 
StringUtils.join(PrivilegedOperation.LINUX_FILE_PATH_SEPARATOR, - dirsHandler.getLocalDirs()), - StringUtils.join(PrivilegedOperation.LINUX_FILE_PATH_SEPARATOR, - dirsHandler.getLogDirs()), - "cgroups=none"), readMockParams()); - + ).when(container).handle(any(ContainerDiagnosticsUpdateEvent.class)); + + when(cId.toString()).thenReturn(containerId); + + when(context.getEnvironment()).thenReturn(env); + + Path scriptPath = new Path("file:///bin/echo"); + Path tokensPath = new Path("file:///dev/null"); + Path workDir = new Path("/tmp"); + Path pidFile = new Path(workDir, "pid.txt"); + + mockExec.activateContainer(cId, pidFile); + + try { + int ret = mockExec.launchContainer(new ContainerStartContext.Builder() + .setContainer(container) + .setNmPrivateContainerScriptPath(scriptPath) + .setNmPrivateTokensPath(tokensPath) + .setUser(appSubmitter) + .setAppId(appId) + .setContainerWorkDir(workDir) + .setLocalDirs(dirsHandler.getLocalDirs()) + .setLogDirs(dirsHandler.getLogDirs()) + .setFilecacheDirs(new ArrayList<>()) + .setUserLocalDirs(new ArrayList<>()) + .setContainerLocalDirs(new ArrayList<>()) + .setContainerLogDirs(new ArrayList<>()) + .build()); + + Assert.assertNotSame(0, ret); + assertEquals(Arrays.asList(YarnConfiguration. 
+ DEFAULT_NM_NONSECURE_MODE_LOCAL_USER, + appSubmitter, cmd, appId, containerId, + workDir.toString(), "/bin/echo", "/dev/null", pidFile.toString(), + StringUtils.join(PrivilegedOperation.LINUX_FILE_PATH_SEPARATOR, + dirsHandler.getLocalDirs()), + StringUtils.join(PrivilegedOperation.LINUX_FILE_PATH_SEPARATOR, + dirsHandler.getLogDirs()), + "cgroups=none"), readMockParams()); + + assertNotEquals("Expected YarnRuntimeException", + MOCK_EXECUTOR_WITH_CONFIG_ERROR, executor[i]); + } catch (ConfigurationException ex) { + assertEquals(MOCK_EXECUTOR_WITH_CONFIG_ERROR, executor[i]); + Assert.assertEquals("Linux Container Executor reached unrecoverable " + + "exception", ex.getMessage()); + } + } } @Test http://git-wip-us.apache.org/repos/asf/hadoop/blob/46940d92/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeHealthService.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeHealthService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeHealthService.java index 4b01349..c564c01 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeHealthService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeHealthService.java @@ -23,6 +23,8 @@ import java.io.FileOutputStream; import java.io.IOException; import java.io.PrintWriter; +import com.google.common.base.Joiner; +import com.google.common.base.Strings; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import 
org.apache.hadoop.conf.Configuration; @@ -165,8 +167,11 @@ public class TestNodeHealthService { healthStatus.getIsNodeHealthy()); Assert.assertTrue("Node script time out message not propagated", healthStatus.getHealthReport().equals( - NodeHealthScriptRunner.NODE_HEALTH_SCRIPT_TIMED_OUT_MSG - + NodeHealthCheckerService.SEPARATOR - + nodeHealthChecker.getDiskHandler().getDisksHealthReport(false))); + Joiner.on(NodeHealthCheckerService.SEPARATOR).skipNulls().join( + NodeHealthScriptRunner.NODE_HEALTH_SCRIPT_TIMED_OUT_MSG, + Strings.emptyToNull( + nodeHealthChecker.getDiskHandler() + .getDisksHealthReport(false)) + ))); } } http://git-wip-us.apache.org/repos/asf/hadoop/blob/46940d92/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java index 8dcf4be..e71ce75 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java @@ -20,11 +20,9 @@ package org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher; import static org.apache.hadoop.test.PlatformAssumptions.assumeWindows; import static 
org.apache.hadoop.test.PlatformAssumptions.assumeNotWindows; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertThat; -import static org.junit.Assert.fail; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.when; +import static org.junit.Assert.*; +import static org.mockito.Matchers.any; +import static org.mockito.Mockito.*; import java.io.BufferedReader; import java.io.File; @@ -47,6 +45,7 @@ import java.util.StringTokenizer; import java.util.jar.JarFile; import java.util.jar.Manifest; +import com.google.common.collect.Lists; import org.apache.commons.codec.binary.Base64; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileUtil; @@ -82,17 +81,17 @@ import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.event.Dispatcher; import org.apache.hadoop.yarn.event.Event; import org.apache.hadoop.yarn.event.EventHandler; +import org.apache.hadoop.yarn.exceptions.ConfigurationException; import org.apache.hadoop.yarn.security.ContainerTokenIdentifier; import org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus; import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor; import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.ExitCode; -import org.apache.hadoop.yarn.server.nodemanager.Context; import org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor; import org.apache.hadoop.yarn.server.nodemanager.NodeManager.NMContext; +import org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdater; import org.apache.hadoop.yarn.server.nodemanager.containermanager.BaseContainerManagerTest; import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; -import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerEvent; import 
org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerEventType; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerExitEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.ShellScriptBuilder; @@ -115,7 +114,7 @@ import org.junit.Test; public class TestContainerLaunch extends BaseContainerManagerTest { private static final String INVALID_JAVA_HOME = "/no/jvm/here"; - private Context distContext = + private NMContext distContext = new NMContext(new NMContainerTokenSecretManager(conf), new NMTokenSecretManagerInNM(), null, new ApplicationACLsManager(conf), new NMNullStateStoreService(), @@ -1480,4 +1479,54 @@ public class TestContainerLaunch extends BaseContainerManagerTest { } } } + + /** + * Test container launch fault. + * @throws Exception + */ + @Test + public void testContainerLaunchOnConfigurationError() throws Exception { + Dispatcher dispatcher = mock(Dispatcher.class); + EventHandler handler = mock(EventHandler.class); + when(dispatcher.getEventHandler()).thenReturn(handler); + Application app = mock(Application.class); + ApplicationId appId = mock(ApplicationId.class); + when(appId.toString()).thenReturn("1"); + when(app.getAppId()).thenReturn(appId); + Container container = mock(Container.class); + ContainerId id = mock(ContainerId.class); + when(id.toString()).thenReturn("1"); + when(container.getContainerId()).thenReturn(id); + when(container.getUser()).thenReturn("user"); + ContainerLaunchContext clc = mock(ContainerLaunchContext.class); + when(clc.getCommands()).thenReturn(Lists.newArrayList()); + when(container.getLaunchContext()).thenReturn(clc); + Credentials credentials = mock(Credentials.class); + when(container.getCredentials()).thenReturn(credentials); + + // Configuration errors should result in node shutdown... + ContainerExecutor returnConfigError = mock(ContainerExecutor.class); + when(returnConfigError.launchContainer(any())). 
+ thenThrow(new ConfigurationException("Mock configuration error")); + ContainerLaunch launchConfigError = new ContainerLaunch( + distContext, conf, dispatcher, + returnConfigError, app, container, dirsHandler, containerManager); + NodeStatusUpdater updater = mock(NodeStatusUpdater.class); + distContext.setNodeStatusUpdater(updater); + launchConfigError.call(); + verify(updater, atLeastOnce()).reportException(any()); + + // ... any other error should continue. + ContainerExecutor returnOtherError = mock(ContainerExecutor.class); + when(returnOtherError.launchContainer(any())). + thenThrow(new IOException("Mock configuration error")); + ContainerLaunch launchOtherError = new ContainerLaunch( + distContext, conf, dispatcher, + returnOtherError, app, container, dirsHandler, containerManager); + NodeStatusUpdater updaterNoCall = mock(NodeStatusUpdater.class); + distContext.setNodeStatusUpdater(updaterNoCall); + launchOtherError.call(); + verify(updaterNoCall, never()).reportException(any()); + + } } http://git-wip-us.apache.org/repos/asf/hadoop/blob/46940d92/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitorResourceChange.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitorResourceChange.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitorResourceChange.java index 29a60f7..e21eea0 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitorResourceChange.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitorResourceChange.java @@ -33,6 +33,7 @@ import org.apache.hadoop.yarn.api.records.ResourceUtilization; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.event.AsyncDispatcher; import org.apache.hadoop.yarn.event.EventHandler; +import org.apache.hadoop.yarn.exceptions.ConfigurationException; import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor; import org.apache.hadoop.yarn.server.nodemanager.Context; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; @@ -80,7 +81,7 @@ public class TestContainersMonitorResourceChange { } @Override public int launchContainer(ContainerStartContext ctx) throws - IOException { + IOException, ConfigurationException { return 0; } @Override http://git-wip-us.apache.org/repos/asf/hadoop/blob/46940d92/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/TestContainerSchedulerQueuing.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/TestContainerSchedulerQueuing.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/TestContainerSchedulerQueuing.java index 24e388f..a98a341 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/TestContainerSchedulerQueuing.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/TestContainerSchedulerQueuing.java @@ -38,6 +38,7 @@ import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.api.records.ExecutionType; import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.exceptions.ConfigurationException; import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.security.NMTokenIdentifier; import org.apache.hadoop.yarn.server.api.records.ContainerQueuingLimit; @@ -126,7 +127,8 @@ public class TestContainerSchedulerQueuing extends BaseContainerManagerTest { protected ContainerExecutor createContainerExecutor() { DefaultContainerExecutor exec = new DefaultContainerExecutor() { @Override - public int launchContainer(ContainerStartContext ctx) throws IOException { + public int launchContainer(ContainerStartContext ctx) + throws IOException, ConfigurationException { if (delayContainers) { try { Thread.sleep(10000); http://git-wip-us.apache.org/repos/asf/hadoop/blob/46940d92/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/resources/mock-container-executer-with-configuration-error ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/resources/mock-container-executer-with-configuration-error b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/resources/mock-container-executer-with-configuration-error new file mode 100644 index 
0000000..98f14da --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/resources/mock-container-executer-with-configuration-error @@ -0,0 +1,20 @@ +#!/bin/sh +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +for PARAM in "$@" +do + echo $PARAM; +done > params.txt + +exit 24 + --------------------------------------------------------------------- To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-commits-h...@hadoop.apache.org