This is an automated email from the ASF dual-hosted git repository.
wangda pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/trunk by this push:
new 0a01d49 YARN-8822. Nvidia-docker v2 support for YARN GPU feature.
(Charo Zhang via wangda)
0a01d49 is described below
commit 0a01d499175569ffac9d3f31d980820ecac7e60b
Author: Wangda Tan <[email protected]>
AuthorDate: Mon Jan 7 12:07:26 2019 -0800
YARN-8822. Nvidia-docker v2 support for YARN GPU feature. (Charo Zhang via
wangda)
Change-Id: Id8af27134d3286a7a10d85eda9be25df9689d0e7
---
.../hadoop-yarn/conf/container-executor.cfg | 1 +
.../apache/hadoop/yarn/conf/YarnConfiguration.java | 3 +
.../linux/runtime/docker/DockerRunCommand.java | 5 +
.../gpu/GpuDockerCommandPluginFactory.java | 4 +
.../gpu/NvidiaDockerV2CommandPlugin.java | 111 ++++++++++++++++++
.../container-executor/impl/utils/docker-util.c | 20 ++++
.../container-executor/impl/utils/docker-util.h | 3 +-
.../test/utils/test_docker_util.cc | 62 ++++++++++
.../linux/runtime/docker/TestDockerRunCommand.java | 5 +-
.../gpu/TestNvidiaDockerV2CommandPlugin.java | 130 +++++++++++++++++++++
.../src/site/markdown/DockerContainers.md | 1 +
.../src/site/markdown/UsingGpus.md | 9 +-
12 files changed, 351 insertions(+), 3 deletions(-)
diff --git a/hadoop-yarn-project/hadoop-yarn/conf/container-executor.cfg
b/hadoop-yarn-project/hadoop-yarn/conf/container-executor.cfg
index d19874f..4df53df 100644
--- a/hadoop-yarn-project/hadoop-yarn/conf/container-executor.cfg
+++ b/hadoop-yarn-project/hadoop-yarn/conf/container-executor.cfg
@@ -16,6 +16,7 @@ feature.tc.enabled=false
# docker.privileged-containers.enabled=false
# docker.allowed.volume-drivers=## comma seperated list of allowed
volume-drivers
# docker.no-new-privileges.enabled=## enable/disable the no-new-privileges
flag for docker run. Set to "true" to enable, disabled by default
+# docker.allowed.runtimes=## comma separated runtimes that can be used.
# The configs below deal with settings for FPGA resource
#[fpga]
diff --git
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
index 08f7f1a..c29707c 100644
---
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
+++
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
@@ -1665,6 +1665,9 @@ public class YarnConfiguration extends Configuration {
public static final String NVIDIA_DOCKER_V1 = "nvidia-docker-v1";
@Private
+ public static final String NVIDIA_DOCKER_V2 = "nvidia-docker-v2";
+
+ @Private
public static final String DEFAULT_NM_GPU_DOCKER_PLUGIN_IMPL =
NVIDIA_DOCKER_V1;
diff --git
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/docker/DockerRunCommand.java
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/docker/DockerRunCommand.java
index f4f3a9c..b0603a3 100644
---
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/docker/DockerRunCommand.java
+++
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/docker/DockerRunCommand.java
@@ -165,6 +165,11 @@ public class DockerRunCommand extends DockerCommand {
return this;
}
+ public DockerRunCommand addRuntime(String runtime) {
+ super.addCommandArguments("runtime", runtime);
+ return this;
+ }
+
public DockerRunCommand groupAdd(String[] groups) {
super.addCommandArguments("group-add", String.join(",", groups));
return this;
diff --git
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDockerCommandPluginFactory.java
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDockerCommandPluginFactory.java
index db4589a..051afd6 100644
---
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDockerCommandPluginFactory.java
+++
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDockerCommandPluginFactory.java
@@ -34,6 +34,10 @@ public class GpuDockerCommandPluginFactory {
if (impl.equals(YarnConfiguration.NVIDIA_DOCKER_V1)) {
return new NvidiaDockerV1CommandPlugin(conf);
}
+ // nvidia-docker2
+ if (impl.equals(YarnConfiguration.NVIDIA_DOCKER_V2)) {
+ return new NvidiaDockerV2CommandPlugin();
+ }
throw new YarnException(
"Unkown implementation name for Gpu docker plugin, impl=" + impl);
diff --git
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/NvidiaDockerV2CommandPlugin.java
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/NvidiaDockerV2CommandPlugin.java
new file mode 100644
index 0000000..ff25eb6
--- /dev/null
+++
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/NvidiaDockerV2CommandPlugin.java
@@ -0,0 +1,111 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package
org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu;
+
+import com.google.common.annotations.VisibleForTesting;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.yarn.api.records.ResourceInformation;
+import
org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
+import
org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ResourceMappings;
+import
org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.gpu.GpuResourceAllocator;
+import
org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.docker.DockerRunCommand;
+import
org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.docker.DockerVolumeCommand;
+import
org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.DockerCommandPlugin;
+import
org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerExecutionException;
+
+import java.io.Serializable;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * Implementation to use nvidia-docker v2 as GPU docker command plugin.
+ */
+public class NvidiaDockerV2CommandPlugin implements DockerCommandPlugin {
+ final static Log LOG = LogFactory.getLog(NvidiaDockerV2CommandPlugin.class);
+
+ private String nvidiaRuntime = "nvidia";
+ private String nvidiaVisibleDevices = "NVIDIA_VISIBLE_DEVICES";
+
+ public NvidiaDockerV2CommandPlugin() {}
+
+ private Set<GpuDevice> getAssignedGpus(Container container) {
+ ResourceMappings resourceMappings = container.getResourceMappings();
+
+ // Copy of assigned Resources
+ Set<GpuDevice> assignedResources = null;
+ if (resourceMappings != null) {
+ assignedResources = new HashSet<>();
+ for (Serializable s : resourceMappings.getAssignedResources(
+ ResourceInformation.GPU_URI)) {
+ assignedResources.add((GpuDevice) s);
+ }
+ }
+ if (assignedResources == null || assignedResources.isEmpty()) {
+ // When no GPU resource assigned, don't need to update docker command.
+ return Collections.emptySet();
+ }
+ return assignedResources;
+ }
+
+ @VisibleForTesting
+ protected boolean requestsGpu(Container container) {
+ return GpuResourceAllocator.getRequestedGpus(container.getResource()) > 0;
+ }
+
+ @Override
+ public synchronized void updateDockerRunCommand(
+ DockerRunCommand dockerRunCommand, Container container)
+ throws ContainerExecutionException {
+ if (!requestsGpu(container)) {
+ return;
+ }
+ Set<GpuDevice> assignedResources = getAssignedGpus(container);
+ if (assignedResources == null || assignedResources.isEmpty()) {
+ return;
+ }
+ Map<String, String> environment = new HashMap<>();
+ String gpuIndexList = "";
+ for (GpuDevice gpuDevice : assignedResources) {
+ gpuIndexList = gpuIndexList + gpuDevice.getIndex() + ",";
+ LOG.info("nvidia docker2 assigned gpu index: " + gpuDevice.getIndex());
+ }
+ dockerRunCommand.addRuntime(nvidiaRuntime);
+ environment.put(nvidiaVisibleDevices,
+ gpuIndexList.substring(0, gpuIndexList.length() - 1));
+ dockerRunCommand.addEnv(environment);
+ }
+
+ @Override
+ public DockerVolumeCommand getCreateDockerVolumeCommand(Container container)
+ throws ContainerExecutionException {
+ // No Volume needed for nvidia-docker2.
+ return null;
+ }
+
+ @Override
+ public DockerVolumeCommand getCleanupDockerVolumesCommand(Container
container)
+ throws ContainerExecutionException {
+ // No cleanup needed.
+ return null;
+ }
+}
diff --git
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/docker-util.c
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/docker-util.c
index ced7424..0a5d2ed 100644
---
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/docker-util.c
+++
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/docker-util.c
@@ -351,6 +351,8 @@ const char *get_docker_error_message(const int error_code) {
return "Docker image is not trusted";
case INVALID_DOCKER_TMPFS_MOUNT:
return "Invalid docker tmpfs mount";
+ case INVALID_DOCKER_RUNTIME:
+ return "Invalid docker runtime";
default:
return "Unknown error";
}
@@ -947,6 +949,19 @@ static int set_network(const struct configuration
*command_config,
return ret;
}
+static int set_runtime(const struct configuration *command_config,
+ const struct configuration *conf, args *args) {
+ int ret = 0;
+ ret = add_param_to_command_if_allowed(command_config, conf, "runtime",
+ "docker.allowed.runtimes",
"--runtime=",
+ 0, 0, args);
+ if (ret != 0) {
+ fprintf(ERRORFILE, "Could not find requested runtime in allowed
runtimes\n");
+ ret = INVALID_DOCKER_RUNTIME;
+ }
+ return ret;
+}
+
static int add_ports_mapping_to_command(const struct configuration
*command_config, args *args) {
int i = 0, ret = 0;
char *network_type = (char*) malloc(128);
@@ -1654,6 +1669,11 @@ int get_docker_run_command(const char *command_file,
const struct configuration
goto free_and_exit;
}
+ ret = set_runtime(&command_config, conf, args);
+ if (ret != 0) {
+ goto free_and_exit;
+ }
+
ret = set_hostname(&command_config, args);
if (ret != 0) {
goto free_and_exit;
diff --git
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/docker-util.h
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/docker-util.h
index 3cff565..3b8922d 100644
---
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/docker-util.h
+++
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/docker-util.h
@@ -69,7 +69,8 @@ enum docker_error_codes {
PID_HOST_DISABLED,
INVALID_PID_NAMESPACE,
INVALID_DOCKER_IMAGE_TRUST,
- INVALID_DOCKER_TMPFS_MOUNT
+ INVALID_DOCKER_TMPFS_MOUNT,
+ INVALID_DOCKER_RUNTIME
};
/**
diff --git
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/utils/test_docker_util.cc
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/utils/test_docker_util.cc
index 66e987e..6c239d2 100644
---
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/utils/test_docker_util.cc
+++
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/utils/test_docker_util.cc
@@ -444,6 +444,68 @@ namespace ContainerExecutor {
run_docker_run_helper_function(file_cmd_vec, set_hostname);
}
+ TEST_F(TestDockerUtil, test_set_runtime) {
+ struct configuration container_cfg;
+ struct args buff = ARGS_INITIAL_VALUE;
+ int ret = 0;
+ std::string container_executor_cfg_contents = "[docker]\n"
+ " docker.trusted.registries=hadoop\n"
+ " docker.allowed.runtimes=lxc,nvidia";
+ std::vector<std::pair<std::string, std::string> > file_cmd_vec;
+ file_cmd_vec.push_back(std::make_pair<std::string, std::string>(
+ "[docker-command-execution]\n docker-command=run\n
image=hadoop/image\n runtime=lxc", "--runtime=lxc"));
+ file_cmd_vec.push_back(std::make_pair<std::string, std::string>(
+ "[docker-command-execution]\n docker-command=run\n
image=hadoop/image\n runtime=nvidia", "--runtime=nvidia"));
+ file_cmd_vec.push_back(std::make_pair<std::string, std::string>(
+ "[docker-command-execution]\n docker-command=run", ""));
+ write_container_executor_cfg(container_executor_cfg_contents);
+ ret = read_config(container_executor_cfg_file.c_str(), &container_cfg);
+
+ std::vector<std::pair<std::string, std::string> >::const_iterator itr;
+ if (ret != 0) {
+ FAIL();
+ }
+ for (itr = file_cmd_vec.begin(); itr != file_cmd_vec.end(); ++itr) {
+ struct configuration cmd_cfg;
+ write_command_file(itr->first);
+ ret = read_config(docker_command_file.c_str(), &cmd_cfg);
+ if (ret != 0) {
+ FAIL();
+ }
+ ret = set_runtime(&cmd_cfg, &container_cfg, &buff);
+ char *actual = flatten(&buff);
+ ASSERT_EQ(0, ret) << "error message: " << get_docker_error_message(ret)
<< " for input " << itr->first;
+ ASSERT_STREQ(itr->second.c_str(), actual);
+ reset_args(&buff);
+ free(actual);
+ free_configuration(&cmd_cfg);
+ }
+ struct configuration cmd_cfg_1;
+ write_command_file("[docker-command-execution]\n docker-command=run\n
runtime=nvidia1");
+ ret = read_config(docker_command_file.c_str(), &cmd_cfg_1);
+ if (ret != 0) {
+ FAIL();
+ }
+ ret = set_runtime(&cmd_cfg_1, &container_cfg, &buff);
+ ASSERT_EQ(INVALID_DOCKER_RUNTIME, ret);
+ ASSERT_EQ(0, buff.length);
+ reset_args(&buff);
+ free_configuration(&container_cfg);
+
+ container_executor_cfg_contents = "[docker]\n";
+ write_container_executor_cfg(container_executor_cfg_contents);
+ ret = read_config(container_executor_cfg_file.c_str(), &container_cfg);
+ if (ret != 0) {
+ FAIL();
+ }
+ ret = set_runtime(&cmd_cfg_1, &container_cfg, &buff);
+ ASSERT_EQ(INVALID_DOCKER_RUNTIME, ret);
+ ASSERT_EQ(0, buff.length);
+ reset_args(&buff);
+ free_configuration(&cmd_cfg_1);
+ free_configuration(&container_cfg);
+ }
+
TEST_F(TestDockerUtil, test_set_group_add) {
std::vector<std::pair<std::string, std::string> > file_cmd_vec;
file_cmd_vec.push_back(std::make_pair<std::string, std::string>(
diff --git
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/docker/TestDockerRunCommand.java
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/docker/TestDockerRunCommand.java
index d01c184..4ee76c7 100644
---
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/docker/TestDockerRunCommand.java
+++
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/docker/TestDockerRunCommand.java
@@ -63,6 +63,7 @@ public class TestDockerRunCommand {
for (String mapping:portsMapping.split(",")) {
dockerRunCommand.addPortsMapping(mapping);
}
+ dockerRunCommand.addRuntime("nvidia");
assertEquals("run", StringUtils.join(",",
dockerRunCommand.getDockerCommandWithArguments()
@@ -86,7 +87,9 @@ public class TestDockerRunCommand {
assertEquals("127.0.0.1:8080:80,1234:1234,:2222", StringUtils.join(",",
dockerRunCommand.getDockerCommandWithArguments()
.get("ports-mapping")));
- assertEquals(9, dockerRunCommand.getDockerCommandWithArguments().size());
+ assertEquals("nvidia", StringUtils.join(",",
+ dockerRunCommand.getDockerCommandWithArguments().get("runtime")));
+ assertEquals(10, dockerRunCommand.getDockerCommandWithArguments().size());
}
@Test
diff --git
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestNvidiaDockerV2CommandPlugin.java
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestNvidiaDockerV2CommandPlugin.java
new file mode 100644
index 0000000..b0b5233
--- /dev/null
+++
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestNvidiaDockerV2CommandPlugin.java
@@ -0,0 +1,130 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package
org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Sets;
+import org.apache.hadoop.yarn.api.records.ResourceInformation;
+import
org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
+import
org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ResourceMappings;
+import
org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.docker.DockerRunCommand;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+/**
+ * Tests for NvidiaDockerV2CommandPlugin.
+ */
+public class TestNvidiaDockerV2CommandPlugin {
+ private Map<String, List<String>> copyCommandLine(
+ Map<String, List<String>> map) {
+ Map<String, List<String>> ret = new HashMap<>();
+ for (Map.Entry<String, List<String>> entry : map.entrySet()) {
+ ret.put(entry.getKey(), new ArrayList<>(entry.getValue()));
+ }
+ return ret;
+ }
+
+ private boolean commandlinesEquals(Map<String, List<String>> cli1,
+ Map<String, List<String>> cli2) {
+ if (!Sets.symmetricDifference(cli1.keySet(), cli2.keySet()).isEmpty()) {
+ return false;
+ }
+
+ for (String key : cli1.keySet()) {
+ List<String> value1 = cli1.get(key);
+ List<String> value2 = cli2.get(key);
+ if (!value1.equals(value2)) {
+ return false;
+ }
+ }
+
+ return true;
+ }
+
+ static class MyNvidiaDockerV2CommandPlugin
+ extends NvidiaDockerV2CommandPlugin {
+ private boolean requestsGpu = false;
+
+ MyNvidiaDockerV2CommandPlugin() {}
+
+ public void setRequestsGpu(boolean r) {
+ requestsGpu = r;
+ }
+
+ @Override
+ protected boolean requestsGpu(Container container) {
+ return requestsGpu;
+ }
+ }
+
+ @Test
+ public void testPlugin() throws Exception {
+ DockerRunCommand runCommand = new DockerRunCommand("container_1", "user",
+ "fakeimage");
+
+ Map<String, List<String>> originalCommandline = copyCommandLine(
+ runCommand.getDockerCommandWithArguments());
+
+ MyNvidiaDockerV2CommandPlugin
+ commandPlugin = new MyNvidiaDockerV2CommandPlugin();
+
+ Container nmContainer = mock(Container.class);
+
+ // getResourceMapping is null, so commandline won't be updated
+ commandPlugin.updateDockerRunCommand(runCommand, nmContainer);
+ Assert.assertTrue(commandlinesEquals(originalCommandline,
+ runCommand.getDockerCommandWithArguments()));
+
+ // no GPU resource assigned, so commandline won't be updated
+ ResourceMappings resourceMappings = new ResourceMappings();
+ when(nmContainer.getResourceMappings()).thenReturn(resourceMappings);
+ commandPlugin.updateDockerRunCommand(runCommand, nmContainer);
+ Assert.assertTrue(commandlinesEquals(originalCommandline,
+ runCommand.getDockerCommandWithArguments()));
+
+ // Assign GPU resource
+ ResourceMappings.AssignedResources assigned =
+ new ResourceMappings.AssignedResources();
+ assigned.updateAssignedResources(
+ ImmutableList.of(new GpuDevice(0, 0), new GpuDevice(1, 1)));
+ resourceMappings.addAssignedResources(ResourceInformation.GPU_URI,
+ assigned);
+
+ commandPlugin.setRequestsGpu(true);
+ commandPlugin.updateDockerRunCommand(runCommand, nmContainer);
+ Map<String, List<String>> newCommandLine =
+ runCommand.getDockerCommandWithArguments();
+
+ // Command line will be updated
+ Assert.assertFalse(commandlinesEquals(originalCommandline,
newCommandLine));
+ // NVIDIA_VISIBLE_DEVICES will be set
+ Assert.assertTrue(
+ runCommand.getEnv().get("NVIDIA_VISIBLE_DEVICES").equals("0,1"));
+ // runtime should exist
+ Assert.assertTrue(newCommandLine.containsKey("runtime"));
+ }
+}
\ No newline at end of file
diff --git
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/DockerContainers.md
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/DockerContainers.md
index 5620299..2a893e4 100644
---
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/DockerContainers.md
+++
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/DockerContainers.md
@@ -274,6 +274,7 @@ are allowed. It contains the following properties:
| `docker.trusted.registries` | Comma separated list of trusted docker
registries for running trusted privileged docker containers. By default, no
registries are defined. |
| `docker.inspect.max.retries` | Integer value to check docker container
readiness. Each inspection is set with 3 seconds delay. Default value of 10
will wait 30 seconds for docker container to become ready before marked as
container failed. |
| `docker.no-new-privileges.enabled` | Enable/disable the no-new-privileges
flag for docker run. Set to "true" to enable, disabled by default. |
+| `docker.allowed.runtimes` | Comma separated list of runtimes that containers are
allowed to use. By default, no runtimes are allowed. |
Please note that if you wish to run Docker containers that require access to
the YARN local directories, you must add them to the docker.allowed.rw-mounts
list.
diff --git
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/UsingGpus.md
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/UsingGpus.md
index f6000e7..85412af 100644
---
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/UsingGpus.md
+++
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/UsingGpus.md
@@ -107,7 +107,7 @@ Following configs can be customized when user needs to run
GPU applications insi
| --- | --- |
| yarn.nodemanager.resource-plugins.gpu.docker-plugin | nvidia-docker-v1 |
-Specify docker command plugin for GPU. By default uses Nvidia docker V1.0.
+Specify docker command plugin for GPU. By default uses Nvidia docker V1.0;
`nvidia-docker-v2` is available for V2.x.
| Property | Default value |
| --- | --- |
@@ -169,6 +169,13 @@ docker.allowed.volume-drivers
...
docker.allowed.ro-mounts=nvidia_driver_375.66
```
+**4) If using `nvidia-docker-v2` as the GPU docker plugin, add `nvidia` to the runtimes
whitelist.**
+
+```
+[docker]
+...
+docker.allowed.runtimes=nvidia
+```
# Use it
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]