This is an automated email from the ASF dual-hosted git repository.

sunilg pushed a commit to branch branch-3.2.0
in repository https://gitbox.apache.org/repos/asf/hadoop.git


The following commit(s) were added to refs/heads/branch-3.2.0 by this push:
     new c968568  YARN-8822. Nvidia-docker v2 support for YARN GPU feature. 
(Charo Zhang via Sunil Govindan)
c968568 is described below

commit c968568c589ab0f87982b7e08b941acb15a9c9f6
Author: Sunil G <sun...@apache.org>
AuthorDate: Tue Jan 8 07:12:11 2019 +0530

    YARN-8822. Nvidia-docker v2 support for YARN GPU feature. (Charo Zhang via 
Sunil Govindan)
---
 .../hadoop-yarn/conf/container-executor.cfg        |   1 +
 .../apache/hadoop/yarn/conf/YarnConfiguration.java |   3 +
 .../linux/runtime/docker/DockerRunCommand.java     |   5 +
 .../gpu/GpuDockerCommandPluginFactory.java         |   4 +
 .../gpu/NvidiaDockerV2CommandPlugin.java           | 111 ++++++++++++++++++
 .../container-executor/impl/utils/docker-util.c    |  20 ++++
 .../container-executor/impl/utils/docker-util.h    |   3 +-
 .../test/utils/test_docker_util.cc                 |  62 ++++++++++
 .../linux/runtime/docker/TestDockerRunCommand.java |   5 +-
 .../gpu/TestNvidiaDockerV2CommandPlugin.java       | 130 +++++++++++++++++++++
 .../src/site/markdown/DockerContainers.md          |   1 +
 .../src/site/markdown/UsingGpus.md                 |   9 +-
 12 files changed, 351 insertions(+), 3 deletions(-)

diff --git a/hadoop-yarn-project/hadoop-yarn/conf/container-executor.cfg 
b/hadoop-yarn-project/hadoop-yarn/conf/container-executor.cfg
index d19874f..4df53df 100644
--- a/hadoop-yarn-project/hadoop-yarn/conf/container-executor.cfg
+++ b/hadoop-yarn-project/hadoop-yarn/conf/container-executor.cfg
@@ -16,6 +16,7 @@ feature.tc.enabled=false
 #  docker.privileged-containers.enabled=false
 #  docker.allowed.volume-drivers=## comma seperated list of allowed 
volume-drivers
 #  docker.no-new-privileges.enabled=## enable/disable the no-new-privileges 
flag for docker run. Set to "true" to enable, disabled by default
+#  docker.allowed.runtimes=## comma separated runtimes that can be used.
 
 # The configs below deal with settings for FPGA resource
 #[fpga]
diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
index 95861d7..5f0ad9a 100644
--- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
+++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
@@ -1636,6 +1636,9 @@ public class YarnConfiguration extends Configuration {
   public static final String NVIDIA_DOCKER_V1 = "nvidia-docker-v1";
 
   @Private
+  public static final String NVIDIA_DOCKER_V2 = "nvidia-docker-v2";
+
+  @Private
   public static final String DEFAULT_NM_GPU_DOCKER_PLUGIN_IMPL =
       NVIDIA_DOCKER_V1;
 
diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/docker/DockerRunCommand.java
 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/docker/DockerRunCommand.java
index 395c1e1..061cab1 100644
--- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/docker/DockerRunCommand.java
+++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/docker/DockerRunCommand.java
@@ -159,6 +159,11 @@ public class DockerRunCommand extends DockerCommand {
     return this;
   }
 
+  public DockerRunCommand addRuntime(String runtime) {
+    super.addCommandArguments("runtime", runtime);
+    return this;
+  }
+
   public DockerRunCommand groupAdd(String[] groups) {
     super.addCommandArguments("group-add", String.join(",", groups));
     return this;
diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDockerCommandPluginFactory.java
 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDockerCommandPluginFactory.java
index db4589a..051afd6 100644
--- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDockerCommandPluginFactory.java
+++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDockerCommandPluginFactory.java
@@ -34,6 +34,10 @@ public class GpuDockerCommandPluginFactory {
     if (impl.equals(YarnConfiguration.NVIDIA_DOCKER_V1)) {
       return new NvidiaDockerV1CommandPlugin(conf);
     }
+    // nvidia-docker2
+    if (impl.equals(YarnConfiguration.NVIDIA_DOCKER_V2)) {
+      return new NvidiaDockerV2CommandPlugin();
+    }
 
     throw new YarnException(
         "Unkown implementation name for Gpu docker plugin, impl=" + impl);
diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/NvidiaDockerV2CommandPlugin.java
 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/NvidiaDockerV2CommandPlugin.java
new file mode 100644
index 0000000..ff25eb6
--- /dev/null
+++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/NvidiaDockerV2CommandPlugin.java
@@ -0,0 +1,111 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package 
org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu;
+
+import com.google.common.annotations.VisibleForTesting;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.yarn.api.records.ResourceInformation;
+import 
org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
+import 
org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ResourceMappings;
+import 
org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.gpu.GpuResourceAllocator;
+import 
org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.docker.DockerRunCommand;
+import 
org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.docker.DockerVolumeCommand;
+import 
org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.DockerCommandPlugin;
+import 
org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerExecutionException;
+
+import java.io.Serializable;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * Implementation to use nvidia-docker v2 as GPU docker command plugin.
+ */
+public class NvidiaDockerV2CommandPlugin implements DockerCommandPlugin {
+  final static Log LOG = LogFactory.getLog(NvidiaDockerV2CommandPlugin.class);
+
+  private String nvidiaRuntime = "nvidia";
+  private String nvidiaVisibleDevices = "NVIDIA_VISIBLE_DEVICES";
+
+  public NvidiaDockerV2CommandPlugin() {}
+
+  private Set<GpuDevice> getAssignedGpus(Container container) {
+    ResourceMappings resourceMappings = container.getResourceMappings();
+
+    // Copy of assigned Resources
+    Set<GpuDevice> assignedResources = null;
+    if (resourceMappings != null) {
+      assignedResources = new HashSet<>();
+      for (Serializable s : resourceMappings.getAssignedResources(
+          ResourceInformation.GPU_URI)) {
+        assignedResources.add((GpuDevice) s);
+      }
+    }
+    if (assignedResources == null || assignedResources.isEmpty()) {
+      // When no GPU resource assigned, don't need to update docker command.
+      return Collections.emptySet();
+    }
+    return assignedResources;
+  }
+
+  @VisibleForTesting
+  protected boolean requestsGpu(Container container) {
+    return GpuResourceAllocator.getRequestedGpus(container.getResource()) > 0;
+  }
+
+  @Override
+  public synchronized void updateDockerRunCommand(
+      DockerRunCommand dockerRunCommand, Container container)
+      throws ContainerExecutionException {
+    if (!requestsGpu(container)) {
+      return;
+    }
+    Set<GpuDevice> assignedResources = getAssignedGpus(container);
+    if (assignedResources == null || assignedResources.isEmpty()) {
+      return;
+    }
+    Map<String, String> environment = new HashMap<>();
+    String gpuIndexList = "";
+    for (GpuDevice gpuDevice : assignedResources) {
+      gpuIndexList = gpuIndexList + gpuDevice.getIndex() + ",";
+      LOG.info("nvidia docker2 assigned gpu index: " + gpuDevice.getIndex());
+    }
+    dockerRunCommand.addRuntime(nvidiaRuntime);
+    environment.put(nvidiaVisibleDevices,
+            gpuIndexList.substring(0, gpuIndexList.length() - 1));
+    dockerRunCommand.addEnv(environment);
+  }
+
+  @Override
+  public DockerVolumeCommand getCreateDockerVolumeCommand(Container container)
+      throws ContainerExecutionException {
+    // No Volume needed for nvidia-docker2.
+    return null;
+  }
+
+  @Override
+  public DockerVolumeCommand getCleanupDockerVolumesCommand(Container 
container)
+      throws ContainerExecutionException {
+    // No cleanup needed.
+    return null;
+  }
+}
diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/docker-util.c
 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/docker-util.c
index 69f27ba..548430b 100644
--- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/docker-util.c
+++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/docker-util.c
@@ -342,6 +342,8 @@ const char *get_docker_error_message(const int error_code) {
       return "Docker image is not trusted";
     case INVALID_DOCKER_TMPFS_MOUNT:
       return "Invalid docker tmpfs mount";
+    case INVALID_DOCKER_RUNTIME:
+      return "Invalid docker runtime";
     default:
       return "Unknown error";
   }
@@ -883,6 +885,19 @@ static int set_network(const struct configuration 
*command_config,
   return ret;
 }
 
+static int set_runtime(const struct configuration *command_config,
+                       const struct configuration *conf, args *args) {
+  int ret = 0;
+  ret = add_param_to_command_if_allowed(command_config, conf, "runtime",
+                                        "docker.allowed.runtimes", 
"--runtime=",
+                                        0, 0, args);
+  if (ret != 0) {
+    fprintf(ERRORFILE, "Could not find requested runtime in allowed 
runtimes\n");
+    ret = INVALID_DOCKER_RUNTIME;
+  }
+  return ret;
+}
+
 static int set_pid_namespace(const struct configuration *command_config,
                    const struct configuration *conf, args *args) {
   char *value = get_configuration_value("pid", DOCKER_COMMAND_FILE_SECTION,
@@ -1527,6 +1542,11 @@ int get_docker_run_command(const char *command_file, 
const struct configuration
     goto free_and_exit;
   }
 
+  ret = set_runtime(&command_config, conf, args);
+  if (ret != 0) {
+    goto free_and_exit;
+  }
+
   ret = set_hostname(&command_config, args);
   if (ret != 0) {
     goto free_and_exit;
diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/docker-util.h
 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/docker-util.h
index 7b7322d..0b281cc 100644
--- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/docker-util.h
+++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/docker-util.h
@@ -67,7 +67,8 @@ enum docker_error_codes {
     PID_HOST_DISABLED,
     INVALID_PID_NAMESPACE,
     INVALID_DOCKER_IMAGE_TRUST,
-    INVALID_DOCKER_TMPFS_MOUNT
+    INVALID_DOCKER_TMPFS_MOUNT,
+    INVALID_DOCKER_RUNTIME
 };
 
 /**
diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/utils/test_docker_util.cc
 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/utils/test_docker_util.cc
index b289857..dba1947 100644
--- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/utils/test_docker_util.cc
+++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/utils/test_docker_util.cc
@@ -432,6 +432,68 @@ namespace ContainerExecutor {
     run_docker_run_helper_function(file_cmd_vec, set_hostname);
   }
 
+  TEST_F(TestDockerUtil, test_set_runtime) {
+    struct configuration container_cfg;
+    struct args buff = ARGS_INITIAL_VALUE;
+    int ret = 0;
+    std::string container_executor_cfg_contents = "[docker]\n"
+        "  docker.trusted.registries=hadoop\n"
+        "  docker.allowed.runtimes=lxc,nvidia";
+    std::vector<std::pair<std::string, std::string> > file_cmd_vec;
+    file_cmd_vec.push_back(std::make_pair<std::string, std::string>(
+        "[docker-command-execution]\n  docker-command=run\n  
image=hadoop/image\n runtime=lxc", "--runtime=lxc"));
+    file_cmd_vec.push_back(std::make_pair<std::string, std::string>(
+        "[docker-command-execution]\n  docker-command=run\n  
image=hadoop/image\n runtime=nvidia", "--runtime=nvidia"));
+    file_cmd_vec.push_back(std::make_pair<std::string, std::string>(
+        "[docker-command-execution]\n  docker-command=run", ""));
+    write_container_executor_cfg(container_executor_cfg_contents);
+    ret = read_config(container_executor_cfg_file.c_str(), &container_cfg);
+
+    std::vector<std::pair<std::string, std::string> >::const_iterator itr;
+    if (ret != 0) {
+      FAIL();
+    }
+    for (itr = file_cmd_vec.begin(); itr != file_cmd_vec.end(); ++itr) {
+      struct configuration cmd_cfg;
+      write_command_file(itr->first);
+      ret = read_config(docker_command_file.c_str(), &cmd_cfg);
+      if (ret != 0) {
+        FAIL();
+      }
+      ret = set_runtime(&cmd_cfg, &container_cfg, &buff);
+      char *actual = flatten(&buff);
+      ASSERT_EQ(0, ret) << "error message: " << get_docker_error_message(ret) 
<< " for input " << itr->first;
+      ASSERT_STREQ(itr->second.c_str(), actual);
+      reset_args(&buff);
+      free(actual);
+      free_configuration(&cmd_cfg);
+    }
+    struct configuration cmd_cfg_1;
+    write_command_file("[docker-command-execution]\n  docker-command=run\n  
runtime=nvidia1");
+    ret = read_config(docker_command_file.c_str(), &cmd_cfg_1);
+    if (ret != 0) {
+      FAIL();
+    }
+    ret = set_runtime(&cmd_cfg_1, &container_cfg, &buff);
+    ASSERT_EQ(INVALID_DOCKER_RUNTIME, ret);
+    ASSERT_EQ(0, buff.length);
+    reset_args(&buff);
+    free_configuration(&container_cfg);
+
+    container_executor_cfg_contents = "[docker]\n";
+    write_container_executor_cfg(container_executor_cfg_contents);
+    ret = read_config(container_executor_cfg_file.c_str(), &container_cfg);
+    if (ret != 0) {
+      FAIL();
+    }
+    ret = set_runtime(&cmd_cfg_1, &container_cfg, &buff);
+    ASSERT_EQ(INVALID_DOCKER_RUNTIME, ret);
+    ASSERT_EQ(0, buff.length);
+    reset_args(&buff);
+    free_configuration(&cmd_cfg_1);
+    free_configuration(&container_cfg);
+  }
+
   TEST_F(TestDockerUtil, test_set_group_add) {
     std::vector<std::pair<std::string, std::string> > file_cmd_vec;
     file_cmd_vec.push_back(std::make_pair<std::string, std::string>(
diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/docker/TestDockerRunCommand.java
 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/docker/TestDockerRunCommand.java
index 8dc37d4..23483d3 100644
--- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/docker/TestDockerRunCommand.java
+++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/docker/TestDockerRunCommand.java
@@ -59,6 +59,7 @@ public class TestDockerRunCommand {
     dockerRunCommand.setOverrideCommandWithArgs(commands);
     dockerRunCommand.removeContainerOnExit();
     dockerRunCommand.addTmpfsMount("/run");
+    dockerRunCommand.addRuntime("nvidia");
 
     assertEquals("run", StringUtils.join(",",
         dockerRunCommand.getDockerCommandWithArguments()
@@ -79,7 +80,9 @@ public class TestDockerRunCommand {
             .get("launch-command")));
     assertEquals("/run", StringUtils.join(",",
         dockerRunCommand.getDockerCommandWithArguments().get("tmpfs")));
-    assertEquals(8, dockerRunCommand.getDockerCommandWithArguments().size());
+    assertEquals("nvidia", StringUtils.join(",",
+        dockerRunCommand.getDockerCommandWithArguments().get("runtime")));
+    assertEquals(9, dockerRunCommand.getDockerCommandWithArguments().size());
   }
 
   @Test
diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestNvidiaDockerV2CommandPlugin.java
 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestNvidiaDockerV2CommandPlugin.java
new file mode 100644
index 0000000..b0b5233
--- /dev/null
+++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestNvidiaDockerV2CommandPlugin.java
@@ -0,0 +1,130 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package 
org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Sets;
+import org.apache.hadoop.yarn.api.records.ResourceInformation;
+import 
org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
+import 
org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ResourceMappings;
+import 
org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.docker.DockerRunCommand;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+/**
+ * test for NvidiaDockerV2CommandPlugin.
+ */
+public class TestNvidiaDockerV2CommandPlugin {
+  private Map<String, List<String>> copyCommandLine(
+      Map<String, List<String>> map) {
+    Map<String, List<String>> ret = new HashMap<>();
+    for (Map.Entry<String, List<String>> entry : map.entrySet()) {
+      ret.put(entry.getKey(), new ArrayList<>(entry.getValue()));
+    }
+    return ret;
+  }
+
+  private boolean commandlinesEquals(Map<String, List<String>> cli1,
+      Map<String, List<String>> cli2) {
+    if (!Sets.symmetricDifference(cli1.keySet(), cli2.keySet()).isEmpty()) {
+      return false;
+    }
+
+    for (String key : cli1.keySet()) {
+      List<String> value1 = cli1.get(key);
+      List<String> value2 = cli2.get(key);
+      if (!value1.equals(value2)) {
+        return false;
+      }
+    }
+
+    return true;
+  }
+
+  static class MyNvidiaDockerV2CommandPlugin
+      extends NvidiaDockerV2CommandPlugin {
+    private boolean requestsGpu = false;
+
+    MyNvidiaDockerV2CommandPlugin() {}
+
+    public void setRequestsGpu(boolean r) {
+      requestsGpu = r;
+    }
+
+    @Override
+    protected boolean requestsGpu(Container container) {
+      return requestsGpu;
+    }
+  }
+
+  @Test
+  public void testPlugin() throws Exception {
+    DockerRunCommand runCommand = new DockerRunCommand("container_1", "user",
+        "fakeimage");
+
+    Map<String, List<String>> originalCommandline = copyCommandLine(
+        runCommand.getDockerCommandWithArguments());
+
+    MyNvidiaDockerV2CommandPlugin
+        commandPlugin = new MyNvidiaDockerV2CommandPlugin();
+
+    Container nmContainer = mock(Container.class);
+
+    // getResourceMapping is null, so commandline won't be updated
+    commandPlugin.updateDockerRunCommand(runCommand, nmContainer);
+    Assert.assertTrue(commandlinesEquals(originalCommandline,
+        runCommand.getDockerCommandWithArguments()));
+
+    // no GPU resource assigned, so commandline won't be updated
+    ResourceMappings resourceMappings = new ResourceMappings();
+    when(nmContainer.getResourceMappings()).thenReturn(resourceMappings);
+    commandPlugin.updateDockerRunCommand(runCommand, nmContainer);
+    Assert.assertTrue(commandlinesEquals(originalCommandline,
+        runCommand.getDockerCommandWithArguments()));
+
+    // Assign GPU resource
+    ResourceMappings.AssignedResources assigned =
+        new ResourceMappings.AssignedResources();
+    assigned.updateAssignedResources(
+        ImmutableList.of(new GpuDevice(0, 0), new GpuDevice(1, 1)));
+    resourceMappings.addAssignedResources(ResourceInformation.GPU_URI,
+        assigned);
+
+    commandPlugin.setRequestsGpu(true);
+    commandPlugin.updateDockerRunCommand(runCommand, nmContainer);
+    Map<String, List<String>> newCommandLine =
+        runCommand.getDockerCommandWithArguments();
+
+    // Command line will be updated
+    Assert.assertFalse(commandlinesEquals(originalCommandline, 
newCommandLine));
+    // NVIDIA_VISIBLE_DEVICES will be set
+    Assert.assertTrue(
+        runCommand.getEnv().get("NVIDIA_VISIBLE_DEVICES").equals("0,1"));
+    // runtime should exist
+    Assert.assertTrue(newCommandLine.containsKey("runtime"));
+  }
+}
\ No newline at end of file
diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/DockerContainers.md
 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/DockerContainers.md
index 17a335e..acdf04d 100644
--- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/DockerContainers.md
+++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/DockerContainers.md
@@ -274,6 +274,7 @@ are allowed. It contains the following properties:
 | `docker.trusted.registries` | Comma separated list of trusted docker 
registries for running trusted privileged docker containers.  By default, no 
registries are defined. |
 | `docker.inspect.max.retries` | Integer value to check docker container 
readiness.  Each inspection is set with 3 seconds delay.  Default value of 10 
will wait 30 seconds for docker container to become ready before marked as 
container failed. |
 | `docker.no-new-privileges.enabled` | Enable/disable the no-new-privileges 
flag for docker run. Set to "true" to enable, disabled by default. |
+| `docker.allowed.runtimes` | Comma separated runtimes that containers are 
allowed to use. By default, no runtimes are allowed to be added. |
 
 Please note that if you wish to run Docker containers that require access to 
the YARN local directories, you must add them to the docker.allowed.rw-mounts 
list.
 
diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/UsingGpus.md
 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/UsingGpus.md
index f6000e7..85412af 100644
--- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/UsingGpus.md
+++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/UsingGpus.md
@@ -107,7 +107,7 @@ Following configs can be customized when user needs to run 
GPU applications insi
 | --- | --- |
 | yarn.nodemanager.resource-plugins.gpu.docker-plugin | nvidia-docker-v1 |
 
-Specify docker command plugin for GPU. By default uses Nvidia docker V1.0.
+Specify docker command plugin for GPU. By default uses Nvidia docker V1.0; 
`nvidia-docker-v2` is available for V2.x.
 
 | Property | Default value |
 | --- | --- |
@@ -169,6 +169,13 @@ docker.allowed.volume-drivers
 ...
 docker.allowed.ro-mounts=nvidia_driver_375.66
 ```
+**4) If using `nvidia-docker-v2` as the GPU docker plugin, add `nvidia` to the runtimes 
whitelist.**
+
+```
+[docker]
+...
+docker.allowed.runtimes=nvidia
+```
 
 # Use it
 


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-commits-h...@hadoop.apache.org

Reply via email to