Updated the 'gpu/nvidia' isolator to be nested container aware.

In the process I also updated the Failure() / Log() messages in
'recover()' to contain more information (i.e. the same information as
the new generic cgroups isolator).

Review: https://reviews.apache.org/r/51966/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/8b27afc9
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/8b27afc9
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/8b27afc9

Branch: refs/heads/master
Commit: 8b27afc9b0093be5cc33ab8b45608fa3916af80c
Parents: 442987e
Author: Kevin Klues <klue...@gmail.com>
Authored: Fri Sep 16 14:58:32 2016 -0700
Committer: Jie Yu <yujie....@gmail.com>
Committed: Fri Sep 16 14:58:32 2016 -0700

----------------------------------------------------------------------
 .../mesos/isolators/gpu/isolator.cpp            | 48 ++++++++++++++++++--
 1 file changed, 44 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/8b27afc9/src/slave/containerizer/mesos/isolators/gpu/isolator.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/isolators/gpu/isolator.cpp 
b/src/slave/containerizer/mesos/isolators/gpu/isolator.cpp
index 1383ba2..528c8ea 100644
--- a/src/slave/containerizer/mesos/isolators/gpu/isolator.cpp
+++ b/src/slave/containerizer/mesos/isolators/gpu/isolator.cpp
@@ -43,6 +43,8 @@
 
 #include "slave/containerizer/mesos/isolator.hpp"
 
+#include "slave/containerizer/mesos/isolators/cgroups/constants.hpp"
+
 #include "slave/containerizer/mesos/isolators/gpu/allocator.hpp"
 #include "slave/containerizer/mesos/isolators/gpu/isolator.hpp"
 #include "slave/containerizer/mesos/isolators/gpu/nvml.hpp"
@@ -112,7 +114,7 @@ Try<Isolator*> NvidiaGpuIsolatorProcess::create(
   }
 
   // Retrieve the cgroups devices hierarchy.
-  Result<string> hierarchy = cgroups::hierarchy("devices");
+  Result<string> hierarchy = cgroups::hierarchy(CGROUP_SUBSYSTEM_DEVICES_NAME);
 
   if (hierarchy.isError()) {
     return Error(
@@ -188,6 +190,13 @@ Future<Nothing> NvidiaGpuIsolatorProcess::recover(
 
   foreach (const ContainerState& state, states) {
     const ContainerID& containerId = state.container_id();
+
+    // If we are a nested container, we skip the recover because our
+    // root ancestor will recover the GPU state from the cgroup for us.
+    if (containerId.has_parent()) {
+      continue;
+    }
+
     const string cgroup = path::join(flags.cgroups_root, containerId.value());
 
     Try<bool> exists = cgroups::exists(hierarchy, cgroup);
@@ -195,17 +204,24 @@ Future<Nothing> NvidiaGpuIsolatorProcess::recover(
       foreachvalue (Info* info, infos) {
         delete info;
       }
+
       infos.clear();
-      return Failure("Failed to check cgroup for container '" +
-                     stringify(containerId) + "'");
+
+      return Failure(
+          "Failed to check the existence of the cgroup "
+          "'" + cgroup + "' in hierarchy '" + hierarchy + "' "
+          "for container " + stringify(containerId) +
+          ": " + exists.error());
     }
 
     if (!exists.get()) {
-      VLOG(1) << "Couldn't find cgroup for container " << containerId;
       // This may occur if the executor has exited and the isolator
       // has destroyed the cgroup but the slave dies before noticing
       // this. This will be detected when the containerizer tries to
       // monitor the executor's pid.
+      LOG(WARNING) << "Couldn't find the cgroup '" << cgroup << "' "
+                   << "in hierarchy '" << hierarchy << "' "
+                   << "for container " << containerId;
       continue;
     }
 
@@ -248,6 +264,16 @@ Future<Option<ContainerLaunchInfo>> 
NvidiaGpuIsolatorProcess::prepare(
     const ContainerID& containerId,
     const mesos::slave::ContainerConfig& containerConfig)
 {
+  // If we are a nested container, we don't need to maintain an `Info()`
+  // struct about the container since we don't allocate GPUs to it
+  // directly (we only allocate GPUs to top-level containers, and they
+  // automatically get shared by nested containers).  However, we do
+  // still need to mount the necessary Nvidia libraries into the
+  // container. We call `_prepare()` directly to do this for us.
+  if (containerId.has_parent()) {
+    return _prepare(containerConfig);
+  }
+
   if (infos.contains(containerId)) {
     return Failure("Container has already been prepared");
   }
@@ -335,6 +361,10 @@ Future<Nothing> NvidiaGpuIsolatorProcess::update(
     const ContainerID& containerId,
     const Resources& resources)
 {
+  if (containerId.has_parent()) {
+    return Failure("Not supported for nested containers");
+  }
+
   if (!infos.contains(containerId)) {
     return Failure("Unknown container");
   }
@@ -432,6 +462,10 @@ Future<Nothing> NvidiaGpuIsolatorProcess::_update(
 Future<ResourceStatistics> NvidiaGpuIsolatorProcess::usage(
     const ContainerID& containerId)
 {
+  if (containerId.has_parent()) {
+    return Failure("Not supported for nested containers");
+  }
+
   if (!infos.contains(containerId)) {
     return Failure("Unknown container");
   }
@@ -446,6 +480,12 @@ Future<ResourceStatistics> NvidiaGpuIsolatorProcess::usage(
 Future<Nothing> NvidiaGpuIsolatorProcess::cleanup(
     const ContainerID& containerId)
 {
+  // If we are a nested container, we don't have an `Info()` struct to
+  // cleanup, so we just return immediately.
+  if (containerId.has_parent()) {
+    return Nothing();
+  }
+
   // Multiple calls may occur during test clean up.
   if (!infos.contains(containerId)) {
     VLOG(1) << "Ignoring cleanup request for unknown container " << 
containerId;

Reply via email to