This is an automated email from the ASF dual-hosted git repository.

gilbert pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/mesos.git

commit 3b7670ee72ae215faf2ad012e92fc344fc540baa
Author: Qian Zhang <[email protected]>
AuthorDate: Wed Feb 27 22:22:21 2019 -0800

    Made non-root containers able to access shared persistent volumes.
    
    Review: https://reviews.apache.org/r/69544/
---
 src/slave/containerizer/mesos/containerizer.cpp    |  80 +++++++----
 .../mesos/isolators/filesystem/linux.cpp           | 146 +++++++++++++------
 .../mesos/isolators/filesystem/linux.hpp           |  12 +-
 .../mesos/isolators/filesystem/posix.cpp           | 157 +++++++++++++++------
 .../mesos/isolators/filesystem/posix.hpp           |  13 +-
 .../mesos/isolators/filesystem/windows.cpp         |  11 +-
 .../mesos/isolators/filesystem/windows.hpp         |  11 +-
 .../volume_gid_manager/volume_gid_manager.cpp      |  79 ++++++++---
 8 files changed, 367 insertions(+), 142 deletions(-)

diff --git a/src/slave/containerizer/mesos/containerizer.cpp 
b/src/slave/containerizer/mesos/containerizer.cpp
index a30c00a..0432448 100644
--- a/src/slave/containerizer/mesos/containerizer.cpp
+++ b/src/slave/containerizer/mesos/containerizer.cpp
@@ -371,13 +371,29 @@ Try<MesosContainerizer*> MesosContainerizer::create(
     // Filesystem isolators.
 
 #ifdef __WINDOWS__
-    {"filesystem/windows", &WindowsFilesystemIsolatorProcess::create},
+    {"filesystem/windows",
+      [volumeGidManager] (const Flags& flags) -> Try<Isolator*> {
+        return WindowsFilesystemIsolatorProcess::create(
+            flags,
+            volumeGidManager);
+      }},
 #else
-    {"filesystem/posix", &PosixFilesystemIsolatorProcess::create},
+    {"filesystem/posix",
+      [volumeGidManager] (const Flags& flags) -> Try<Isolator*> {
+        return PosixFilesystemIsolatorProcess::create(
+            flags,
+            volumeGidManager);
+      }},
 #endif // __WINDOWS__
 
 #ifdef __linux__
-    {"filesystem/linux", &LinuxFilesystemIsolatorProcess::create},
+    {"filesystem/linux",
+      [volumeGidManager] (const Flags& flags) -> Try<Isolator*> {
+        return LinuxFilesystemIsolatorProcess::create(
+            flags,
+            volumeGidManager);
+      }},
+
     // TODO(jieyu): Deprecate this in favor of using filesystem/linux.
     {"filesystem/shared", &SharedFilesystemIsolatorProcess::create},
 #endif // __linux__
@@ -1715,35 +1731,49 @@ Future<Containerizer::LaunchResult> 
MesosContainerizerProcess::_launch(
   }
 
   // For command tasks specifically, we should add the task_environment
-  // flag to the launch command of the command executor.
+  // flag and the task_supplementary_groups flag to the launch command
+  // of the command executor.
   // TODO(tillt): Remove this once we no longer support the old style
   // command task (i.e., that uses mesos-execute).
-  if (container->config->has_task_info() && launchInfo.has_task_environment()) 
{
-    hashmap<string, string> commandTaskEnvironment;
+  if (container->config->has_task_info()) {
+    if (launchInfo.has_task_environment()) {
+      hashmap<string, string> commandTaskEnvironment;
+
+      foreach (const Environment::Variable& variable,
+               launchInfo.task_environment().variables()) {
+        const string& name = variable.name();
+        const string& value = variable.value();
+        if (commandTaskEnvironment.contains(name) &&
+            commandTaskEnvironment[name] != value) {
+          LOG(WARNING) << "Overwriting environment variable '" << name << "' "
+                       << "for container " << containerId;
+        }
+        // TODO(tillt): Consider making this 'secret' aware.
+        commandTaskEnvironment[name] = value;
+      }
 
-    foreach (const Environment::Variable& variable,
-             launchInfo.task_environment().variables()) {
-      const string& name = variable.name();
-      const string& value = variable.value();
-      if (commandTaskEnvironment.contains(name) &&
-          commandTaskEnvironment[name] != value) {
-        LOG(WARNING) << "Overwriting environment variable '" << name << "' "
-                     << "for container " << containerId;
+      if (!commandTaskEnvironment.empty()) {
+        Environment taskEnvironment;
+        foreachpair (
+            const string& name, const string& value, commandTaskEnvironment) {
+          Environment::Variable* variable = taskEnvironment.add_variables();
+          variable->set_name(name);
+          variable->set_value(value);
+        }
+        launchInfo.mutable_command()->add_arguments(
+            "--task_environment=" + 
stringify(JSON::protobuf(taskEnvironment)));
       }
-      // TODO(tillt): Consider making this 'secret' aware.
-      commandTaskEnvironment[name] = value;
     }
 
-    if (!commandTaskEnvironment.empty()) {
-      Environment taskEnvironment;
-      foreachpair (
-          const string& name, const string& value, commandTaskEnvironment) {
-        Environment::Variable* variable = taskEnvironment.add_variables();
-        variable->set_name(name);
-        variable->set_value(value);
-      }
+    if (launchInfo.task_supplementary_groups_size() > 0) {
+      // Remove duplicated entries in supplementary groups.
+      set<uint32_t> taskSupplementaryGroups(
+          launchInfo.task_supplementary_groups().begin(),
+          launchInfo.task_supplementary_groups().end());
+
       launchInfo.mutable_command()->add_arguments(
-          "--task_environment=" + stringify(JSON::protobuf(taskEnvironment)));
+          "--task_supplementary_groups=" +
+          strings::join(",", taskSupplementaryGroups));
     }
   }
 
diff --git a/src/slave/containerizer/mesos/isolators/filesystem/linux.cpp 
b/src/slave/containerizer/mesos/isolators/filesystem/linux.cpp
index 1e116b7..341853a 100644
--- a/src/slave/containerizer/mesos/isolators/filesystem/linux.cpp
+++ b/src/slave/containerizer/mesos/isolators/filesystem/linux.cpp
@@ -37,9 +37,9 @@
 #include <stout/stringify.hpp>
 #include <stout/strings.hpp>
 
+#include <stout/os/realpath.hpp>
 #include <stout/os/shell.hpp>
 #include <stout/os/strerror.hpp>
-#include <stout/os/realpath.hpp>
 
 #include "common/protobuf_utils.hpp"
 
@@ -451,7 +451,9 @@ static Try<Nothing> ensureAllowDevices(const string& 
_targetDir)
 }
 
 
-Try<Isolator*> LinuxFilesystemIsolatorProcess::create(const Flags& flags)
+Try<Isolator*> LinuxFilesystemIsolatorProcess::create(
+    const Flags& flags,
+    VolumeGidManager* volumeGidManager)
 {
   if (geteuid() != 0) {
     return Error("'filesystem/linux' isolator requires root privileges");
@@ -506,16 +508,18 @@ Try<Isolator*> 
LinuxFilesystemIsolatorProcess::create(const Flags& flags)
   }
 
   Owned<MesosIsolatorProcess> process(
-      new LinuxFilesystemIsolatorProcess(flags));
+      new LinuxFilesystemIsolatorProcess(flags, volumeGidManager));
 
   return new MesosIsolator(process);
 }
 
 
 LinuxFilesystemIsolatorProcess::LinuxFilesystemIsolatorProcess(
-    const Flags& _flags)
+    const Flags& _flags,
+    VolumeGidManager* _volumeGidManager)
   : ProcessBase(process::ID::generate("linux-filesystem-isolator")),
     flags(_flags),
+    volumeGidManager(_volumeGidManager),
     metrics(PID<LinuxFilesystemIsolatorProcess>(this)) {}
 
 
@@ -748,9 +752,29 @@ Future<Option<ContainerLaunchInfo>> 
LinuxFilesystemIsolatorProcess::prepare(
   }
 
   return update(containerId, containerConfig.resources())
-    .then([launchInfo]() -> Future<Option<ContainerLaunchInfo>> {
-      return launchInfo;
-    });
+    .then(defer(
+        self(),
+        [this, containerId, containerConfig, launchInfo]() mutable
+            -> Future<Option<ContainerLaunchInfo>> {
+          if (!infos.contains(containerId)) {
+            return Failure("Unknown container");
+          }
+
+          foreach (gid_t gid, infos[containerId]->gids) {
+            // For command task with its own rootfs, the command executor will
+            // run as root and the task itself will run as the specified normal
+            // user, so here we add the supplementary group for the task and 
the
+            // command executor will set it accordingly when launching the 
task.
+            if (containerConfig.has_task_info() &&
+                containerConfig.has_rootfs()) {
+              launchInfo.add_task_supplementary_groups(gid);
+            } else {
+              launchInfo.add_supplementary_groups(gid);
+            }
+          }
+
+          return launchInfo;
+    }));
 }
 
 
@@ -826,6 +850,8 @@ Future<Nothing> LinuxFilesystemIsolatorProcess::update(
   const uid_t uid = s.st_uid;
   const gid_t gid = s.st_gid;
 
+  vector<Future<gid_t>> futures;
+
   // We then mount new persistent volumes.
   foreach (const Resource& resource, resources.persistentVolumes()) {
     // This is enforced by the master.
@@ -848,42 +874,68 @@ Future<Nothing> LinuxFilesystemIsolatorProcess::update(
     // Determine the source of the mount.
     string source = paths::getPersistentVolumePath(flags.work_dir, resource);
 
-    bool isVolumeInUse = false;
-
-    foreachpair (const ContainerID& _containerId,
-                 const Owned<Info>& info,
-                 infos) {
-      // Skip self.
-      if (_containerId == containerId) {
-        continue;
-      }
-
-      if (info->resources.contains(resource)) {
-        isVolumeInUse = true;
-        break;
-      }
-    }
+    // If the container's user is root (uid == 0), we do not need to do any
+    // changes about the volume's ownership since it has the full permissions
+    // to access the volume.
+    if (uid != 0) {
+      // For shared persistent volume, if volume gid manager is enabled, call
+      // volume gid manager to allocate a gid to make sure the container has
+      // the permission to access the volume.
+      //
+      // TODO(qianzhang): Support gid allocation for shared persistent volumes
+      // from resource providers.
+      if (resource.has_shared() &&
+          !Resources::hasResourceProvider(resource) &&
+          volumeGidManager) {
+        LOG(INFO) << "Invoking volume gid manager to allocate gid to the "
+                  << "volume path '" << source << "' for container "
+                  << containerId;
+
+        futures.push_back(
+            volumeGidManager->allocate(source, VolumeGidInfo::PERSISTENT));
+      } else {
+        bool isVolumeInUse = false;
+
+        // Check if the shared persistent volume is currently used by another
+        // container. We do not need to do this check for local persistent
+        // volume since it can only be used by one container at a time.
+        if (resource.has_shared()) {
+          foreachpair (const ContainerID& _containerId,
+                       const Owned<Info>& info,
+                       infos) {
+            // Skip self.
+            if (_containerId == containerId) {
+              continue;
+            }
+
+            if (info->resources.contains(resource)) {
+              isVolumeInUse = true;
+              break;
+            }
+          }
+        }
 
-    // Set the ownership of the persistent volume to match that of the sandbox
-    // directory if the volume is not already in use. If the volume is
-    // currently in use by other containers, tasks in this container may fail
-    // to read from or write to the persistent volume due to incompatible
-    // ownership and file system permissions.
-    if (!isVolumeInUse) {
-      LOG(INFO) << "Changing the ownership of the persistent volume at '"
-                << source << "' with uid " << uid << " and gid " << gid;
-
-      Try<Nothing> chown = os::chown(uid, gid, source, false);
-
-      if (chown.isError()) {
-        return Failure(
-            "Failed to change the ownership of the persistent volume at '" +
-            source + "' with uid " + stringify(uid) +
-            " and gid " + stringify(gid) + ": " + chown.error());
+        // Set the ownership of the persistent volume to match that of the
+        // sandbox directory if the volume is not already in use. If the
+        // volume is currently in use by other containers, tasks in this
+        // container may fail to read from or write to the persistent volume
+        // due to incompatible ownership and file system permissions.
+        if (!isVolumeInUse) {
+          LOG(INFO) << "Changing the ownership of the persistent volume at '"
+                    << source << "' with uid " << uid << " and gid " << gid;
+
+          Try<Nothing> chown = os::chown(uid, gid, source, false);
+          if (chown.isError()) {
+            return Failure(
+                "Failed to change the ownership of the persistent volume at '" 
+
+                source + "' with uid " + stringify(uid) +
+                " and gid " + stringify(gid) + ": " + chown.error());
+          }
+        } else {
+          LOG(INFO) << "Leaving the ownership of the persistent volume at '"
+                    << source << "' unchanged because it is in use";
+        }
       }
-    } else {
-      LOG(INFO) << "Leaving the ownership of the persistent volume at '"
-                << source << "' unchanged because it is in use";
     }
 
     // Determine the target of the mount.
@@ -956,7 +1008,17 @@ Future<Nothing> LinuxFilesystemIsolatorProcess::update(
   // Store the new resources;
   info->resources = resources;
 
-  return Nothing();
+  return collect(futures)
+    .then(defer(self(), [this, containerId](const vector<gid_t>& gids)
+      -> Future<Nothing> {
+      if (!infos.contains(containerId)) {
+        return Failure("Unknown container");
+      }
+
+      infos[containerId]->gids = gids;
+
+      return Nothing();
+    }));
 }
 
 
diff --git a/src/slave/containerizer/mesos/isolators/filesystem/linux.hpp 
b/src/slave/containerizer/mesos/isolators/filesystem/linux.hpp
index 8f76944..e6fe688 100644
--- a/src/slave/containerizer/mesos/isolators/filesystem/linux.hpp
+++ b/src/slave/containerizer/mesos/isolators/filesystem/linux.hpp
@@ -31,6 +31,8 @@
 
 #include "slave/containerizer/mesos/isolator.hpp"
 
+#include "slave/volume_gid_manager/volume_gid_manager.hpp"
+
 namespace mesos {
 namespace internal {
 namespace slave {
@@ -42,7 +44,9 @@ namespace slave {
 class LinuxFilesystemIsolatorProcess : public MesosIsolatorProcess
 {
 public:
-  static Try<mesos::slave::Isolator*> create(const Flags& flags);
+  static Try<mesos::slave::Isolator*> create(
+      const Flags& flags,
+      VolumeGidManager* volumeGidManager = nullptr);
 
   ~LinuxFilesystemIsolatorProcess() override;
 
@@ -65,9 +69,12 @@ public:
       const ContainerID& containerId) override;
 
 private:
-  LinuxFilesystemIsolatorProcess(const Flags& flags);
+  LinuxFilesystemIsolatorProcess(
+      const Flags& flags,
+      VolumeGidManager* volumeGidManager);
 
   const Flags flags;
+  VolumeGidManager* volumeGidManager;
 
   struct Info
   {
@@ -84,6 +91,7 @@ private:
     Resources resources;
 
     Option<ExecutorInfo> executor;
+    std::vector<gid_t> gids;
   };
 
   hashmap<ContainerID, process::Owned<Info>> infos;
diff --git a/src/slave/containerizer/mesos/isolators/filesystem/posix.cpp 
b/src/slave/containerizer/mesos/isolators/filesystem/posix.cpp
index f91a2ee..08449e2 100644
--- a/src/slave/containerizer/mesos/isolators/filesystem/posix.cpp
+++ b/src/slave/containerizer/mesos/isolators/filesystem/posix.cpp
@@ -17,6 +17,7 @@
 #include <string>
 #include <vector>
 
+#include <process/collect.hpp>
 #include <process/id.hpp>
 
 #include <stout/fs.hpp>
@@ -45,18 +46,22 @@ namespace internal {
 namespace slave {
 
 PosixFilesystemIsolatorProcess::PosixFilesystemIsolatorProcess(
-    const Flags& _flags)
+    const Flags& _flags,
+    VolumeGidManager* _volumeGidManager)
   : ProcessBase(process::ID::generate("posix-filesystem-isolator")),
-    flags(_flags) {}
+    flags(_flags),
+    volumeGidManager(_volumeGidManager) {}
 
 
 PosixFilesystemIsolatorProcess::~PosixFilesystemIsolatorProcess() {}
 
 
-Try<Isolator*> PosixFilesystemIsolatorProcess::create(const Flags& flags)
+Try<Isolator*> PosixFilesystemIsolatorProcess::create(
+    const Flags& flags,
+    VolumeGidManager* volumeGidManager)
 {
   process::Owned<MesosIsolatorProcess> process(
-      new PosixFilesystemIsolatorProcess(flags));
+      new PosixFilesystemIsolatorProcess(flags, volumeGidManager));
 
   return new MesosIsolator(process);
 }
@@ -101,7 +106,30 @@ Future<Option<ContainerLaunchInfo>> 
PosixFilesystemIsolatorProcess::prepare(
   infos.put(containerId, Owned<Info>(new Info(containerConfig.directory())));
 
   return update(containerId, executorInfo.resources())
-      .then([]() -> Future<Option<ContainerLaunchInfo>> { return None(); });
+    .then(defer(
+        self(),
+        [this, containerId, containerConfig]() mutable
+            -> Future<Option<ContainerLaunchInfo>> {
+          if (!infos.contains(containerId)) {
+            return Failure("Unknown container");
+          }
+
+          ContainerLaunchInfo launchInfo;
+          foreach (gid_t gid, infos[containerId]->gids) {
+            // For command task with its own rootfs, the command executor will
+            // run as root and the task itself will run as the specified normal
+            // user, so here we add the supplementary group for the task and 
the
+            // command executor will set it accordingly when launching the 
task.
+            if (containerConfig.has_task_info() &&
+                containerConfig.has_rootfs()) {
+              launchInfo.add_task_supplementary_groups(gid);
+            } else {
+              launchInfo.add_supplementary_groups(gid);
+            }
+          }
+
+          return launchInfo;
+      }));
 }
 
 
@@ -162,6 +190,8 @@ Future<Nothing> PosixFilesystemIsolatorProcess::update(
   const uid_t uid = s.st_uid;
   const gid_t gid = s.st_gid;
 
+  vector<Future<gid_t>> futures;
+
   // We then link additional persistent volumes.
   foreach (const Resource& resource, resources.persistentVolumes()) {
     // This is enforced by the master.
@@ -183,45 +213,74 @@ Future<Nothing> PosixFilesystemIsolatorProcess::update(
 
     string original = paths::getPersistentVolumePath(flags.work_dir, resource);
 
-    bool isVolumeInUse = false;
-
-    foreachpair (const ContainerID& _containerId,
-                 const Owned<Info>& info,
-                 infos) {
-      // Skip self.
-      if (_containerId == containerId) {
-        continue;
-      }
-
-      if (info->resources.contains(resource)) {
-        isVolumeInUse = true;
-        break;
-      }
-    }
-
-    // Set the ownership of the persistent volume to match that of the sandbox
-    // directory if the volume is not already in use. If the volume is
-    // currently in use by other containers, tasks in this container may fail
-    // to read from or write to the persistent volume due to incompatible
-    // ownership and file system permissions.
-    if (!isVolumeInUse) {
-      // TODO(hausdorff): (MESOS-5461) Persistent volumes maintain the 
invariant
-      // that they are used by one task at a time. This is currently enforced 
by
-      // `os::chown`. Windows does not support `os::chown`, we will need to
-      // revisit this later.
-#ifndef __WINDOWS__
-      LOG(INFO) << "Changing the ownership of the persistent volume at '"
-                << original << "' with uid " << uid << " and gid " << gid;
-
-      Try<Nothing> chown = os::chown(uid, gid, original, false);
-
-      if (chown.isError()) {
-        return Failure(
-            "Failed to change the ownership of the persistent volume at '" +
-            original + "' with uid " + stringify(uid) +
-            " and gid " + stringify(gid) + ": " + chown.error());
+    // If the container's user is root (uid == 0), we do not need to do any
+    // changes about the volume's ownership since it has the full permissions
+    // to access the volume.
+    if (uid != 0) {
+      // For shared persistent volume, if volume gid manager is enabled, call
+      // volume gid manager to allocate a gid to make sure the container has
+      // the permission to access the volume.
+      //
+      // TODO(qianzhang): Support gid allocation for shared persistent volumes
+      // from resource providers.
+      if (resource.has_shared() &&
+          !Resources::hasResourceProvider(resource) &&
+          volumeGidManager) {
+  #ifndef __WINDOWS__
+        LOG(INFO) << "Invoking volume gid manager to allocate gid to the "
+                  << "volume path '" << original << "' for container "
+                  << containerId;
+
+        futures.push_back(
+            volumeGidManager->allocate(original, VolumeGidInfo::PERSISTENT));
+  #endif // __WINDOWS__
+      } else {
+        bool isVolumeInUse = false;
+
+        // Check if the shared persistent volume is currently used by another
+        // container. We do not need to do this check for local persistent
+        // volume since it can only be used by one container at a time.
+        if (resource.has_shared()) {
+          foreachpair (const ContainerID& _containerId,
+                       const Owned<Info>& info,
+                       infos) {
+            // Skip self.
+            if (_containerId == containerId) {
+              continue;
+            }
+
+            if (info->resources.contains(resource)) {
+              isVolumeInUse = true;
+              break;
+            }
+          }
+        }
+
+        // Set the ownership of the persistent volume to match that of the
+        // sandbox directory if the volume is not already in use. If the
+        // volume is currently in use by other containers, tasks in this
+        // container may fail to read from or write to the persistent volume
+        // due to incompatible ownership and file system permissions.
+        if (!isVolumeInUse) {
+          // TODO(hausdorff): (MESOS-5461) Persistent volumes maintain the
+          // invariant that they are used by one task at a time. This is
+          // currently enforced by `os::chown`. Windows does not support
+          // `os::chown`, we will need to revisit this later.
+  #ifndef __WINDOWS__
+          LOG(INFO) << "Changing the ownership of the persistent volume at '"
+                    << original << "' with uid " << uid << " and gid " << gid;
+
+          Try<Nothing> chown = os::chown(uid, gid, original, false);
+
+          if (chown.isError()) {
+            return Failure(
+                "Failed to change the ownership of the persistent volume at '" 
+
+                original + "' with uid " + stringify(uid) +
+                " and gid " + stringify(gid) + ": " + chown.error());
+          }
+  #endif // __WINDOWS__
+        }
       }
-#endif
     }
 
     string link = path::join(info->directory, containerPath);
@@ -279,7 +338,17 @@ Future<Nothing> PosixFilesystemIsolatorProcess::update(
   // Store the updated resources.
   info->resources = resources;
 
-  return Nothing();
+  return collect(futures)
+    .then(defer(self(), [this, containerId](const vector<gid_t>& gids)
+      -> Future<Nothing> {
+      if (!infos.contains(containerId)) {
+        return Failure("Unknown container");
+      }
+
+      infos[containerId]->gids = gids;
+
+      return Nothing();
+    }));
 }
 
 
diff --git a/src/slave/containerizer/mesos/isolators/filesystem/posix.hpp 
b/src/slave/containerizer/mesos/isolators/filesystem/posix.hpp
index deacc90..9db6ddd 100644
--- a/src/slave/containerizer/mesos/isolators/filesystem/posix.hpp
+++ b/src/slave/containerizer/mesos/isolators/filesystem/posix.hpp
@@ -23,6 +23,8 @@
 
 #include "slave/containerizer/mesos/isolator.hpp"
 
+#include "slave/volume_gid_manager/volume_gid_manager.hpp"
+
 namespace mesos {
 namespace internal {
 namespace slave {
@@ -30,7 +32,9 @@ namespace slave {
 class PosixFilesystemIsolatorProcess : public MesosIsolatorProcess
 {
 public:
-  static Try<mesos::slave::Isolator*> create(const Flags& flags);
+  static Try<mesos::slave::Isolator*> create(
+      const Flags& flags,
+      VolumeGidManager* volumeGidManager);
 
   ~PosixFilesystemIsolatorProcess() override;
 
@@ -50,9 +54,12 @@ public:
       const ContainerID& containerId) override;
 
 protected:
-  PosixFilesystemIsolatorProcess(const Flags& flags);
+  PosixFilesystemIsolatorProcess(
+      const Flags& flags,
+      VolumeGidManager* volumeGidManager);
 
   const Flags flags;
+  VolumeGidManager* volumeGidManager;
 
   struct Info
   {
@@ -63,6 +70,8 @@ protected:
 
     // Track resources so we can unlink unneeded persistent volumes.
     Resources resources;
+
+    std::vector<gid_t> gids;
   };
 
   hashmap<ContainerID, process::Owned<Info>> infos;
diff --git a/src/slave/containerizer/mesos/isolators/filesystem/windows.cpp 
b/src/slave/containerizer/mesos/isolators/filesystem/windows.cpp
index f169c38..eb8d528 100644
--- a/src/slave/containerizer/mesos/isolators/filesystem/windows.cpp
+++ b/src/slave/containerizer/mesos/isolators/filesystem/windows.cpp
@@ -29,14 +29,17 @@ namespace internal {
 namespace slave {
 
 WindowsFilesystemIsolatorProcess::WindowsFilesystemIsolatorProcess(
-    const Flags& _flags)
+    const Flags& _flags,
+    VolumeGidManager* _volumeGidManager)
   : ProcessBase(process::ID::generate("windows-filesystem-isolator")),
-    PosixFilesystemIsolatorProcess(_flags) {}
+    PosixFilesystemIsolatorProcess(_flags, _volumeGidManager) {}
 
-Try<Isolator*> WindowsFilesystemIsolatorProcess::create(const Flags& flags)
+Try<Isolator*> WindowsFilesystemIsolatorProcess::create(
+    const Flags& flags,
+    VolumeGidManager* volumeGidManager)
 {
   process::Owned<MesosIsolatorProcess> process(
-      new WindowsFilesystemIsolatorProcess(flags));
+      new WindowsFilesystemIsolatorProcess(flags, volumeGidManager));
 
   return new MesosIsolator(process);
 }
diff --git a/src/slave/containerizer/mesos/isolators/filesystem/windows.hpp 
b/src/slave/containerizer/mesos/isolators/filesystem/windows.hpp
index 2bf011d..aea0165 100644
--- a/src/slave/containerizer/mesos/isolators/filesystem/windows.hpp
+++ b/src/slave/containerizer/mesos/isolators/filesystem/windows.hpp
@@ -22,8 +22,11 @@
 #include "slave/flags.hpp"
 
 #include "slave/containerizer/mesos/isolator.hpp"
+
 #include "slave/containerizer/mesos/isolators/filesystem/posix.hpp"
 
+#include "slave/volume_gid_manager/volume_gid_manager.hpp"
+
 namespace mesos {
 namespace internal {
 namespace slave {
@@ -36,10 +39,14 @@ namespace slave {
 class WindowsFilesystemIsolatorProcess : public PosixFilesystemIsolatorProcess
 {
 public:
-  static Try<mesos::slave::Isolator*> create(const Flags& flags);
+  static Try<mesos::slave::Isolator*> create(
+      const Flags& flags,
+      VolumeGidManager* volumeGidManager);
 
 private:
-  WindowsFilesystemIsolatorProcess(const Flags& flags);
+  WindowsFilesystemIsolatorProcess(
+      const Flags& flags,
+      VolumeGidManager* volumeGidManager);
 };
 
 } // namespace slave {
diff --git a/src/slave/volume_gid_manager/volume_gid_manager.cpp 
b/src/slave/volume_gid_manager/volume_gid_manager.cpp
index ed8f6a2..49f70a2 100644
--- a/src/slave/volume_gid_manager/volume_gid_manager.cpp
+++ b/src/slave/volume_gid_manager/volume_gid_manager.cpp
@@ -28,6 +28,10 @@
 
 #include "common/values.hpp"
 
+#ifdef __linux__
+#include "linux/fs.hpp"
+#endif // __linux__
+
 #include "slave/volume_gid_manager/volume_gid_manager.hpp"
 
 using std::string;
@@ -168,34 +172,67 @@ public:
       LOG(INFO) << "Use the allocated gid " << gid << " of the volume path '"
                 << path << "'";
     } else {
-      // Allocate a free gid to the specified path and then set the
-      // ownership for it.
-      if (freeGids.empty()) {
-        return Failure(
-            "Failed to allocate gid to the volume path '" + path +
-            "' because the free gid range is exhausted");
+      Option<string> realPath;
+#ifdef __linux__
+      // This is to handle the case that nested containers use shared 
persistent
+      // volume, in which case we did a workaround in the default executor to
+      // set up a volume mapping (i.e., map the shared persistent volume to a
+      // SANDBOX_PATH volume of PARENT type for nested containers) so that the
+      // nested container can access the shared persistent volume. So here we
+      // need to search the mount table to see if the `path` is the target of a
+      // mount, and check if we have already allocated a gid to the source of
+      // that mount, if yes, then we just return that gid instead of allocating
+      // a new one, otherwise the ownership of the shared persistent volume 
will
+      // be wrongly overwritten.
+      Try<fs::MountInfoTable> table = fs::MountInfoTable::read();
+      if (table.isError()) {
+        return Failure("Failed to get mount table: " + table.error());
       }
 
-      gid = freeGids.begin()->lower();
+      foreach (const fs::MountInfoTable::Entry& entry, table->entries) {
+        if (path == entry.target) {
+          realPath = entry.root;
+          break;
+        }
+      }
+#endif // __linux__
 
-      LOG(INFO) << "Allocating gid " << gid << " to the volume path '"
-                << path << "'";
+      if (realPath.isSome() && infos.contains(realPath.get())) {
+        gid = infos[realPath.get()].gid();
 
-      Try<Nothing> result = setVolumeOwnership(path, gid, true);
-      if (result.isError()) {
-        return Failure(
-            "Failed to set the owner group of the volume path '" + path +
-            "' to " + stringify(gid) + ": " + result.error());
-      }
+        LOG(INFO) << "Use the allocated gid " << gid << " of the volume path '"
+                  << realPath.get() << "' which is mounted at the volume path 
'"
+                  << path << "'";
+      } else {
+        // Allocate a free gid to the specified path and then set the
+        // ownership for it.
+        if (freeGids.empty()) {
+          return Failure(
+              "Failed to allocate gid to the volume path '" + path +
+              "' because the free gid range is exhausted");
+        }
+
+        gid = freeGids.begin()->lower();
+
+        LOG(INFO) << "Allocating gid " << gid << " to the volume path '"
+                  << path << "'";
 
-      freeGids -= gid;
+        Try<Nothing> result = setVolumeOwnership(path, gid, true);
+        if (result.isError()) {
+          return Failure(
+              "Failed to set the owner group of the volume path '" + path +
+              "' to " + stringify(gid) + ": " + result.error());
+        }
+
+        freeGids -= gid;
 
-      VolumeGidInfo info;
-      info.set_type(type);
-      info.set_path(path);
-      info.set_gid(gid);
+        VolumeGidInfo info;
+        info.set_type(type);
+        info.set_path(path);
+        info.set_gid(gid);
 
-      infos.put(path, info);
+        infos.put(path, info);
+      }
     }
 
     return gid;

Reply via email to