This is an automated email from the ASF dual-hosted git repository.

gilbert pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/mesos.git

commit 221efd311f241575ebf3170de663cd301d61e252
Author: Qian Zhang <[email protected]>
AuthorDate: Wed Feb 27 22:22:04 2019 -0800

    Added volume gid manager.
    
    This manager is used to allocate/deallocate gids for shared persistent
    volumes and PARENT type SANDBOX_PATH volumes.
    
    Review: https://reviews.apache.org/r/69675/
---
 src/CMakeLists.txt                                 |   4 +-
 src/Makefile.am                                    |  10 +-
 src/slave/volume_gid_manager/state.hpp             |  23 ++
 src/slave/volume_gid_manager/state.proto           |  41 +++
 .../volume_gid_manager/volume_gid_manager.cpp      | 371 +++++++++++++++++++++
 .../volume_gid_manager/volume_gid_manager.hpp      |  64 ++++
 6 files changed, 510 insertions(+), 3 deletions(-)

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 687dc85..3397c3b 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -85,6 +85,7 @@ PROTOC_GENERATE(INTERNAL TARGET slave/containerizer/mesos/isolators/network/cni/
 PROTOC_GENERATE(INTERNAL TARGET slave/containerizer/mesos/isolators/docker/volume/state)
 PROTOC_GENERATE(INTERNAL TARGET slave/containerizer/mesos/provisioner/docker/message)
 PROTOC_GENERATE(INTERNAL TARGET slave/state)
+PROTOC_GENERATE(INTERNAL TARGET slave/volume_gid_manager/state)
 PROTOC_GENERATE(INTERNAL TARGET master/registry)
 PROTOC_GENERATE(INTERNAL TARGET resource_provider/registry)
 PROTOC_GENERATE(INTERNAL TARGET resource_provider/state)
@@ -198,7 +199,8 @@ if (NOT WIN32)
     slave/containerizer/mesos/isolators/posix/disk.cpp
     slave/containerizer/mesos/isolators/posix/rlimits.cpp
     slave/containerizer/mesos/isolators/volume/sandbox_path.cpp
-    slave/containerizer/mesos/provisioner/utils.cpp)
+    slave/containerizer/mesos/provisioner/utils.cpp
+    slave/volume_gid_manager/volume_gid_manager.cpp)
 endif ()
 
 set(APPC_SRC
diff --git a/src/Makefile.am b/src/Makefile.am
index 283d5ed..6bf2b97 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -414,7 +414,9 @@ CXX_PROTOS +=                                                               \
  slave/containerizer/mesos/isolators/docker/volume/state.pb.cc                \
   slave/containerizer/mesos/isolators/docker/volume/state.pb.h         \
   slave/containerizer/mesos/isolators/network/cni/spec.pb.cc           \
-  slave/containerizer/mesos/isolators/network/cni/spec.pb.h
+  slave/containerizer/mesos/isolators/network/cni/spec.pb.h            \
+  slave/volume_gid_manager/state.pb.cc                                 \
+  slave/volume_gid_manager/state.pb.h
 
 CXX_PROTOS +=                                                          \
   resource_provider/storage/disk_profile.pb.cc                         \
@@ -1002,7 +1004,8 @@ libmesos_no_3rdparty_la_SOURCES =                                 \
   slave/state.proto                                                    \
   slave/containerizer/mesos/provisioner/docker/message.proto           \
  slave/containerizer/mesos/isolators/docker/volume/state.proto                \
-  slave/containerizer/mesos/isolators/network/cni/spec.proto
+  slave/containerizer/mesos/isolators/network/cni/spec.proto           \
+  slave/volume_gid_manager/state.proto
 
 # TODO(tillt): Remove authentication/cram_md5/* which will enable us to
 # lose the immediate cyrus-sasl2 dependency.
@@ -1287,6 +1290,9 @@ libmesos_no_3rdparty_la_SOURCES +=                                        \
   slave/task_status_update_manager.hpp                                 \
   slave/validation.cpp                                                 \
   slave/validation.hpp                                                 \
+  slave/volume_gid_manager/state.hpp                                   \
+  slave/volume_gid_manager/volume_gid_manager.cpp                      \
+  slave/volume_gid_manager/volume_gid_manager.hpp                      \
  slave/windows_ctrlhandler.hpp                                                \
   status_update_manager/operation.cpp                                  \
   status_update_manager/operation.hpp                                  \
diff --git a/src/slave/volume_gid_manager/state.hpp b/src/slave/volume_gid_manager/state.hpp
new file mode 100644
index 0000000..8b8d78c
--- /dev/null
+++ b/src/slave/volume_gid_manager/state.hpp
@@ -0,0 +1,23 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef __VOLUME_GID_MANAGER_STATE_HPP__
+#define __VOLUME_GID_MANAGER_STATE_HPP__
+
+// ONLY USEFUL AFTER RUNNING PROTOC.
+#include "slave/volume_gid_manager/state.pb.h"
+
+#endif // __VOLUME_GID_MANAGER_STATE_HPP__
diff --git a/src/slave/volume_gid_manager/state.proto b/src/slave/volume_gid_manager/state.proto
new file mode 100644
index 0000000..e6a31d3
--- /dev/null
+++ b/src/slave/volume_gid_manager/state.proto
@@ -0,0 +1,41 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+syntax = "proto2";
+
+package mesos.internal.slave;
+
+message VolumeGidInfo {
+  enum Type {
+    UNKNOWN = 0;
+    PERSISTENT = 1;
+    SANDBOX_PATH = 2;
+  }
+
+  // The type of the volume (one of the `Type` values above).
+  optional Type type = 1;
+
+  // The source path of the volume.
+  required string path = 2;
+
+  // The gid allocated to the volume.
+  required uint32 gid = 3;
+}
+
+
+message VolumeGidInfos {
+  repeated VolumeGidInfo infos = 1; // Zero or more `VolumeGidInfo` entries.
+}
diff --git a/src/slave/volume_gid_manager/volume_gid_manager.cpp b/src/slave/volume_gid_manager/volume_gid_manager.cpp
new file mode 100644
index 0000000..ed8f6a2
--- /dev/null
+++ b/src/slave/volume_gid_manager/volume_gid_manager.cpp
@@ -0,0 +1,371 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <fts.h>
+#include <sys/types.h>
+
+#include <string>
+
+#include <mesos/resources.hpp>
+
+#include <process/dispatch.hpp>
+#include <process/id.hpp>
+
+#include <stout/os/su.hpp>
+
+#include "common/values.hpp"
+
+#include "slave/volume_gid_manager/volume_gid_manager.hpp"
+
+using std::string;
+using std::vector;
+
+using process::dispatch;
+using process::Failure;
+using process::Future;
+using process::Owned;
+
+using mesos::internal::values::rangesToIntervalSet;
+
+namespace mesos {
+namespace internal {
+namespace slave {
+
+// Walks the file tree rooted at `path` and hands the group ownership of
+// every directory, regular file and symbolic link found there to `gid`,
+// keeping each entry's owner uid as it is. On directories, the `setgid`
+// bit and the group write permission are additionally turned on when
+// `setgid` is true and turned off otherwise. The walk stops at the first
+// failure, which is returned as an error.
+static Try<Nothing> setVolumeOwnership(
+    const string& path,
+    gid_t gid,
+    bool setgid)
+{
+  LOG(INFO) << "Start setting the owner group of the volume path '"
+            << path << "' " << (setgid ? "" : "back ") << "to " << gid;
+
+  char* roots[] = {const_cast<char*>(path.c_str()), nullptr};
+
+  FTS* tree = ::fts_open(roots, FTS_NOCHDIR | FTS_PHYSICAL, nullptr);
+  if (tree == nullptr) {
+    return ErrnoError("Failed to open '" + path + "'");
+  }
+
+  // Ends the traversal and wraps `message` into an error. Callers capture
+  // any errno text before invoking this, so closing the tree here cannot
+  // disturb the reported cause.
+  auto bail = [tree](const string& message) -> Error {
+    ::fts_close(tree);
+    return Error(message);
+  };
+
+  FTSENT* entry;
+  while ((entry = ::fts_read(tree)) != nullptr) {
+    const string name = Path(entry->fts_path).string();
+
+    switch (entry->fts_info) {
+      case FTS_D:     // Preorder directory.
+      case FTS_F:     // Regular file.
+      case FTS_SL: {  // Symbolic link.
+        CHECK_NOTNULL(entry->fts_statp);
+
+        // Only the group changes; the owner uid is passed back unchanged.
+        if (::lchown(entry->fts_path, entry->fts_statp->st_uid, gid) < 0) {
+          const string cause = ErrnoError().message;
+          return bail("Chown failed on '" + name + "': " + cause);
+        }
+
+        if (entry->fts_info == FTS_D) {
+          // Directories get the `setgid` bit and the group write
+          // permission added or removed, depending on `setgid`.
+          const mode_t current = entry->fts_statp->st_mode;
+          const mode_t wanted = setgid
+            ? (current | S_ISGID | S_IWGRP)
+            : (current & ~S_ISGID & ~S_IWGRP);
+
+          if (::chmod(entry->fts_path, wanted) != 0) {
+            const string cause = ErrnoError().message;
+            return bail("Chmod failed on '" + name + "': " + cause);
+          }
+        }
+
+        break;
+      }
+
+      case FTS_DNR:   // Unreadable directory.
+      case FTS_ERR:   // Error; errno is set.
+      case FTS_NS: {  // `stat(2)` failed.
+        const string cause = ErrnoError(entry->fts_errno).message;
+        return bail("Failed to read '" + name + "': " + cause);
+      }
+
+      default:
+        break;
+    }
+  }
+
+  // A non-zero `errno` once `fts_read` has returned null is treated as a
+  // failed traversal.
+  if (errno != 0) {
+    const string cause = ErrnoError().message;
+    return bail(cause);
+  }
+
+  if (::fts_close(tree) != 0) {
+    return ErrnoError("Failed to stop traversing file system");
+  }
+
+  LOG(INFO) << "Finished setting the owner group of the volume path '"
+            << path << "' " << (setgid ? "" : "back ") << "to " << gid;
+
+  return Nothing();
+}
+
+
+// Process that hands out gids from a configured range to volume paths and
+// takes them back again. The bookkeeping lives in the members `freeGids`
+// (gids currently available) and `infos` (gids currently handed out, keyed
+// by volume path).
+class VolumeGidManagerProcess
+  : public process::Process<VolumeGidManagerProcess>
+{
+public:
+  // `gids` is the complete range this manager may allocate from; all of it
+  // starts out free.
+  VolumeGidManagerProcess(const IntervalSet<gid_t>& gids)
+    : ProcessBase(process::ID::generate("volume-gid-manager")),
+      totalGids(gids),
+      freeGids(gids) {}
+
+  // This method will be called when a container running as non-root user
+  // tries to use a shared persistent volume or a PARENT type SANDBOX_PATH
+  // volume, the parameter `path` will be the source path of the volume.
+  //
+  // Returns the gid already recorded for `path` if there is one. Otherwise
+  // the lowest free gid is applied to the volume via `setVolumeOwnership`
+  // and recorded. Fails if the free range is exhausted or if changing the
+  // ownership fails; in both cases no state is modified.
+  Future<gid_t> allocate(const string& path, VolumeGidInfo::Type type)
+  {
+    // If a gid has already been allocated for the specified path,
+    // just return the gid.
+    auto existing = infos.find(path);
+    if (existing != infos.end()) {
+      const gid_t gid = existing->second.gid();
+
+      LOG(INFO) << "Use the allocated gid " << gid << " of the volume path '"
+                << path << "'";
+
+      return gid;
+    }
+
+    // Allocate a free gid to the specified path and then set the
+    // ownership for it.
+    if (freeGids.empty()) {
+      return Failure(
+          "Failed to allocate gid to the volume path '" + path +
+          "' because the free gid range is exhausted");
+    }
+
+    const gid_t gid = freeGids.begin()->lower();
+
+    LOG(INFO) << "Allocating gid " << gid << " to the volume path '"
+              << path << "'";
+
+    Try<Nothing> result = setVolumeOwnership(path, gid, true);
+    if (result.isError()) {
+      return Failure(
+          "Failed to set the owner group of the volume path '" + path +
+          "' to " + stringify(gid) + ": " + result.error());
+    }
+
+    // Only consume the gid once the ownership change has succeeded.
+    freeGids -= gid;
+
+    VolumeGidInfo info;
+    info.set_type(type);
+    info.set_path(path);
+    info.set_gid(gid);
+
+    infos.put(path, info);
+
+    return gid;
+  }
+
+  // This method will be called in two cases:
+  //   1. When a shared persistent volume is destroyed by agent, the parameter
+  //      `path` will be the shared persistent volume's path.
+  //   2. When a container is destroyed by containerizer, the parameter `path`
+  //      will be the container's sandbox path.
+  // We search if the given path is contained in `infos` (for the case 1) or is
+  // the parent directory of any volume paths in `infos` (for the case 2, i.e.,
+  // the PARENT type SANDBOX_PATH volume must be a subdirectory in the parent
+  // container's sandbox) and then free the allocated gid for the found
+  // path(s).
+  Future<Nothing> deallocate(const string& path)
+  {
+    vector<string> sandboxPathVolumes;
+
+    // A volume path located below `path` must start with `path` followed by
+    // a path separator. Matching on the bare string prefix would also catch
+    // unrelated siblings, e.g. `/a/vol10` when `/a/vol1` is deallocated.
+    const string parent =
+      (!path.empty() && path.back() == '/') ? path : path + "/";
+
+    for (auto it = infos.begin(); it != infos.end(); ) {
+      const VolumeGidInfo& info = it->second;
+      const string& volumePath = info.path();
+
+      const bool exact = (volumePath == path);
+
+      if (exact || strings::startsWith(volumePath, parent)) {
+        if (!exact) {
+          // This is the case of the PARENT type SANDBOX_PATH volume.
+          sandboxPathVolumes.push_back(volumePath);
+        }
+
+        gid_t gid = info.gid();
+
+        LOG(INFO) << "Deallocated gid " << gid << " for the volume path '"
+                  << volumePath << "'";
+
+        // Only return the gid to the free range if it is in the total
+        // range. The gid may not be in the total range in the case that
+        // Mesos agent is restarted with a different total range and we
+        // deallocate gid for a previous volume path from the old range.
+        if (totalGids.contains(gid)) {
+          freeGids += gid;
+        }
+
+        // `info` and `volumePath` refer into the erased entry and must
+        // not be used past this point.
+        it = infos.erase(it);
+      } else {
+        ++it;
+      }
+    }
+
+    // For the PARENT type SANDBOX_PATH volume, it will exist for a while
+    // (depending on GC policy) after the container is destroyed. So to
+    // avoid leaking it to other containers in the case that its gid is
+    // allocated to another volume, we need to change its owner group back
+    // to the original one (i.e., the primary group of its owner).
+    // Failures here are logged and skipped; they do not fail the call.
+    foreach (const string& volume, sandboxPathVolumes) {
+      // Get the uid of the volume's owner.
+      struct stat s;
+      if (::stat(volume.c_str(), &s) < 0) {
+        LOG(WARNING) << "Failed to stat '" << volume << "': "
+                     << os::strerror(errno);
+
+        continue;
+      }
+
+      Result<string> user = os::user(s.st_uid);
+      if (!user.isSome()) {
+        LOG(WARNING) << "Failed to get username for the uid " << s.st_uid
+                     << ": " << (user.isError() ? user.error() : "not found");
+
+        continue;
+      }
+
+      // Get the primary group ID of the user.
+      Result<gid_t> gid = os::getgid(user.get());
+      if (!gid.isSome()) {
+        LOG(WARNING) << "Failed to get gid for the user '" << user.get()
+                     << "': " << (gid.isError() ? gid.error() : "not found");
+
+        continue;
+      }
+
+      Try<Nothing> result = setVolumeOwnership(volume, gid.get(), false);
+      if (result.isError()) {
+        LOG(WARNING) << "Failed to set the owner group of the volume path '"
+                     << volume << "' back to " << gid.get() << ": "
+                     << result.error();
+      }
+    }
+
+    return Nothing();
+  }
+
+private:
+  // The full gid range given at construction; never modified.
+  const IntervalSet<gid_t> totalGids;
+
+  // The subset of gids not currently handed out.
+  IntervalSet<gid_t> freeGids;
+
+  // Allocated gid infos keyed by the volume path.
+  hashmap<string, VolumeGidInfo> infos;
+};
+
+
+// Builds a `VolumeGidManager` from the agent flags.
+//
+// Returns an error if the effective uid is not root, if
+// `flags.volume_gid_range` cannot be parsed as a resource, if the parsed
+// value is not of RANGES type, or if the resulting gid set is invalid or
+// empty. `flags.volume_gid_range` must be set (enforced with a CHECK).
+// On success the caller receives a newly allocated manager.
+Try<VolumeGidManager*> VolumeGidManager::create(const Flags& flags)
+{
+  if (geteuid() != 0) {
+    return Error("Volume gid manager requires root privileges");
+  }
+
+  CHECK_SOME(flags.volume_gid_range);
+
+  // The range is parsed through the generic resource parser under the
+  // name "gids" so that the usual ranges syntax is accepted.
+  Try<Resource> parse =
+    Resources::parse("gids", flags.volume_gid_range.get(), "*");
+
+  if (parse.isError()) {
+    // Include the parser's own message so the operator can see what is
+    // wrong with the flag value.
+    return Error(
+        "Failed to parse volume gid range '" +
+        flags.volume_gid_range.get() + "': " + parse.error());
+  }
+
+  if (parse->type() != Value::RANGES) {
+    return Error(
+        "Invalid volume gid range type " +
+        mesos::Value_Type_Name(parse->type()) +
+        ", expecting " +
+        mesos::Value_Type_Name(Value::RANGES));
+  }
+
+  Try<IntervalSet<gid_t>> gids =
+    rangesToIntervalSet<gid_t>(parse->ranges());
+
+  if (gids.isError()) {
+    return Error("Invalid volume gid range '" +
+        stringify(parse->ranges()) + "': " + gids.error());
+  } else if (gids->empty()) {
+    return Error("Empty volume gid range");
+  }
+
+  return new VolumeGidManager(
+      Owned<VolumeGidManagerProcess>(new VolumeGidManagerProcess(gids.get())));
+}
+
+
+// Keeps the given process handle and spawns the process.
+VolumeGidManager::VolumeGidManager(
+    const Owned<VolumeGidManagerProcess>& _process)
+  : process(_process)
+{
+  process::spawn(process.get());
+}
+
+
+// Terminates the underlying process and waits for it before returning.
+VolumeGidManager::~VolumeGidManager()
+{
+  process::terminate(process.get());
+  process::wait(process.get());
+}
+
+
+// Forwards the request to `VolumeGidManagerProcess::allocate` via
+// `dispatch` and returns the resulting future.
+Future<gid_t> VolumeGidManager::allocate(
+    const string& path,
+    VolumeGidInfo::Type type) const
+{
+  return dispatch(
+      process.get(),
+      &VolumeGidManagerProcess::allocate,
+      path,
+      type);
+}
+
+
+// Forwards the request to `VolumeGidManagerProcess::deallocate` via
+// `dispatch` and returns the resulting future.
+Future<Nothing> VolumeGidManager::deallocate(const string& path) const
+{
+  return dispatch(
+      process.get(),
+      &VolumeGidManagerProcess::deallocate,
+      path);
+}
+
+} // namespace slave {
+} // namespace internal {
+} // namespace mesos {
diff --git a/src/slave/volume_gid_manager/volume_gid_manager.hpp b/src/slave/volume_gid_manager/volume_gid_manager.hpp
new file mode 100644
index 0000000..51732af
--- /dev/null
+++ b/src/slave/volume_gid_manager/volume_gid_manager.hpp
@@ -0,0 +1,64 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef __VOLUME_GID_MANAGER_HPP__
+#define __VOLUME_GID_MANAGER_HPP__
+
+#include <process/future.hpp>
+#include <process/process.hpp>
+
+#include <stout/nothing.hpp>
+#include <stout/try.hpp>
+
+#include "slave/flags.hpp"
+
+#include "slave/volume_gid_manager/state.hpp"
+
+namespace mesos {
+namespace internal {
+namespace slave {
+
+// Forward declaration.
+class VolumeGidManagerProcess;
+
+
+// Front end for the gid management of volumes: hands out owner group IDs
+// for shared persistent volumes and PARENT type SANDBOX_PATH volumes.
+// Every call is forwarded to a `VolumeGidManagerProcess`.
+class VolumeGidManager
+{
+public:
+  // Creates a manager from the agent flags. Returns an error when not
+  // running with root privileges or when `flags.volume_gid_range` does
+  // not yield a non-empty gid range.
+  static Try<VolumeGidManager*> create(const Flags& flags);
+
+  // Terminates the underlying process and waits for it.
+  ~VolumeGidManager();
+
+  // Returns the gid associated with the volume source `path`, allocating
+  // one from the free range if the path has none yet.
+  process::Future<gid_t> allocate(
+      const std::string& path,
+      VolumeGidInfo::Type type) const;
+
+  // Releases the gid recorded for `path` itself and the gids of recorded
+  // volume paths that `path` is a parent directory of.
+  process::Future<Nothing> deallocate(const std::string& path) const;
+
+private:
+  // Instances are only built through `create`.
+  explicit VolumeGidManager(
+      const process::Owned<VolumeGidManagerProcess>& process);
+
+  process::Owned<VolumeGidManagerProcess> process;
+};
+
+} // namespace slave {
+} // namespace internal {
+} // namespace mesos {
+
+#endif // __VOLUME_GID_MANAGER_HPP__

Reply via email to