Repository: mesos
Updated Branches:
  refs/heads/master c22527cce -> efb73d343


Add filesystem/posix isolator for persistent volumes.

Review: https://reviews.apache.org/r/34135


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/efb73d34
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/efb73d34
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/efb73d34

Branch: refs/heads/master
Commit: efb73d3433a4b51b7f5e72dc4c6c10a5d61d39c0
Parents: c22527c
Author: Ian Downes <[email protected]>
Authored: Tue Jul 21 15:54:15 2015 -0700
Committer: Jie Yu <[email protected]>
Committed: Tue Jul 21 17:09:50 2015 -0700

----------------------------------------------------------------------
 src/Makefile.am                                 |   2 +
 .../isolators/filesystem/posix.cpp              | 260 +++++++++++++++++++
 .../isolators/filesystem/posix.hpp              |  88 +++++++
 src/slave/containerizer/mesos/containerizer.cpp | 171 ++----------
 src/slave/containerizer/mesos/containerizer.hpp |   6 -
 5 files changed, 371 insertions(+), 156 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/efb73d34/src/Makefile.am
----------------------------------------------------------------------
diff --git a/src/Makefile.am b/src/Makefile.am
index d99f940..489ddb4 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -403,6 +403,7 @@ libmesos_no_3rdparty_la_SOURCES =                           
        \
        slave/containerizer/external_containerizer.cpp                  \
        slave/containerizer/fetcher.cpp                                 \
        slave/containerizer/isolator.cpp                                \
+       slave/containerizer/isolators/filesystem/posix.cpp              \
        slave/containerizer/isolators/posix/disk.cpp                    \
        slave/containerizer/launcher.cpp                                \
        slave/containerizer/mesos/containerizer.cpp                     \
@@ -640,6 +641,7 @@ libmesos_no_3rdparty_la_SOURCES +=                          
        \
        slave/containerizer/isolators/cgroups/mem.hpp                   \
        slave/containerizer/isolators/cgroups/perf_event.hpp            \
        slave/containerizer/isolators/namespaces/pid.hpp                \
+       slave/containerizer/isolators/filesystem/posix.hpp              \
        slave/containerizer/isolators/filesystem/shared.hpp             \
        slave/containerizer/provisioner.hpp                             \
        slave/resource_estimators/noop.hpp                              \

http://git-wip-us.apache.org/repos/asf/mesos/blob/efb73d34/src/slave/containerizer/isolators/filesystem/posix.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/isolators/filesystem/posix.cpp 
b/src/slave/containerizer/isolators/filesystem/posix.cpp
new file mode 100644
index 0000000..1904279
--- /dev/null
+++ b/src/slave/containerizer/isolators/filesystem/posix.cpp
@@ -0,0 +1,260 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <list>
+#include <string>
+
+#include <stout/fs.hpp>
+#include <stout/os.hpp>
+#include <stout/path.hpp>
+
+#include <mesos/mesos.hpp>
+#include <mesos/resources.hpp>
+
+#include "slave/paths.hpp"
+
+#include "slave/containerizer/isolators/filesystem/posix.hpp"
+
+using namespace process;
+
+using std::list;
+using std::string;
+
+using mesos::slave::ExecutorRunState;
+using mesos::slave::Isolator;
+using mesos::slave::IsolatorProcess;
+using mesos::slave::Limitation;
+
+namespace mesos {
+namespace internal {
+namespace slave {
+
+PosixFilesystemIsolatorProcess::PosixFilesystemIsolatorProcess(
+    const Flags& _flags)
+  : flags(_flags) {}
+
+
+PosixFilesystemIsolatorProcess::~PosixFilesystemIsolatorProcess() {}
+
+
+Try<Isolator*> PosixFilesystemIsolatorProcess::create(const Flags& flags)
+{
+  process::Owned<IsolatorProcess> process(
+      new PosixFilesystemIsolatorProcess(flags));
+
+  return new Isolator(process);
+}
+
+
+Future<Nothing> PosixFilesystemIsolatorProcess::recover(
+    const list<ExecutorRunState>& states,
+    const hashset<ContainerID>& orphans)
+{
+  foreach (const ExecutorRunState& state, states) {
+    infos.put(state.id, Owned<Info>(new Info(state.directory)));
+  }
+
+  return Nothing();
+}
+
+
+Future<Option<CommandInfo>> PosixFilesystemIsolatorProcess::prepare(
+    const ContainerID& containerId,
+    const ExecutorInfo& executorInfo,
+    const string& directory,
+    const Option<string>& rootfs,
+    const Option<string>& user)
+{
+  if (infos.contains(containerId)) {
+    return Failure("Container has already been prepared");
+  }
+
+  // Return failure if the container change the filesystem root
+  // because the symlinks will become invalid in the new root.
+  if (rootfs.isSome()) {
+    return Failure("Container root filesystems not supported");
+  }
+
+  infos.put(containerId, Owned<Info>(new Info(directory)));
+
+  return update(containerId, executorInfo.resources())
+      .then([]() -> Future<Option<CommandInfo>> { return None(); });
+}
+
+
+Future<Nothing> PosixFilesystemIsolatorProcess::isolate(
+    const ContainerID& containerId,
+    pid_t pid)
+{
+  // No-op.
+  return Nothing();
+}
+
+
+Future<Limitation> PosixFilesystemIsolatorProcess::watch(
+    const ContainerID& containerId)
+{
+  // No-op.
+  return Future<Limitation>();
+}
+
+
+Future<Nothing> PosixFilesystemIsolatorProcess::update(
+    const ContainerID& containerId,
+    const Resources& resources)
+{
+  if (!infos.contains(containerId)) {
+    return Failure("Unknown container");
+  }
+
+  const Owned<Info>& info = infos[containerId];
+
+  // TODO(jieyu): Currently, we only allow non-nested relative
+  // container paths for volumes. This is enforced by the master. For
+  // those volumes, we create symlinks in the executor directory.
+  Resources current = info->resources;
+
+  // We first remove unneeded persistent volumes.
+  foreach (const Resource& resource, current.persistentVolumes()) {
+    // This is enforced by the master.
+    CHECK(resource.disk().has_volume());
+
+    // Ignore absolute and nested paths.
+    const string& containerPath = resource.disk().volume().container_path();
+    if (strings::contains(containerPath, "/")) {
+      LOG(WARNING) << "Skipping updating symlink for persistent volume "
+                   << resource << " of container " << containerId
+                   << " because the container path '" << containerPath
+                   << "' contains slash";
+      continue;
+    }
+
+    if (resources.contains(resource)) {
+      continue;
+    }
+
+    string link = path::join(info->directory, containerPath);
+
+    LOG(INFO) << "Removing symlink '" << link << "' for persistent volume "
+              << resource << " of container " << containerId;
+
+    Try<Nothing> rm = os::rm(link);
+    if (rm.isError()) {
+      return Failure(
+          "Failed to remove the symlink for the unneeded "
+          "persistent volume at '" + link + "'");
+    }
+  }
+
+  // We then link additional persistent volumes.
+  foreach (const Resource& resource, resources.persistentVolumes()) {
+    // This is enforced by the master.
+    CHECK(resource.disk().has_volume());
+
+    // Ignore absolute and nested paths.
+    const string& containerPath = resource.disk().volume().container_path();
+    if (strings::contains(containerPath, "/")) {
+      LOG(WARNING) << "Skipping updating symlink for persistent volume "
+                   << resource << " of container " << containerId
+                   << " because the container path '" << containerPath
+                   << "' contains slash";
+      continue;
+    }
+
+    if (current.contains(resource)) {
+      continue;
+    }
+
+    string original = paths::getPersistentVolumePath(
+        flags.work_dir,
+        resource.role(),
+        resource.disk().persistence().id());
+
+    // Set the ownership of the persistent volume to match that of the
+    // sandbox directory.
+    //
+    // NOTE: Currently, persistent volumes in Mesos are exclusive,
+    // meaning that if a persistent volume is used by one task or
+    // executor, it cannot be concurrently used by other task or
+    // executor. But if we allow multiple executors to use same
+    // persistent volume at the same time in the future, the ownership
+    // of the persistent volume may conflict here.
+    //
+    // TODO(haosdent): Consider letting the frameworks specify the
+    // user/group of the persistent volumes.
+    struct stat s;
+    if (::stat(info->directory.c_str(), &s) < 0) {
+      return Failure(
+          "Failed to get ownership for '" + info->directory +
+          "': " + strerror(errno));
+    }
+
+    LOG(INFO) << "Changing the ownership of the persistent volume at '"
+              << original << "' with uid " << s.st_uid
+              << " and gid " << s.st_gid;
+
+    Try<Nothing> chown = os::chown(s.st_uid, s.st_gid, original, true);
+    if (chown.isError()) {
+      return Failure(
+          "Failed to change the ownership of the persistent volume at '" +
+          original + "' with uid " + stringify(s.st_uid) +
+          " and gid " + stringify(s.st_gid) + ": " + chown.error());
+    }
+
+    string link = path::join(info->directory, containerPath);
+
+    LOG(INFO) << "Adding symlink from '" << original << "' to '"
+              << link << "' for persistent volume " << resource
+              << " of container " << containerId;
+
+    Try<Nothing> symlink = ::fs::symlink(original, link);
+    if (symlink.isError()) {
+      return Failure(
+          "Failed to symlink persistent volume from '" +
+          original + "' to '" + link + "'");
+    }
+  }
+
+  // Store the updated resources.
+  info->resources = resources;
+
+  return Nothing();
+}
+
+
+Future<ResourceStatistics> PosixFilesystemIsolatorProcess::usage(
+    const ContainerID& containerId)
+{
+  // No-op, no usage gathered.
+  return ResourceStatistics();
+}
+
+
+Future<Nothing> PosixFilesystemIsolatorProcess::cleanup(
+    const ContainerID& containerId)
+{
+  // Symlinks for persistent resources will be removed when the work
+  // directory is GC'ed, therefore no need to do explicit cleanup.
+  infos.erase(containerId);
+
+  return Nothing();
+}
+
+} // namespace slave {
+} // namespace internal {
+} // namespace mesos {

http://git-wip-us.apache.org/repos/asf/mesos/blob/efb73d34/src/slave/containerizer/isolators/filesystem/posix.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/isolators/filesystem/posix.hpp 
b/src/slave/containerizer/isolators/filesystem/posix.hpp
new file mode 100644
index 0000000..16ba26f
--- /dev/null
+++ b/src/slave/containerizer/isolators/filesystem/posix.hpp
@@ -0,0 +1,88 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __POSIX_FILESYSTEM_ISOLATOR_HPP__
+#define __POSIX_FILESYSTEM_ISOLATOR_HPP__
+
+#include <mesos/slave/isolator.hpp>
+
+#include "slave/flags.hpp"
+
+namespace mesos {
+namespace internal {
+namespace slave {
+
+class PosixFilesystemIsolatorProcess : public mesos::slave::IsolatorProcess
+{
+public:
+  static Try<mesos::slave::Isolator*> create(const Flags& flags);
+
+  virtual ~PosixFilesystemIsolatorProcess();
+
+  virtual process::Future<Nothing> recover(
+      const std::list<mesos::slave::ExecutorRunState>& states,
+      const hashset<ContainerID>& orphans);
+
+  virtual process::Future<Option<CommandInfo>> prepare(
+      const ContainerID& containerId,
+      const ExecutorInfo& executorInfo,
+      const std::string& directory,
+      const Option<std::string>& rootfs,
+      const Option<std::string>& user);
+
+  virtual process::Future<Nothing> isolate(
+      const ContainerID& containerId,
+      pid_t pid);
+
+  virtual process::Future<mesos::slave::Limitation> watch(
+      const ContainerID& containerId);
+
+  virtual process::Future<Nothing> update(
+      const ContainerID& containerId,
+      const Resources& resources);
+
+  virtual process::Future<ResourceStatistics> usage(
+      const ContainerID& containerId);
+
+  virtual process::Future<Nothing> cleanup(
+      const ContainerID& containerId);
+
+private:
+  PosixFilesystemIsolatorProcess(const Flags& flags);
+
+  const Flags flags;
+
+  struct Info
+  {
+    explicit Info(const std::string& _directory)
+      : directory(_directory) {}
+
+    const std::string directory;
+
+    // Track resources so we can unlink unneeded persistent volumes.
+    Resources resources;
+  };
+
+  hashmap<ContainerID, process::Owned<Info>> infos;
+};
+
+} // namespace slave {
+} // namespace internal {
+} // namespace mesos {
+
+#endif // __POSIX_FILESYSTEM_ISOLATOR_HPP__

http://git-wip-us.apache.org/repos/asf/mesos/blob/efb73d34/src/slave/containerizer/mesos/containerizer.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/containerizer.cpp 
b/src/slave/containerizer/mesos/containerizer.cpp
index d3a596d..609620c 100644
--- a/src/slave/containerizer/mesos/containerizer.cpp
+++ b/src/slave/containerizer/mesos/containerizer.cpp
@@ -44,14 +44,24 @@
 #endif // __linux__
 
 #include "slave/containerizer/isolators/posix.hpp"
+
 #include "slave/containerizer/isolators/posix/disk.hpp"
+
 #ifdef __linux__
 #include "slave/containerizer/isolators/cgroups/cpushare.hpp"
 #include "slave/containerizer/isolators/cgroups/mem.hpp"
 #include "slave/containerizer/isolators/cgroups/perf_event.hpp"
+#endif
+
+#include "slave/containerizer/isolators/filesystem/posix.hpp"
+#ifdef __linux__
 #include "slave/containerizer/isolators/filesystem/shared.hpp"
+#endif
+
+#ifdef __linux__
 #include "slave/containerizer/isolators/namespaces/pid.hpp"
-#endif // __linux__
+#endif
+
 #ifdef WITH_NETWORK_ISOLATOR
 #include "slave/containerizer/isolators/network/port_mapping.hpp"
 #endif
@@ -104,6 +114,12 @@ Try<MesosContainerizer*> MesosContainerizer::create(
     isolation = flags.isolation;
   }
 
+  // A filesystem isolator is required for persistent volumes; use the
+  // filesystem/posix isolator if necessary.
+  if (!strings::contains(isolation, "filesystem/")) {
+    isolation += ",filesystem/posix";
+  }
+
   // Modify the flags to include any changes to isolation.
   Flags flags_ = flags;
   flags_.isolation = isolation;
@@ -112,6 +128,7 @@ Try<MesosContainerizer*> MesosContainerizer::create(
 
   // Create a MesosContainerizerProcess using isolators and a launcher.
   static const hashmap<string, Try<Isolator*> (*)(const Flags&)> creators = {
+    {"filesystem/posix", &PosixFilesystemIsolatorProcess::create},
     {"posix/cpu", &PosixCpuIsolatorProcess::create},
     {"posix/mem", &PosixMemIsolatorProcess::create},
     {"posix/disk", &PosixDiskIsolatorProcess::create},
@@ -565,24 +582,10 @@ Future<bool> MesosContainerizerProcess::launch(
   container->executorInfo = executorInfo;
   container->directory = directory;
   container->state = PREPARING;
-  containers_[containerId] = Owned<Container>(container);
-
-  // Prepare volumes for the container.
-  // TODO(jieyu): Consider decoupling file system isolation from
-  // runtime isolation. The existing isolators are actually for
-  // runtime isolation. For file system isolation, the interface might
-  // be different and we always need a file system isolator. The
-  // following logic should be moved to the file system isolator.
-  Try<Nothing> update = updateVolumes(containerId, executorInfo.resources());
-  if (update.isError()) {
-    return Failure("Failed to prepare volumes: " + update.error());
-  }
-
-  // NOTE: We do not update 'container->resources' until volumes are
-  // prepared because 'updateVolumes' above depends on the current
-  // container resources.
   container->resources = executorInfo.resources();
 
+  containers_.put(containerId, Owned<Container>(container));
+
   return provision(containerId, executorInfo, slaveId, directory, checkpoint)
     .then(defer(self(),
                 &Self::prepare,
@@ -1011,13 +1014,6 @@ Future<Nothing> MesosContainerizerProcess::update(
     return Nothing();
   }
 
-  // Update volumes for the container.
-  // TODO(jieyu): See comments above 'updateVolumes' in 'launch'.
-  Try<Nothing> update = updateVolumes(containerId, resources);
-  if (update.isError()) {
-    return Failure("Failed to update volumes: " + update.error());
-  }
-
   // NOTE: We update container's resources before isolators are updated
   // so that subsequent containerizer->update can be handled properly.
   container->resources = resources;
@@ -1412,131 +1408,6 @@ MesosContainerizerProcess::Metrics::~Metrics()
 }
 
 
-Try<Nothing> MesosContainerizerProcess::updateVolumes(
-    const ContainerID& containerId,
-    const Resources& updated)
-{
-  CHECK(containers_.contains(containerId));
-  const Owned<Container>& container = containers_[containerId];
-
-  // TODO(jieyu): Currently, we only allow non-nested relative
-  // container paths for volumes. This is enforced by the master. For
-  // those volumes, we create symlinks in the executor directory. No
-  // need to proceed if the container change the file system root
-  // because the symlinks will become invalid if the file system root
-  // is changed. Consider moving this logic to the file system
-  // isolator and let the file system isolator decide how to mount
-  // those volumes (by either creating symlinks or doing bind mounts).
-  if (container->rootfs.isSome()) {
-    LOG(WARNING) << "Cannot update volumes for container " << containerId
-                 << " because it changes the file system root";
-    return Nothing();
-  }
-
-  Resources current = container->resources;
-
-  // We first remove unneeded persistent volumes.
-  foreach (const Resource& resource, current.persistentVolumes()) {
-    // This is enforced by the master.
-    CHECK(resource.disk().has_volume());
-
-    // Ignore absolute and nested paths.
-    const string& containerPath = resource.disk().volume().container_path();
-    if (strings::contains(containerPath, "/")) {
-      LOG(WARNING) << "Skipping updating symlink for persistent volume "
-                   << resource << " of container " << containerId
-                   << " because the container path '" << containerPath
-                   << "' contains slash";
-      continue;
-    }
-
-    if (updated.contains(resource)) {
-      continue;
-    }
-
-    string link = path::join(container->directory, containerPath);
-
-    LOG(INFO) << "Removing symlink '" << link << "' for persistent volume "
-              << resource << " of container " << containerId;
-
-    Try<Nothing> rm = os::rm(link);
-    if (rm.isError()) {
-      return Error(
-          "Failed to remove the symlink for the unneeded "
-          "persistent volume at '" + link + "'");
-    }
-  }
-
-  // We then link additional persistent volumes.
-  foreach (const Resource& resource, updated.persistentVolumes()) {
-    // This is enforced by the master.
-    CHECK(resource.disk().has_volume());
-
-    // Ignore absolute and nested paths.
-    const string& containerPath = resource.disk().volume().container_path();
-    if (strings::contains(containerPath, "/")) {
-      LOG(WARNING) << "Skipping updating symlink for persistent volume "
-                   << resource << " of container " << containerId
-                   << " because the container path '" << containerPath
-                   << "' contains slash";
-      continue;
-    }
-
-    if (current.contains(resource)) {
-      continue;
-    }
-
-    string link = path::join(container->directory, containerPath);
-
-    string original = paths::getPersistentVolumePath(
-        flags.work_dir,
-        resource.role(),
-        resource.disk().persistence().id());
-
-    LOG(INFO) << "Adding symlink from '" << original << "' to '"
-              << link << "' for persistent volume " << resource
-              << " of container " << containerId;
-
-    Try<Nothing> symlink = fs::symlink(original, link);
-    if (symlink.isError()) {
-      return Error(
-          "Failed to symlink persistent volume from '" +
-          original + "' to '" + link + "'");
-    }
-
-    // Set the ownership of persistent volume to match the sandbox
-    // directory. Currently, persistent volumes in mesos are
-    // exclusive. If one persistent volume is used by one
-    // task/executor, it cannot be concurrently used by other
-    // task/executor. But if we allow multiple executors use same
-    // persistent volume at the same time in the future, the ownership
-    // of persistent volume may conflict here.
-    // TODO(haosdent): We need to update this after we have a proposed
-    // plan to adding user/group to persistent volumes.
-    struct stat s;
-    if (::stat(container->directory.c_str(), &s) < 0) {
-      return Error("Failed to get permissions on '" + container->directory +
-                   "': " + strerror(errno));
-    }
-
-    Try<Nothing> chown = os::chown(s.st_uid, s.st_gid, original, true);
-    if (chown.isError()) {
-      return Error("Failed to chown persistent volume '" + original +
-                   "': " + chown.error());
-    }
-  }
-
-  return Nothing();
-}
-
-
-static list<Future<Nothing>> __cleanupIsolators(
-    const list<Future<Nothing>>& cleanups)
-{
-  return cleanups;
-}
-
-
 static Future<list<Future<Nothing>>> _cleanupIsolators(
     const Owned<Isolator>& isolator,
     const ContainerID& containerId,
@@ -1553,7 +1424,7 @@ static Future<list<Future<Nothing>>> _cleanupIsolators(
   cleanup_.push_back(cleanup);
 
   return await(cleanup_)
-    .then(lambda::bind(&__cleanupIsolators, cleanups));
+    .then([cleanups]() -> list<Future<Nothing>> { return cleanups; });
 }
 
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/efb73d34/src/slave/containerizer/mesos/containerizer.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/containerizer.hpp 
b/src/slave/containerizer/mesos/containerizer.hpp
index a8c71d6..f6c580d 100644
--- a/src/slave/containerizer/mesos/containerizer.hpp
+++ b/src/slave/containerizer/mesos/containerizer.hpp
@@ -268,12 +268,6 @@ private:
   // destroy.
   void reaped(const ContainerID& containerId);
 
-  // Updates volumes for the given container according to its current
-  // resources and the given updated resources.
-  Try<Nothing> updateVolumes(
-      const ContainerID& containerId,
-      const Resources& updated);
-
   // TODO(jieyu): Consider introducing an Isolators struct and moving
   // all isolator related operations to that struct.
   process::Future<std::list<process::Future<Nothing>>> cleanupIsolators(

Reply via email to