Removed the code of checkpointing container root filesystem path. Review: https://reviews.apache.org/r/37105
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/3a862137 Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/3a862137 Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/3a862137 Branch: refs/heads/master Commit: 3a862137aa1bcc6acb4ad30fc9d4191904ec5538 Parents: 210dadf Author: Jie Yu <[email protected]> Authored: Tue Aug 4 16:46:05 2015 -0700 Committer: Jie Yu <[email protected]> Committed: Fri Aug 7 16:54:14 2015 -0700 ---------------------------------------------------------------------- include/mesos/slave/isolator.proto | 1 - src/common/protobuf_utils.cpp | 6 +- src/common/protobuf_utils.hpp | 3 +- src/slave/containerizer/mesos/containerizer.cpp | 183 ++++++++----------- src/slave/containerizer/mesos/containerizer.hpp | 42 ++--- src/slave/containerizer/provisioner.hpp | 5 +- src/slave/paths.cpp | 19 -- src/slave/state.cpp | 24 --- src/slave/state.hpp | 1 - 9 files changed, 98 insertions(+), 186 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/mesos/blob/3a862137/include/mesos/slave/isolator.proto ---------------------------------------------------------------------- diff --git a/include/mesos/slave/isolator.proto b/include/mesos/slave/isolator.proto index 3d9222b..39d20c7 100644 --- a/include/mesos/slave/isolator.proto +++ b/include/mesos/slave/isolator.proto @@ -54,7 +54,6 @@ message ContainerState required uint64 pid = 3; // Executor pid. required string directory = 4; // Executor work directory. - optional string rootfs = 5; // Optional container rootfs. } http://git-wip-us.apache.org/repos/asf/mesos/blob/3a862137/src/common/protobuf_utils.cpp ---------------------------------------------------------------------- diff --git a/src/common/protobuf_utils.cpp b/src/common/protobuf_utils.cpp index 3cb6845..4de176b 100644 --- a/src/common/protobuf_utils.cpp +++ b/src/common/protobuf_utils.cpp @@ -240,17 +240,13 @@ ContainerState createContainerState( const ExecutorInfo& executorInfo, const ContainerID& container_id, pid_t pid, - const std::string& directory, - const Option<std::string>& rootfs) + const std::string& directory) { ContainerState state; state.mutable_executor_info()->CopyFrom(executorInfo); state.mutable_container_id()->CopyFrom(container_id); state.set_pid(pid); state.set_directory(directory); - if (rootfs.isSome()) { - state.set_rootfs(rootfs.get()); - } return state; } http://git-wip-us.apache.org/repos/asf/mesos/blob/3a862137/src/common/protobuf_utils.hpp ---------------------------------------------------------------------- diff --git a/src/common/protobuf_utils.hpp b/src/common/protobuf_utils.hpp index a4708ed..312bc61 100644 --- a/src/common/protobuf_utils.hpp +++ b/src/common/protobuf_utils.hpp @@ -88,8 +88,7 @@ mesos::slave::ContainerState createContainerState( const ExecutorInfo& executorInfo, const ContainerID& id, pid_t pid, - const std::string& directory, - const Option<std::string>& rootfs); + const std::string& directory); } // namespace slave { http://git-wip-us.apache.org/repos/asf/mesos/blob/3a862137/src/slave/containerizer/mesos/containerizer.cpp ---------------------------------------------------------------------- diff --git a/src/slave/containerizer/mesos/containerizer.cpp b/src/slave/containerizer/mesos/containerizer.cpp index e9fc4d7..78db4c1 100644 --- a/src/slave/containerizer/mesos/containerizer.cpp +++ b/src/slave/containerizer/mesos/containerizer.cpp @@ -422,8 +422,7 @@ Future<Nothing> MesosContainerizerProcess::recover( executorInfo, run.get().id.get(), run.get().forkedPid.get(), - directory, - run.get().rootfs); + directory); recoverable.push_back(executorRunState); } @@ -473,10 +472,6 @@ Future<Nothing> MesosContainerizerProcess::__recover( container->directory = run.directory(); - if (run.has_rootfs()) { - container->rootfs = run.rootfs(); - } - // We only checkpoint the containerizer pid after the container // successfully launched, therefore we can assume checkpointed // containers should be running after recover. @@ -551,6 +546,33 @@ void MesosContainerizerProcess::___recover( } +Future<bool> MesosContainerizerProcess::launch( + const ContainerID& containerId, + const TaskInfo& taskInfo, + const ExecutorInfo& executorInfo, + const string& directory, + const Option<string>& user, + const SlaveID& slaveId, + const PID<Slave>& slavePid, + bool checkpoint) +{ + if (taskInfo.has_container()) { + // We return false as this containerizer does not support + // handling TaskInfo::ContainerInfo. + return false; + } + + return launch( + containerId, + executorInfo, + directory, + user, + slaveId, + slavePid, + checkpoint); +} + + // Launching an executor involves the following steps: // 1. Call prepare on each isolator. // 2. Fork the executor. The forked child is blocked from exec'ing until it has @@ -607,38 +629,29 @@ Future<bool> MesosContainerizerProcess::launch( containers_.put(containerId, Owned<Container>(container)); - return provision(containerId, executorInfo, slaveId, directory, checkpoint) - .then(defer(self(), - &Self::prepare, - containerId, - executorInfo, - directory, - user)) + return provision(containerId, executorInfo) .then(defer(self(), &Self::_launch, containerId, executorInfo, directory, user, + lambda::_1, slaveId, slavePid, - checkpoint, - lambda::_1)); + checkpoint)); } -Future<Nothing> MesosContainerizerProcess::provision( +Future<Option<string>> MesosContainerizerProcess::provision( const ContainerID& containerId, - const ExecutorInfo& executorInfo, - const SlaveID& slaveId, - const string& directory, - bool checkpoint) + const ExecutorInfo& executorInfo) { if (!executorInfo.has_container()) { // TODO(idownes): Consider refusing to run a container if the // containerizer is configured with an image provisioner but the // executor doesn't specify an image. - return Nothing(); + return None(); } if (executorInfo.container().type() != ContainerInfo::MESOS) { @@ -647,7 +660,7 @@ Future<Nothing> MesosContainerizerProcess::provision( if (!executorInfo.container().mesos().has_image()) { // No image to provision. - return Nothing(); + return None(); } Image image = executorInfo.container().mesos().image(); @@ -659,75 +672,32 @@ Future<Nothing> MesosContainerizerProcess::provision( } return provisioners[image.type()]->provision(containerId, image) - .then(defer(self(), - &Self::_provision, - containerId, - executorInfo, - slaveId, - checkpoint, - lambda::_1)); + .then([] (const string& rootfs) -> Option<string> { return rootfs; }); } -Future<Nothing> MesosContainerizerProcess::_provision( - const ContainerID& containerId, - const ExecutorInfo& executorInfo, - const SlaveID& slaveId, - bool checkpoint, - const string& rootfs) -{ - if (!containers_.contains(containerId)) { - return Failure("Container is already destroyed"); - } - - containers_[containerId]->rootfs = rootfs; - - if (checkpoint) { - const string path = slave::paths::getContainerRootfsPath( - slave::paths::getMetaRootDir(flags.work_dir), - slaveId, - executorInfo.framework_id(), - executorInfo.executor_id(), - containerId); - - LOG(INFO) << "Checkpointing container " << containerId << " rootfs path '" - << rootfs << "' to '" << path << "'"; - - Try<Nothing> checkpointed = slave::state::checkpoint(path, rootfs); - if (checkpointed.isError()) { - return Failure("Failed to checkpoint container rootfs path to '" + - path + "': " + checkpointed.error()); - } - } - - return Nothing(); -} - - -Future<bool> MesosContainerizerProcess::launch( +Future<bool> MesosContainerizerProcess::_launch( const ContainerID& containerId, - const TaskInfo& taskInfo, const ExecutorInfo& executorInfo, const string& directory, const Option<string>& user, + const Option<string>& rootfs, const SlaveID& slaveId, const PID<Slave>& slavePid, bool checkpoint) { - if (taskInfo.has_container()) { - // We return false as this containerizer does not support - // handling TaskInfo::ContainerInfo. - return false; - } - - return launch( - containerId, - executorInfo, - directory, - user, - slaveId, - slavePid, - checkpoint); + return prepare(containerId, executorInfo, directory, user, rootfs) + .then(defer(self(), + &Self::__launch, + containerId, + executorInfo, + directory, + user, + rootfs, + slaveId, + slavePid, + checkpoint, + lambda::_1)); } @@ -759,7 +729,8 @@ Future<list<Option<ContainerPrepareInfo>>> MesosContainerizerProcess::prepare( const ContainerID& containerId, const ExecutorInfo& executorInfo, const string& directory, - const Option<string>& user) + const Option<string>& user, + const Option<string>& rootfs) { CHECK(containers_.contains(containerId)); @@ -776,7 +747,7 @@ Future<list<Option<ContainerPrepareInfo>>> MesosContainerizerProcess::prepare( containerId, executorInfo, directory, - containers_[containerId]->rootfs, + rootfs, user, lambda::_1)); } @@ -808,11 +779,12 @@ Future<Nothing> MesosContainerizerProcess::fetch( } -Future<bool> MesosContainerizerProcess::_launch( +Future<bool> MesosContainerizerProcess::__launch( const ContainerID& containerId, const ExecutorInfo& executorInfo, const string& directory, const Option<string>& user, + const Option<string>& rootfs, const SlaveID& slaveId, const PID<Slave>& slavePid, bool checkpoint, @@ -829,9 +801,7 @@ Future<bool> MesosContainerizerProcess::_launch( // Prepare environment variables for the executor. map<string, string> environment = executorEnvironment( executorInfo, - containers_[containerId]->rootfs.isSome() - ? flags.sandbox_directory - : directory, + rootfs.isSome() ? flags.sandbox_directory : directory, slaveId, slavePid, checkpoint, @@ -866,11 +836,8 @@ Future<bool> MesosContainerizerProcess::_launch( launchFlags.command = JSON::Protobuf(executorInfo.command()); - launchFlags.directory = containers_[containerId]->rootfs.isSome() - ? flags.sandbox_directory - : directory; - - launchFlags.rootfs = containers_[containerId]->rootfs; + launchFlags.directory = rootfs.isSome() ? flags.sandbox_directory : directory; + launchFlags.rootfs = rootfs; launchFlags.user = user; launchFlags.pipe_read = pipes[0]; launchFlags.pipe_write = pipes[1]; @@ -1291,25 +1258,15 @@ void MesosContainerizerProcess::____destroy( } } - if (container->rootfs.isNone()) { - // No rootfs to clean up so continue. - _____destroy(containerId, status, message, killed, Nothing()); - return; - } - - Image::Type type = container->executorInfo.container().mesos().image().type(); - - if (!provisioners.contains(type)) { - // We should have a provisioner to handle cleaning up the rootfs - // but continue clean up even if we don't. - LOG(WARNING) << "No provisioner to clean up rootfs for container " - << containerId << " (type '" << type << "'), continuing"; - - _____destroy(containerId, status, message, killed, Nothing()); - return; + // Clean up root filesystems used for this container. + // NOTE: We need to call 'destroy' for each provisioner because + // multiple provisioners might be used for a container. + list<Future<bool>> futures; + foreachvalue (const Owned<Provisioner>& provisioner, provisioners) { + futures.push_back(provisioner->destroy(containerId)); } - provisioners[type]->destroy(containerId) + await(futures) .onAny(defer(self(), &Self::_____destroy, containerId, @@ -1327,13 +1284,19 @@ void MesosContainerizerProcess::_____destroy( const Future<Option<int>>& status, Option<string> message, bool killed, - const Future<Nothing>& future) + const Future<list<Future<bool>>>& futures) { CHECK(containers_.contains(containerId)); Container* container = containers_[containerId].get(); - if (!future.isReady()) { + CHECK_READY(futures); + + foreach (const Future<bool> future, futures.get()) { + if (future.isReady()) { + continue; + } + container->promise.fail( "Failed to clean up the root filesystem when destroying container: " + (future.isFailed() ? future.failure() : "discarded future")); http://git-wip-us.apache.org/repos/asf/mesos/blob/3a862137/src/slave/containerizer/mesos/containerizer.hpp ---------------------------------------------------------------------- diff --git a/src/slave/containerizer/mesos/containerizer.hpp b/src/slave/containerizer/mesos/containerizer.hpp index 68e9139..8bfd622 100644 --- a/src/slave/containerizer/mesos/containerizer.hpp +++ b/src/slave/containerizer/mesos/containerizer.hpp @@ -181,25 +181,16 @@ private: const std::list<mesos::slave::ContainerState>& recovered, const hashset<ContainerID>& orphans); - process::Future<Nothing> provision( + process::Future<Option<std::string>> provision( const ContainerID& containerId, - const ExecutorInfo& executorInfo, - const SlaveID& slaveId, - const std::string& directory, - bool checkpoint); - - process::Future<Nothing> _provision( - const ContainerID& containerId, - const ExecutorInfo& executorInfo, - const SlaveID& slaveId, - bool checkpoint, - const std::string& rootfs); + const ExecutorInfo& executorInfo); process::Future<std::list<Option<mesos::slave::ContainerPrepareInfo>>> prepare(const ContainerID& containerId, const ExecutorInfo& executorInfo, const std::string& directory, - const Option<std::string>& user); + const Option<std::string>& user, + const Option<std::string>& rootfs); process::Future<Nothing> fetch( const ContainerID& containerId, @@ -213,6 +204,17 @@ private: const ExecutorInfo& executorInfo, const std::string& directory, const Option<std::string>& user, + const Option<std::string>& rootfs, + const SlaveID& slaveId, + const process::PID<Slave>& slavePid, + bool checkpoint); + + process::Future<bool> __launch( + const ContainerID& containerId, + const ExecutorInfo& executorInfo, + const std::string& directory, + const Option<std::string>& user, + const Option<std::string>& rootfs, const SlaveID& slaveId, const process::PID<Slave>& slavePid, bool checkpoint, @@ -225,20 +227,20 @@ private: // Continues 'destroy()' once isolators has completed. void _destroy(const ContainerID& containerId, bool killed); - // Continues 'destroy()' once all processes have been killed by the launcher. + // Continues '_destroy()' once all processes have been killed by the launcher. void __destroy( const ContainerID& containerId, const process::Future<Nothing>& future, bool killed); - // Continues '_destroy()' once we get the exit status of the executor. + // Continues '__destroy()' once we get the exit status of the executor. void ___destroy( const ContainerID& containerId, const process::Future<Option<int>>& status, const Option<std::string>& message, bool killed); - // Continues '__destroy()' once all isolators have completed + // Continues '___destroy()' once all isolators have completed // cleanup. void ____destroy( const ContainerID& containerId, @@ -247,14 +249,14 @@ private: Option<std::string> message, bool killed); - // Continues (and completes) '__destroy()' once any root filessystem + // Continues (and completes) '____destroy()' once any root filessystem // has been cleaned up. void _____destroy( const ContainerID& containerId, const process::Future<Option<int>>& status, Option<std::string> message, bool killed, - const process::Future<Nothing>& cleanup); + const process::Future<std::list<process::Future<bool>>>& futures); // Call back for when an isolator limits a container and impacts the // processes. This will trigger container destruction. @@ -322,10 +324,6 @@ private: // The executor's working directory on the host. std::string directory; - // The path to the container's rootfs, if full filesystem - // isolation is used. - Option<std::string> rootfs; - State state; }; http://git-wip-us.apache.org/repos/asf/mesos/blob/3a862137/src/slave/containerizer/provisioner.hpp ---------------------------------------------------------------------- diff --git a/src/slave/containerizer/provisioner.hpp b/src/slave/containerizer/provisioner.hpp index 3df6d59..541dd4e 100644 --- a/src/slave/containerizer/provisioner.hpp +++ b/src/slave/containerizer/provisioner.hpp @@ -65,8 +65,9 @@ public: // Destroy a previously provisioned root filesystem. Assumes that // all references (e.g., mounts, open files) to the provisioned - // filesystem have been removed. - virtual process::Future<Nothing> destroy(const ContainerID& containerId) = 0; + // filesystem have been removed. Return false if there is no + // provisioned root filesystem for the given container. + virtual process::Future<bool> destroy(const ContainerID& containerId) = 0; }; } // namespace slave { http://git-wip-us.apache.org/repos/asf/mesos/blob/3a862137/src/slave/paths.cpp ---------------------------------------------------------------------- diff --git a/src/slave/paths.cpp b/src/slave/paths.cpp index 404c214..0741616 100644 --- a/src/slave/paths.cpp +++ b/src/slave/paths.cpp @@ -51,7 +51,6 @@ const char LIBPROCESS_PID_FILE[] = "libprocess.pid"; const char EXECUTOR_INFO_FILE[] = "executor.info"; const char EXECUTOR_SENTINEL_FILE[] = "executor.sentinel"; const char FORKED_PID_FILE[] = "forked.pid"; -const char CONTAINER_ROOTFS_FILE[] = "rootfs"; const char TASK_INFO_FILE[] = "task.info"; const char TASK_UPDATES_FILE[] = "task.updates"; const char RESOURCES_INFO_FILE[] = "resources.info"; @@ -269,24 +268,6 @@ string getForkedPidPath( } -string getContainerRootfsPath( - const string& rootDir, - const SlaveID& slaveId, - const FrameworkID& frameworkId, - const ExecutorID& executorId, - const ContainerID& containerId) -{ - return path::join( - getExecutorRunPath( - rootDir, - slaveId, - frameworkId, - executorId, - containerId), - CONTAINER_ROOTFS_FILE); -} - - Try<list<string>> getTaskPaths( const string& rootDir, const SlaveID& slaveId, http://git-wip-us.apache.org/repos/asf/mesos/blob/3a862137/src/slave/state.cpp ---------------------------------------------------------------------- diff --git a/src/slave/state.cpp b/src/slave/state.cpp index b9f2d8a..f8a9514 100644 --- a/src/slave/state.cpp +++ b/src/slave/state.cpp @@ -534,30 +534,6 @@ Try<RunState> RunState::recover( state.libprocessPid = process::UPID(pid.get()); - // Read the location of the container's root filesystem, if used. - path = paths::getContainerRootfsPath( - rootDir, slaveId, frameworkId, executorId, containerId); - - // Because a container root filesystem is optional we'll recover the - // path if present but continue to recover normally if it's not. - if (os::exists(path)) { - Try<string> rootfs = os::read(path); - if (rootfs.isError()) { - message = "Failed to recover container rootfs from '" + path + - "': " + rootfs.error(); - - if (strict) { - return Error(message); - } else { - LOG(WARNING) << message; - state.errors++; - return state; - } - } - - state.rootfs = rootfs.get(); - } - return state; } http://git-wip-us.apache.org/repos/asf/mesos/blob/3a862137/src/slave/state.hpp ---------------------------------------------------------------------- diff --git a/src/slave/state.hpp b/src/slave/state.hpp index cecf200..5a1a9bb 100644 --- a/src/slave/state.hpp +++ b/src/slave/state.hpp @@ -208,7 +208,6 @@ struct RunState hashmap<TaskID, TaskState> tasks; Option<pid_t> forkedPid; Option<process::UPID> libprocessPid; - Option<std::string> rootfs; // Executor terminated and all its updates acknowledged. bool completed;
