Repository: mesos Updated Branches: refs/heads/master f93f4fca5 -> 27a2fb864
Tracked recovered and prepared cgroups subsystems for containers. Recover newly added cgroups subsystems on existing containers would fail, and continue to perform the `update` and other operations of the newly added subsystems on them don't make sense. This patch add the tracking for the recovered or prepared cgroups subsystems of a container and skip performing unnecessary subsystem operations on the container if the subsystem is never recovered or prepared. Review: https://reviews.apache.org/r/51631/ Project: http://git-wip-us.apache.org/repos/asf/mesos/repo Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/27a2fb86 Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/27a2fb86 Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/27a2fb86 Branch: refs/heads/master Commit: 27a2fb864f7967142f346c2f594bbba494209f57 Parents: f93f4fc Author: haosdent huang <haosd...@gmail.com> Authored: Tue Sep 20 09:56:04 2016 -0700 Committer: Jie Yu <yujie....@gmail.com> Committed: Tue Sep 20 09:56:04 2016 -0700 ---------------------------------------------------------------------- .../mesos/isolators/cgroups/cgroups.cpp | 62 ++++++++++++++------ .../mesos/isolators/cgroups/cgroups.hpp | 5 ++ 2 files changed, 48 insertions(+), 19 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/mesos/blob/27a2fb86/src/slave/containerizer/mesos/isolators/cgroups/cgroups.cpp ---------------------------------------------------------------------- diff --git a/src/slave/containerizer/mesos/isolators/cgroups/cgroups.cpp b/src/slave/containerizer/mesos/isolators/cgroups/cgroups.cpp index 937356b..c2fcc0b 100644 --- a/src/slave/containerizer/mesos/isolators/cgroups/cgroups.cpp +++ b/src/slave/containerizer/mesos/isolators/cgroups/cgroups.cpp @@ -302,6 +302,7 @@ Future<Nothing> CgroupsIsolatorProcess::___recover( const string cgroup = path::join(flags.cgroups_root, containerId.value()); list<Future<Nothing>> recovers; + hashset<string> recoveredSubsystems; // TODO(haosdent): Use foreachkey once MESOS-5037 is resolved. foreach (const string& hierarchy, subsystems.keys()) { @@ -327,6 +328,7 @@ Future<Nothing> CgroupsIsolatorProcess::___recover( } foreach (const Owned<Subsystem>& subsystem, subsystems.get(hierarchy)) { + recoveredSubsystems.insert(subsystem->name()); recovers.push_back(subsystem->recover(containerId)); } } @@ -336,12 +338,14 @@ Future<Nothing> CgroupsIsolatorProcess::___recover( PID<CgroupsIsolatorProcess>(this), &CgroupsIsolatorProcess::____recover, containerId, + recoveredSubsystems, lambda::_1)); } Future<Nothing> CgroupsIsolatorProcess::____recover( const ContainerID& containerId, + const hashset<string>& recoveredSubsystems, const list<Future<Nothing>>& futures) { vector<string> errors; @@ -365,6 +369,8 @@ Future<Nothing> CgroupsIsolatorProcess::____recover( containerId, path::join(flags.cgroups_root, containerId.value()))); + infos[containerId]->subsystems = recoveredSubsystems; + return Nothing(); } @@ -390,6 +396,8 @@ Future<Option<ContainerLaunchInfo>> CgroupsIsolatorProcess::prepare( containerId, path::join(flags.cgroups_root, containerId.value()))); + list<Future<Nothing>> prepares; + // TODO(haosdent): Use foreachkey once MESOS-5037 is resolved. foreach (const string& hierarchy, subsystems.keys()) { string path = path::join(hierarchy, infos[containerId]->cgroup); @@ -419,6 +427,11 @@ Future<Option<ContainerLaunchInfo>> CgroupsIsolatorProcess::prepare( "'" + path + "': " + create.error()); } + foreach (const Owned<Subsystem>& subsystem, subsystems.get(hierarchy)) { + infos[containerId]->subsystems.insert(subsystem->name()); + prepares.push_back(subsystem->prepare(containerId)); + } + // Chown the cgroup so the executor can create nested cgroups. Do // not recurse so the control files are still owned by the slave // user and thus cannot be changed by the executor. @@ -467,11 +480,6 @@ Future<Option<ContainerLaunchInfo>> CgroupsIsolatorProcess::prepare( } } - list<Future<Nothing>> prepares; - foreachvalue (const Owned<Subsystem>& subsystem, subsystems) { - prepares.push_back(subsystem->prepare(containerId)); - } - return await(prepares) .then(defer( PID<CgroupsIsolatorProcess>(this), @@ -607,12 +615,14 @@ Future<ContainerLimitation> CgroupsIsolatorProcess::watch( } foreachvalue (const Owned<Subsystem>& subsystem, subsystems) { - subsystem->watch(containerId) - .onAny(defer( - PID<CgroupsIsolatorProcess>(this), - &CgroupsIsolatorProcess::_watch, - containerId, - lambda::_1)); + if (infos[containerId]->subsystems.contains(subsystem->name())) { + subsystem->watch(containerId) + .onAny(defer( + PID<CgroupsIsolatorProcess>(this), + &CgroupsIsolatorProcess::_watch, + containerId, + lambda::_1)); + } } return infos[containerId]->limitation.future(); @@ -647,7 +657,9 @@ Future<Nothing> CgroupsIsolatorProcess::update( list<Future<Nothing>> updates; foreachvalue (const Owned<Subsystem>& subsystem, subsystems) { - updates.push_back(subsystem->update(containerId, resources)); + if (infos[containerId]->subsystems.contains(subsystem->name())) { + updates.push_back(subsystem->update(containerId, resources)); + } } return await(updates) @@ -693,7 +705,9 @@ Future<ResourceStatistics> CgroupsIsolatorProcess::usage( list<Future<ResourceStatistics>> usages; foreachvalue (const Owned<Subsystem>& subsystem, subsystems) { - usages.push_back(subsystem->usage(containerId)); + if (infos[containerId]->subsystems.contains(subsystem->name())) { + usages.push_back(subsystem->usage(containerId)); + } } return await(usages) @@ -729,7 +743,9 @@ Future<ContainerStatus> CgroupsIsolatorProcess::status( list<Future<ContainerStatus>> statuses; foreachvalue (const Owned<Subsystem>& subsystem, subsystems) { - statuses.push_back(subsystem->status(containerId)); + if (infos[containerId]->subsystems.contains(subsystem->name())) { + statuses.push_back(subsystem->status(containerId)); + } } return await(statuses) @@ -768,7 +784,9 @@ Future<Nothing> CgroupsIsolatorProcess::cleanup( list<Future<Nothing>> cleanups; foreachvalue (const Owned<Subsystem>& subsystem, subsystems) { - cleanups.push_back(subsystem->cleanup(containerId)); + if (infos[containerId]->subsystems.contains(subsystem->name())) { + cleanups.push_back(subsystem->cleanup(containerId)); + } } return await(cleanups) @@ -805,10 +823,16 @@ Future<Nothing> CgroupsIsolatorProcess::_cleanup( // TODO(haosdent): Use foreachkey once MESOS-5037 is resolved. foreach (const string& hierarchy, subsystems.keys()) { - destroys.push_back(cgroups::destroy( - hierarchy, - infos[containerId]->cgroup, - cgroups::DESTROY_TIMEOUT)); + foreach (const Owned<Subsystem>& subsystem, subsystems.get(hierarchy)) { + if (infos[containerId]->subsystems.contains(subsystem->name())) { + destroys.push_back(cgroups::destroy( + hierarchy, + infos[containerId]->cgroup, + cgroups::DESTROY_TIMEOUT)); + + break; + } + } } return await(destroys) http://git-wip-us.apache.org/repos/asf/mesos/blob/27a2fb86/src/slave/containerizer/mesos/isolators/cgroups/cgroups.hpp ---------------------------------------------------------------------- diff --git a/src/slave/containerizer/mesos/isolators/cgroups/cgroups.hpp b/src/slave/containerizer/mesos/isolators/cgroups/cgroups.hpp index cf143ee..229bafc 100644 --- a/src/slave/containerizer/mesos/isolators/cgroups/cgroups.hpp +++ b/src/slave/containerizer/mesos/isolators/cgroups/cgroups.hpp @@ -97,6 +97,10 @@ private: // This promise will complete if a container is impacted by a resource // limitation and should be terminated. process::Promise<mesos::slave::ContainerLimitation> limitation; + + // This `hashset` stores the name of subsystems which are recovered + // or prepared for the container. + hashset<std::string> subsystems; }; CgroupsIsolatorProcess( @@ -117,6 +121,7 @@ private: process::Future<Nothing> ____recover( const ContainerID& containerId, + const hashset<std::string>& recoveredSubsystems, const std::list<process::Future<Nothing>>& futures); process::Future<Option<mesos::slave::ContainerLaunchInfo>> _prepare(