Repository: mesos Updated Branches: refs/heads/master d376f05fe -> 52cf9b3ff
Retry freeze in cgroups TasksKiller to work around MESOS-1689. Review: https://reviews.apache.org/r/24511 Project: http://git-wip-us.apache.org/repos/asf/mesos/repo Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/52cf9b3f Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/52cf9b3f Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/52cf9b3f Branch: refs/heads/master Commit: 52cf9b3ffbbe7648d1b529b5112b1b5b6360eaa5 Parents: d376f05 Author: Jie Yu <[email protected]> Authored: Fri Aug 8 14:56:16 2014 -0700 Committer: Jie Yu <[email protected]> Committed: Fri Aug 8 15:44:23 2014 -0700 ---------------------------------------------------------------------- src/linux/cgroups.cpp | 31 ++++++++++++++++++++++++++++++- src/linux/cgroups.hpp | 7 +++++++ 2 files changed, 37 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/mesos/blob/52cf9b3f/src/linux/cgroups.cpp ---------------------------------------------------------------------- diff --git a/src/linux/cgroups.cpp b/src/linux/cgroups.cpp index 39a4874..47be0ef 100644 --- a/src/linux/cgroups.cpp +++ b/src/linux/cgroups.cpp @@ -1470,10 +1470,30 @@ protected: virtual void finalize() { chain.discard(); + + // TODO(jieyu): Wait until 'chain' is in DISCARDED state before + // discarding 'promise'. promise.discard(); } private: + static Future<Nothing> freezeTimedout( + Future<Nothing> future, + const PID<TasksKiller>& pid, + const string& hierarchy, + const string& cgroup) + { + // Cancel the freeze operation. + // TODO(jieyu): Wait until 'future' is in DISCARDED state before + // starting retry. + future.discard(); + + // Thaw the cgroup before trying to freeze again to allow any + // pending signals to be delivered. See MESOS-1689 for details. + return cgroups::freezer::thaw(hierarchy, cgroup) + .then(defer(pid, &Self::freeze)); + } + void killTasks() { // Chain together the steps needed to kill all tasks in the cgroup. 
chain = freeze() // Freeze the cgroup. @@ -1486,7 +1506,16 @@ private: Future<Nothing> freeze() { - return cgroups::freezer::freeze(hierarchy, cgroup); + // TODO(jieyu): This is a workaround for MESOS-1689. We will move + // away from freezer once we have pid namespace support. + return cgroups::freezer::freeze(hierarchy, cgroup).after( + FREEZE_RETRY_INTERVAL, + lambda::bind( + &freezeTimedout, + lambda::_1, + self(), + hierarchy, + cgroup)); } Future<Nothing> kill() http://git-wip-us.apache.org/repos/asf/mesos/blob/52cf9b3f/src/linux/cgroups.hpp ---------------------------------------------------------------------- diff --git a/src/linux/cgroups.hpp b/src/linux/cgroups.hpp index 9dfba6e..26dcb3d 100644 --- a/src/linux/cgroups.hpp +++ b/src/linux/cgroups.hpp @@ -45,6 +45,13 @@ namespace cgroups { // explicitly specified. const Duration DESTROY_TIMEOUT = Seconds(60); + +// Freezing a cgroup may get stuck (see MESOS-1689 for details). To +// work around this, we may want to thaw the cgroup and retry freezing it. +// This is the suggested retry interval. +const Duration FREEZE_RETRY_INTERVAL = Seconds(10); + + // Default number of assign attempts when moving threads to a cgroup. const unsigned int THREAD_ASSIGN_RETRIES = 100;
