Correctly recover pid in Linux launcher. If the freezer cgroup is absent (if the slave terminates after the cgroup is destroyed but before realizing) we should still recover the pid because we check this on destroy().
Review: https://reviews.apache.org/r/27166/ Project: http://git-wip-us.apache.org/repos/asf/mesos/repo Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/823b9924 Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/823b9924 Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/823b9924 Branch: refs/heads/master Commit: 823b99248cc36b4bd2918b382bdec8afa58030ce Parents: 7b196d2 Author: Ian Downes <[email protected]> Authored: Fri Oct 24 11:55:09 2014 -0700 Committer: Ian Downes <[email protected]> Committed: Tue Oct 28 12:04:16 2014 -0700 ---------------------------------------------------------------------- src/slave/containerizer/linux_launcher.cpp | 29 ++++++++++++++----------- 1 file changed, 16 insertions(+), 13 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/mesos/blob/823b9924/src/slave/containerizer/linux_launcher.cpp ---------------------------------------------------------------------- diff --git a/src/slave/containerizer/linux_launcher.cpp b/src/slave/containerizer/linux_launcher.cpp index 6930efe..7a4ef69 100644 --- a/src/slave/containerizer/linux_launcher.cpp +++ b/src/slave/containerizer/linux_launcher.cpp @@ -132,17 +132,6 @@ Future<Nothing> LinuxLauncher::recover(const std::list<state::RunState>& states) } const ContainerID& containerId = state.id.get(); - Try<bool> exists = cgroups::exists(hierarchy, cgroup(containerId)); - - if (!exists.get()) { - // This may occur if the freezer cgroup was destroyed but the - // slave dies before noticing this. The containerizer will - // monitor the container's pid and notice that it has exited, - // triggering destruction of the container. - LOG(INFO) << "Couldn't find freezer cgroup for container " << containerId; - continue; - } - if (state.forkedPid.isNone()) { return Failure("Executor pid is required to recover container " + stringify(containerId)); @@ -161,8 +150,24 @@ Future<Nothing> LinuxLauncher::recover(const std::list<state::RunState>& states) " for container " + stringify(containerId)); } + // Store the pid now because if the freezer cgroup is absent + // (slave terminated after the cgroup is destroyed but before it + // was notified) then we'll still need it for the check in + // destroy() when we clean up. pids.put(containerId, pid); + Try<bool> exists = cgroups::exists(hierarchy, cgroup(containerId)); + + if (!exists.get()) { + // This may occur if the freezer cgroup was destroyed but the + // slave dies before noticing this. The containerizer will + // monitor the container's pid and notice that it has exited, + // triggering destruction of the container. + LOG(INFO) << "Couldn't find freezer cgroup for container " + << containerId << ", assuming already destroyed"; + continue; + } + cgroups.insert(cgroup(containerId)); } @@ -344,8 +349,6 @@ Try<pid_t> LinuxLauncher::fork( return Error("Failed to synchronize child process"); } - // Store the pid (session id and process group id) if this is the - // first process forked for this container. if (!pids.contains(containerId)) { pids.put(containerId, child.get().pid()); }
