Correctly recover pid in Linux launcher.

If the freezer cgroup is absent (if the slave terminates after the
cgroup is destroyed but before realizing) we should still recover the
pid because we check this on destroy().

Review: https://reviews.apache.org/r/27166/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/823b9924
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/823b9924
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/823b9924

Branch: refs/heads/master
Commit: 823b99248cc36b4bd2918b382bdec8afa58030ce
Parents: 7b196d2
Author: Ian Downes <[email protected]>
Authored: Fri Oct 24 11:55:09 2014 -0700
Committer: Ian Downes <[email protected]>
Committed: Tue Oct 28 12:04:16 2014 -0700

----------------------------------------------------------------------
 src/slave/containerizer/linux_launcher.cpp | 29 ++++++++++++++-----------
 1 file changed, 16 insertions(+), 13 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/823b9924/src/slave/containerizer/linux_launcher.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/linux_launcher.cpp 
b/src/slave/containerizer/linux_launcher.cpp
index 6930efe..7a4ef69 100644
--- a/src/slave/containerizer/linux_launcher.cpp
+++ b/src/slave/containerizer/linux_launcher.cpp
@@ -132,17 +132,6 @@ Future<Nothing> LinuxLauncher::recover(const 
std::list<state::RunState>& states)
     }
     const ContainerID& containerId = state.id.get();
 
-    Try<bool> exists = cgroups::exists(hierarchy, cgroup(containerId));
-
-    if (!exists.get()) {
-      // This may occur if the freezer cgroup was destroyed but the
-      // slave dies before noticing this. The containerizer will
-      // monitor the container's pid and notice that it has exited,
-      // triggering destruction of the container.
-      LOG(INFO) << "Couldn't find freezer cgroup for container " << 
containerId;
-      continue;
-    }
-
     if (state.forkedPid.isNone()) {
       return Failure("Executor pid is required to recover container " +
                      stringify(containerId));
@@ -161,8 +150,24 @@ Future<Nothing> LinuxLauncher::recover(const 
std::list<state::RunState>& states)
                      " for container " + stringify(containerId));
     }
 
+    // Store the pid now because if the freezer cgroup is absent
+    // (slave terminated after the cgroup is destroyed but before it
+    // was notified) then we'll still need it for the check in
+    // destroy() when we clean up.
     pids.put(containerId, pid);
 
+    Try<bool> exists = cgroups::exists(hierarchy, cgroup(containerId));
+
+    if (!exists.get()) {
+      // This may occur if the freezer cgroup was destroyed but the
+      // slave dies before noticing this. The containerizer will
+      // monitor the container's pid and notice that it has exited,
+      // triggering destruction of the container.
+      LOG(INFO) << "Couldn't find freezer cgroup for container "
+                << containerId << ", assuming already destroyed";
+      continue;
+    }
+
     cgroups.insert(cgroup(containerId));
   }
 
@@ -344,8 +349,6 @@ Try<pid_t> LinuxLauncher::fork(
     return Error("Failed to synchronize child process");
   }
 
-  // Store the pid (session id and process group id) if this is the
-  // first process forked for this container.
   if (!pids.contains(containerId)) {
     pids.put(containerId, child.get().pid());
   }

Reply via email to