This is an automated email from the ASF dual-hosted git repository.

abudnik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/mesos.git


The following commit(s) were added to refs/heads/master by this push:
     new c8004ee  Removed the duplicate pid check in Docker containerizer.
c8004ee is described below

commit c8004ee8a0962d0e0f9147718853160bb708f5bc
Author: Qian Zhang <zhq527...@gmail.com>
AuthorDate: Tue Apr 30 13:23:26 2019 +0200

    Removed the duplicate pid check in Docker containerizer.
    
    Review: https://reviews.apache.org/r/70561/
---
 src/slave/containerizer/docker.cpp | 27 ++++++---------------------
 1 file changed, 6 insertions(+), 21 deletions(-)

diff --git a/src/slave/containerizer/docker.cpp 
b/src/slave/containerizer/docker.cpp
index 7f1d471..e4ad945 100644
--- a/src/slave/containerizer/docker.cpp
+++ b/src/slave/containerizer/docker.cpp
@@ -936,10 +936,6 @@ Future<Nothing> DockerContainerizerProcess::_recover(
       }
     }
 
-    // Collection of pids that we've started reaping in order to
-    // detect very unlikely duplicate scenario (see below).
-    hashmap<ContainerID, pid_t> pids;
-
     foreachvalue (const FrameworkState& framework, state->frameworks) {
       foreachvalue (const ExecutorState& executor, framework.executors) {
         if (executor.info.isNone()) {
@@ -1018,9 +1014,12 @@ Future<Nothing> DockerContainerizerProcess::_recover(
 
         // Only reap the executor process if the executor can be connected
         // otherwise just set `container->status` to `None()`. This is to
-        // avoid reaping an irrelevant process, e.g., after the agent host is
-        // rebooted, the executor pid happens to be reused by another process.
-        // See MESOS-8125 for details.
+        // avoid reaping an irrelevant process, e.g., agent process is stopped
+        // for a long time, and during this time executor terminates and its
+        // pid happens to be reused by another irrelevant process. When agent
+        // is restarted, it still considers this executor not complete (i.e.,
+        // `run->completed` is false), so we would reap the irrelevant process
+        // if we do not check whether that process can be connected.
         // Note that if both the pid and the port of the executor are reused
         // by another process or two processes respectively after the agent
         // host reboots we will still reap an irrelevant process, but that
@@ -1056,20 +1055,6 @@ Future<Nothing> DockerContainerizerProcess::_recover(
         container->status.future()
           ->onAny(defer(self(), &Self::reaped, containerId));
 
-        if (pids.contains_value(pid)) {
-          // This should (almost) never occur. There is the
-          // possibility that a new executor is launched with the same
-          // pid as one that just exited (highly unlikely) and the
-          // slave dies after the new executor is launched but before
-          // it hears about the termination of the earlier executor
-          // (also unlikely).
-          return Failure(
-              "Detected duplicate pid " + stringify(pid) +
-              " for container " + stringify(containerId));
-        }
-
-        pids.put(containerId, pid);
-
         const string sandboxDirectory = paths::getExecutorRunPath(
             flags.work_dir,
             state->id,

Reply via email to