[mesos] 01/02: Removed the duplicate pid check in Docker containerizer.
This is an automated email from the ASF dual-hosted git repository. abudnik pushed a commit to branch 1.4.x in repository https://gitbox.apache.org/repos/asf/mesos.git commit ca65f991727494b9b78e64a19306231ac004289f Author: Qian Zhang AuthorDate: Tue Apr 30 13:23:26 2019 +0200 Removed the duplicate pid check in Docker containerizer. Review: https://reviews.apache.org/r/70561/ --- src/slave/containerizer/docker.cpp | 27 ++- 1 file changed, 6 insertions(+), 21 deletions(-) diff --git a/src/slave/containerizer/docker.cpp b/src/slave/containerizer/docker.cpp index 31a47f0..97cd75e 100644 --- a/src/slave/containerizer/docker.cpp +++ b/src/slave/containerizer/docker.cpp @@ -925,10 +925,6 @@ Future DockerContainerizerProcess::_recover( } } -// Collection of pids that we've started reaping in order to -// detect very unlikely duplicate scenario (see below). -hashmap pids; - foreachvalue (const FrameworkState& framework, state->frameworks) { foreachvalue (const ExecutorState& executor, framework.executors) { if (executor.info.isNone()) { @@ -1007,9 +1003,12 @@ Future DockerContainerizerProcess::_recover( // Only reap the executor process if the executor can be connected // otherwise just set `container->status` to `None()`. This is to -// avoid reaping an irrelevant process, e.g., after the agent host is -// rebooted, the executor pid happens to be reused by another process. -// See MESOS-8125 for details. +// avoid reaping an irrelevant process, e.g., agent process is stopped +// for a long time, and during this time executor terminates and its +// pid happens to be reused by another irrelevant process. When agent +// is restarted, it still considers this executor not complete (i.e., +// `run->completed` is false), so we would reap the irrelevant process +// if we do not check whether that process can be connected. // Note that if both the pid and the port of the executor are reused // by another process or two processes respectively after the agent // host reboots we will still reap an irrelevant process, but that @@ -1045,20 +1044,6 @@ Future DockerContainerizerProcess::_recover( container->status.future().get() .onAny(defer(self(), ::reaped, containerId)); -if (pids.containsValue(pid)) { - // This should (almost) never occur. There is the - // possibility that a new executor is launched with the same - // pid as one that just exited (highly unlikely) and the - // slave dies after the new executor is launched but before - // it hears about the termination of the earlier executor - // (also unlikely). - return Failure( - "Detected duplicate pid " + stringify(pid) + - " for container " + stringify(containerId)); -} - -pids.put(containerId, pid); - const string sandboxDirectory = paths::getExecutorRunPath( flags.work_dir, state->id,
[mesos] 01/02: Removed the duplicate pid check in Docker containerizer.
This is an automated email from the ASF dual-hosted git repository. abudnik pushed a commit to branch 1.5.x in repository https://gitbox.apache.org/repos/asf/mesos.git commit f4a6453c77719ed531a9287b6c9cdeb7ad268865 Author: Qian Zhang AuthorDate: Tue Apr 30 13:23:26 2019 +0200 Removed the duplicate pid check in Docker containerizer. Review: https://reviews.apache.org/r/70561/ --- src/slave/containerizer/docker.cpp | 27 ++- 1 file changed, 6 insertions(+), 21 deletions(-) diff --git a/src/slave/containerizer/docker.cpp b/src/slave/containerizer/docker.cpp index 9dbb286..81f72d4 100644 --- a/src/slave/containerizer/docker.cpp +++ b/src/slave/containerizer/docker.cpp @@ -934,10 +934,6 @@ Future DockerContainerizerProcess::_recover( } } -// Collection of pids that we've started reaping in order to -// detect very unlikely duplicate scenario (see below). -hashmap pids; - foreachvalue (const FrameworkState& framework, state->frameworks) { foreachvalue (const ExecutorState& executor, framework.executors) { if (executor.info.isNone()) { @@ -1016,9 +1012,12 @@ Future DockerContainerizerProcess::_recover( // Only reap the executor process if the executor can be connected // otherwise just set `container->status` to `None()`. This is to -// avoid reaping an irrelevant process, e.g., after the agent host is -// rebooted, the executor pid happens to be reused by another process. -// See MESOS-8125 for details. +// avoid reaping an irrelevant process, e.g., agent process is stopped +// for a long time, and during this time executor terminates and its +// pid happens to be reused by another irrelevant process. When agent +// is restarted, it still considers this executor not complete (i.e., +// `run->completed` is false), so we would reap the irrelevant process +// if we do not check whether that process can be connected. // Note that if both the pid and the port of the executor are reused // by another process or two processes respectively after the agent // host reboots we will still reap an irrelevant process, but that @@ -1054,20 +1053,6 @@ Future DockerContainerizerProcess::_recover( container->status.future().get() .onAny(defer(self(), ::reaped, containerId)); -if (pids.containsValue(pid)) { - // This should (almost) never occur. There is the - // possibility that a new executor is launched with the same - // pid as one that just exited (highly unlikely) and the - // slave dies after the new executor is launched but before - // it hears about the termination of the earlier executor - // (also unlikely). - return Failure( - "Detected duplicate pid " + stringify(pid) + - " for container " + stringify(containerId)); -} - -pids.put(containerId, pid); - const string sandboxDirectory = paths::getExecutorRunPath( flags.work_dir, state->id,
[mesos] 01/02: Removed the duplicate pid check in Docker containerizer.
This is an automated email from the ASF dual-hosted git repository. abudnik pushed a commit to branch 1.6.x in repository https://gitbox.apache.org/repos/asf/mesos.git commit d627a919c651e8dacd14569c027fe4422ef87828 Author: Qian Zhang AuthorDate: Tue Apr 30 13:23:26 2019 +0200 Removed the duplicate pid check in Docker containerizer. Review: https://reviews.apache.org/r/70561/ --- src/slave/containerizer/docker.cpp | 27 ++- 1 file changed, 6 insertions(+), 21 deletions(-) diff --git a/src/slave/containerizer/docker.cpp b/src/slave/containerizer/docker.cpp index ef468ed..85b22f4 100644 --- a/src/slave/containerizer/docker.cpp +++ b/src/slave/containerizer/docker.cpp @@ -937,10 +937,6 @@ Future DockerContainerizerProcess::_recover( } } -// Collection of pids that we've started reaping in order to -// detect very unlikely duplicate scenario (see below). -hashmap pids; - foreachvalue (const FrameworkState& framework, state->frameworks) { foreachvalue (const ExecutorState& executor, framework.executors) { if (executor.info.isNone()) { @@ -1019,9 +1015,12 @@ Future DockerContainerizerProcess::_recover( // Only reap the executor process if the executor can be connected // otherwise just set `container->status` to `None()`. This is to -// avoid reaping an irrelevant process, e.g., after the agent host is -// rebooted, the executor pid happens to be reused by another process. -// See MESOS-8125 for details. +// avoid reaping an irrelevant process, e.g., agent process is stopped +// for a long time, and during this time executor terminates and its +// pid happens to be reused by another irrelevant process. When agent +// is restarted, it still considers this executor not complete (i.e., +// `run->completed` is false), so we would reap the irrelevant process +// if we do not check whether that process can be connected. // Note that if both the pid and the port of the executor are reused // by another process or two processes respectively after the agent // host reboots we will still reap an irrelevant process, but that @@ -1057,20 +1056,6 @@ Future DockerContainerizerProcess::_recover( container->status.future() ->onAny(defer(self(), ::reaped, containerId)); -if (pids.containsValue(pid)) { - // This should (almost) never occur. There is the - // possibility that a new executor is launched with the same - // pid as one that just exited (highly unlikely) and the - // slave dies after the new executor is launched but before - // it hears about the termination of the earlier executor - // (also unlikely). - return Failure( - "Detected duplicate pid " + stringify(pid) + - " for container " + stringify(containerId)); -} - -pids.put(containerId, pid); - const string sandboxDirectory = paths::getExecutorRunPath( flags.work_dir, state->id,
[mesos] 01/02: Removed the duplicate pid check in Docker containerizer.
This is an automated email from the ASF dual-hosted git repository. abudnik pushed a commit to branch 1.7.x in repository https://gitbox.apache.org/repos/asf/mesos.git commit bd438b797a15724945a60ee8d57cec656e23ae2d Author: Qian Zhang AuthorDate: Tue Apr 30 13:23:26 2019 +0200 Removed the duplicate pid check in Docker containerizer. Review: https://reviews.apache.org/r/70561/ --- src/slave/containerizer/docker.cpp | 27 ++- 1 file changed, 6 insertions(+), 21 deletions(-) diff --git a/src/slave/containerizer/docker.cpp b/src/slave/containerizer/docker.cpp index 192dc29..dacf4de 100644 --- a/src/slave/containerizer/docker.cpp +++ b/src/slave/containerizer/docker.cpp @@ -945,10 +945,6 @@ Future DockerContainerizerProcess::_recover( } } -// Collection of pids that we've started reaping in order to -// detect very unlikely duplicate scenario (see below). -hashmap pids; - foreachvalue (const FrameworkState& framework, state->frameworks) { foreachvalue (const ExecutorState& executor, framework.executors) { if (executor.info.isNone()) { @@ -1027,9 +1023,12 @@ Future DockerContainerizerProcess::_recover( // Only reap the executor process if the executor can be connected // otherwise just set `container->status` to `None()`. This is to -// avoid reaping an irrelevant process, e.g., after the agent host is -// rebooted, the executor pid happens to be reused by another process. -// See MESOS-8125 for details. +// avoid reaping an irrelevant process, e.g., agent process is stopped +// for a long time, and during this time executor terminates and its +// pid happens to be reused by another irrelevant process. When agent +// is restarted, it still considers this executor not complete (i.e., +// `run->completed` is false), so we would reap the irrelevant process +// if we do not check whether that process can be connected. // Note that if both the pid and the port of the executor are reused // by another process or two processes respectively after the agent // host reboots we will still reap an irrelevant process, but that @@ -1065,20 +1064,6 @@ Future DockerContainerizerProcess::_recover( container->status.future() ->onAny(defer(self(), ::reaped, containerId)); -if (pids.containsValue(pid)) { - // This should (almost) never occur. There is the - // possibility that a new executor is launched with the same - // pid as one that just exited (highly unlikely) and the - // slave dies after the new executor is launched but before - // it hears about the termination of the earlier executor - // (also unlikely). - return Failure( - "Detected duplicate pid " + stringify(pid) + - " for container " + stringify(containerId)); -} - -pids.put(containerId, pid); - const string sandboxDirectory = paths::getExecutorRunPath( flags.work_dir, state->id,
[mesos] 01/02: Removed the duplicate pid check in Docker containerizer.
This is an automated email from the ASF dual-hosted git repository. abudnik pushed a commit to branch 1.8.x in repository https://gitbox.apache.org/repos/asf/mesos.git commit 02f532ae876196c0c8abad9d6effb75d3ffa5db7 Author: Qian Zhang AuthorDate: Tue Apr 30 13:59:54 2019 +0200 Removed the duplicate pid check in Docker containerizer. Review: https://reviews.apache.org/r/70561/ --- src/slave/containerizer/docker.cpp | 27 ++- 1 file changed, 6 insertions(+), 21 deletions(-) diff --git a/src/slave/containerizer/docker.cpp b/src/slave/containerizer/docker.cpp index 7f1d471..e4ad945 100644 --- a/src/slave/containerizer/docker.cpp +++ b/src/slave/containerizer/docker.cpp @@ -936,10 +936,6 @@ Future DockerContainerizerProcess::_recover( } } -// Collection of pids that we've started reaping in order to -// detect very unlikely duplicate scenario (see below). -hashmap pids; - foreachvalue (const FrameworkState& framework, state->frameworks) { foreachvalue (const ExecutorState& executor, framework.executors) { if (executor.info.isNone()) { @@ -1018,9 +1014,12 @@ Future DockerContainerizerProcess::_recover( // Only reap the executor process if the executor can be connected // otherwise just set `container->status` to `None()`. This is to -// avoid reaping an irrelevant process, e.g., after the agent host is -// rebooted, the executor pid happens to be reused by another process. -// See MESOS-8125 for details. +// avoid reaping an irrelevant process, e.g., agent process is stopped +// for a long time, and during this time executor terminates and its +// pid happens to be reused by another irrelevant process. When agent +// is restarted, it still considers this executor not complete (i.e., +// `run->completed` is false), so we would reap the irrelevant process +// if we do not check whether that process can be connected. // Note that if both the pid and the port of the executor are reused // by another process or two processes respectively after the agent // host reboots we will still reap an irrelevant process, but that @@ -1056,20 +1055,6 @@ Future DockerContainerizerProcess::_recover( container->status.future() ->onAny(defer(self(), ::reaped, containerId)); -if (pids.contains_value(pid)) { - // This should (almost) never occur. There is the - // possibility that a new executor is launched with the same - // pid as one that just exited (highly unlikely) and the - // slave dies after the new executor is launched but before - // it hears about the termination of the earlier executor - // (also unlikely). - return Failure( - "Detected duplicate pid " + stringify(pid) + - " for container " + stringify(containerId)); -} - -pids.put(containerId, pid); - const string sandboxDirectory = paths::getExecutorRunPath( flags.work_dir, state->id,