[mesos] 01/02: Removed the duplicate pid check in Docker containerizer.

2019-05-01 Thread abudnik
This is an automated email from the ASF dual-hosted git repository.

abudnik pushed a commit to branch 1.4.x
in repository https://gitbox.apache.org/repos/asf/mesos.git

commit ca65f991727494b9b78e64a19306231ac004289f
Author: Qian Zhang 
AuthorDate: Tue Apr 30 13:23:26 2019 +0200

Removed the duplicate pid check in Docker containerizer.

Review: https://reviews.apache.org/r/70561/
---
 src/slave/containerizer/docker.cpp | 27 ++-
 1 file changed, 6 insertions(+), 21 deletions(-)

diff --git a/src/slave/containerizer/docker.cpp 
b/src/slave/containerizer/docker.cpp
index 31a47f0..97cd75e 100644
--- a/src/slave/containerizer/docker.cpp
+++ b/src/slave/containerizer/docker.cpp
@@ -925,10 +925,6 @@ Future DockerContainerizerProcess::_recover(
   }
 }
 
-// Collection of pids that we've started reaping in order to
-// detect very unlikely duplicate scenario (see below).
-hashmap pids;
-
 foreachvalue (const FrameworkState& framework, state->frameworks) {
   foreachvalue (const ExecutorState& executor, framework.executors) {
 if (executor.info.isNone()) {
@@ -1007,9 +1003,12 @@ Future DockerContainerizerProcess::_recover(
 
 // Only reap the executor process if the executor can be connected
 // otherwise just set `container->status` to `None()`. This is to
-// avoid reaping an irrelevant process, e.g., after the agent host is
-// rebooted, the executor pid happens to be reused by another process.
-// See MESOS-8125 for details.
+// avoid reaping an irrelevant process, e.g., agent process is stopped
+// for a long time, and during this time executor terminates and its
+// pid happens to be reused by another irrelevant process. When agent
+// is restarted, it still considers this executor not complete (i.e.,
+// `run->completed` is false), so we would reap the irrelevant process
+// if we do not check whether that process can be connected.
 // Note that if both the pid and the port of the executor are reused
 // by another process or two processes respectively after the agent
 // host reboots we will still reap an irrelevant process, but that
@@ -1045,20 +1044,6 @@ Future DockerContainerizerProcess::_recover(
 container->status.future().get()
   .onAny(defer(self(), ::reaped, containerId));
 
-if (pids.containsValue(pid)) {
-  // This should (almost) never occur. There is the
-  // possibility that a new executor is launched with the same
-  // pid as one that just exited (highly unlikely) and the
-  // slave dies after the new executor is launched but before
-  // it hears about the termination of the earlier executor
-  // (also unlikely).
-  return Failure(
-  "Detected duplicate pid " + stringify(pid) +
-  " for container " + stringify(containerId));
-}
-
-pids.put(containerId, pid);
-
 const string sandboxDirectory = paths::getExecutorRunPath(
 flags.work_dir,
 state->id,



[mesos] 01/02: Removed the duplicate pid check in Docker containerizer.

2019-04-30 Thread abudnik
This is an automated email from the ASF dual-hosted git repository.

abudnik pushed a commit to branch 1.5.x
in repository https://gitbox.apache.org/repos/asf/mesos.git

commit f4a6453c77719ed531a9287b6c9cdeb7ad268865
Author: Qian Zhang 
AuthorDate: Tue Apr 30 13:23:26 2019 +0200

Removed the duplicate pid check in Docker containerizer.

Review: https://reviews.apache.org/r/70561/
---
 src/slave/containerizer/docker.cpp | 27 ++-
 1 file changed, 6 insertions(+), 21 deletions(-)

diff --git a/src/slave/containerizer/docker.cpp 
b/src/slave/containerizer/docker.cpp
index 9dbb286..81f72d4 100644
--- a/src/slave/containerizer/docker.cpp
+++ b/src/slave/containerizer/docker.cpp
@@ -934,10 +934,6 @@ Future DockerContainerizerProcess::_recover(
   }
 }
 
-// Collection of pids that we've started reaping in order to
-// detect very unlikely duplicate scenario (see below).
-hashmap pids;
-
 foreachvalue (const FrameworkState& framework, state->frameworks) {
   foreachvalue (const ExecutorState& executor, framework.executors) {
 if (executor.info.isNone()) {
@@ -1016,9 +1012,12 @@ Future DockerContainerizerProcess::_recover(
 
 // Only reap the executor process if the executor can be connected
 // otherwise just set `container->status` to `None()`. This is to
-// avoid reaping an irrelevant process, e.g., after the agent host is
-// rebooted, the executor pid happens to be reused by another process.
-// See MESOS-8125 for details.
+// avoid reaping an irrelevant process, e.g., agent process is stopped
+// for a long time, and during this time executor terminates and its
+// pid happens to be reused by another irrelevant process. When agent
+// is restarted, it still considers this executor not complete (i.e.,
+// `run->completed` is false), so we would reap the irrelevant process
+// if we do not check whether that process can be connected.
 // Note that if both the pid and the port of the executor are reused
 // by another process or two processes respectively after the agent
 // host reboots we will still reap an irrelevant process, but that
@@ -1054,20 +1053,6 @@ Future DockerContainerizerProcess::_recover(
 container->status.future().get()
   .onAny(defer(self(), ::reaped, containerId));
 
-if (pids.containsValue(pid)) {
-  // This should (almost) never occur. There is the
-  // possibility that a new executor is launched with the same
-  // pid as one that just exited (highly unlikely) and the
-  // slave dies after the new executor is launched but before
-  // it hears about the termination of the earlier executor
-  // (also unlikely).
-  return Failure(
-  "Detected duplicate pid " + stringify(pid) +
-  " for container " + stringify(containerId));
-}
-
-pids.put(containerId, pid);
-
 const string sandboxDirectory = paths::getExecutorRunPath(
 flags.work_dir,
 state->id,



[mesos] 01/02: Removed the duplicate pid check in Docker containerizer.

2019-04-30 Thread abudnik
This is an automated email from the ASF dual-hosted git repository.

abudnik pushed a commit to branch 1.6.x
in repository https://gitbox.apache.org/repos/asf/mesos.git

commit d627a919c651e8dacd14569c027fe4422ef87828
Author: Qian Zhang 
AuthorDate: Tue Apr 30 13:23:26 2019 +0200

Removed the duplicate pid check in Docker containerizer.

Review: https://reviews.apache.org/r/70561/
---
 src/slave/containerizer/docker.cpp | 27 ++-
 1 file changed, 6 insertions(+), 21 deletions(-)

diff --git a/src/slave/containerizer/docker.cpp 
b/src/slave/containerizer/docker.cpp
index ef468ed..85b22f4 100644
--- a/src/slave/containerizer/docker.cpp
+++ b/src/slave/containerizer/docker.cpp
@@ -937,10 +937,6 @@ Future DockerContainerizerProcess::_recover(
   }
 }
 
-// Collection of pids that we've started reaping in order to
-// detect very unlikely duplicate scenario (see below).
-hashmap pids;
-
 foreachvalue (const FrameworkState& framework, state->frameworks) {
   foreachvalue (const ExecutorState& executor, framework.executors) {
 if (executor.info.isNone()) {
@@ -1019,9 +1015,12 @@ Future DockerContainerizerProcess::_recover(
 
 // Only reap the executor process if the executor can be connected
 // otherwise just set `container->status` to `None()`. This is to
-// avoid reaping an irrelevant process, e.g., after the agent host is
-// rebooted, the executor pid happens to be reused by another process.
-// See MESOS-8125 for details.
+// avoid reaping an irrelevant process, e.g., agent process is stopped
+// for a long time, and during this time executor terminates and its
+// pid happens to be reused by another irrelevant process. When agent
+// is restarted, it still considers this executor not complete (i.e.,
+// `run->completed` is false), so we would reap the irrelevant process
+// if we do not check whether that process can be connected.
 // Note that if both the pid and the port of the executor are reused
 // by another process or two processes respectively after the agent
 // host reboots we will still reap an irrelevant process, but that
@@ -1057,20 +1056,6 @@ Future DockerContainerizerProcess::_recover(
 container->status.future()
   ->onAny(defer(self(), ::reaped, containerId));
 
-if (pids.containsValue(pid)) {
-  // This should (almost) never occur. There is the
-  // possibility that a new executor is launched with the same
-  // pid as one that just exited (highly unlikely) and the
-  // slave dies after the new executor is launched but before
-  // it hears about the termination of the earlier executor
-  // (also unlikely).
-  return Failure(
-  "Detected duplicate pid " + stringify(pid) +
-  " for container " + stringify(containerId));
-}
-
-pids.put(containerId, pid);
-
 const string sandboxDirectory = paths::getExecutorRunPath(
 flags.work_dir,
 state->id,



[mesos] 01/02: Removed the duplicate pid check in Docker containerizer.

2019-04-30 Thread abudnik
This is an automated email from the ASF dual-hosted git repository.

abudnik pushed a commit to branch 1.7.x
in repository https://gitbox.apache.org/repos/asf/mesos.git

commit bd438b797a15724945a60ee8d57cec656e23ae2d
Author: Qian Zhang 
AuthorDate: Tue Apr 30 13:23:26 2019 +0200

Removed the duplicate pid check in Docker containerizer.

Review: https://reviews.apache.org/r/70561/
---
 src/slave/containerizer/docker.cpp | 27 ++-
 1 file changed, 6 insertions(+), 21 deletions(-)

diff --git a/src/slave/containerizer/docker.cpp 
b/src/slave/containerizer/docker.cpp
index 192dc29..dacf4de 100644
--- a/src/slave/containerizer/docker.cpp
+++ b/src/slave/containerizer/docker.cpp
@@ -945,10 +945,6 @@ Future DockerContainerizerProcess::_recover(
   }
 }
 
-// Collection of pids that we've started reaping in order to
-// detect very unlikely duplicate scenario (see below).
-hashmap pids;
-
 foreachvalue (const FrameworkState& framework, state->frameworks) {
   foreachvalue (const ExecutorState& executor, framework.executors) {
 if (executor.info.isNone()) {
@@ -1027,9 +1023,12 @@ Future DockerContainerizerProcess::_recover(
 
 // Only reap the executor process if the executor can be connected
 // otherwise just set `container->status` to `None()`. This is to
-// avoid reaping an irrelevant process, e.g., after the agent host is
-// rebooted, the executor pid happens to be reused by another process.
-// See MESOS-8125 for details.
+// avoid reaping an irrelevant process, e.g., agent process is stopped
+// for a long time, and during this time executor terminates and its
+// pid happens to be reused by another irrelevant process. When agent
+// is restarted, it still considers this executor not complete (i.e.,
+// `run->completed` is false), so we would reap the irrelevant process
+// if we do not check whether that process can be connected.
 // Note that if both the pid and the port of the executor are reused
 // by another process or two processes respectively after the agent
 // host reboots we will still reap an irrelevant process, but that
@@ -1065,20 +1064,6 @@ Future DockerContainerizerProcess::_recover(
 container->status.future()
   ->onAny(defer(self(), ::reaped, containerId));
 
-if (pids.containsValue(pid)) {
-  // This should (almost) never occur. There is the
-  // possibility that a new executor is launched with the same
-  // pid as one that just exited (highly unlikely) and the
-  // slave dies after the new executor is launched but before
-  // it hears about the termination of the earlier executor
-  // (also unlikely).
-  return Failure(
-  "Detected duplicate pid " + stringify(pid) +
-  " for container " + stringify(containerId));
-}
-
-pids.put(containerId, pid);
-
 const string sandboxDirectory = paths::getExecutorRunPath(
 flags.work_dir,
 state->id,



[mesos] 01/02: Removed the duplicate pid check in Docker containerizer.

2019-04-30 Thread abudnik
This is an automated email from the ASF dual-hosted git repository.

abudnik pushed a commit to branch 1.8.x
in repository https://gitbox.apache.org/repos/asf/mesos.git

commit 02f532ae876196c0c8abad9d6effb75d3ffa5db7
Author: Qian Zhang 
AuthorDate: Tue Apr 30 13:59:54 2019 +0200

Removed the duplicate pid check in Docker containerizer.

Review: https://reviews.apache.org/r/70561/
---
 src/slave/containerizer/docker.cpp | 27 ++-
 1 file changed, 6 insertions(+), 21 deletions(-)

diff --git a/src/slave/containerizer/docker.cpp 
b/src/slave/containerizer/docker.cpp
index 7f1d471..e4ad945 100644
--- a/src/slave/containerizer/docker.cpp
+++ b/src/slave/containerizer/docker.cpp
@@ -936,10 +936,6 @@ Future DockerContainerizerProcess::_recover(
   }
 }
 
-// Collection of pids that we've started reaping in order to
-// detect very unlikely duplicate scenario (see below).
-hashmap pids;
-
 foreachvalue (const FrameworkState& framework, state->frameworks) {
   foreachvalue (const ExecutorState& executor, framework.executors) {
 if (executor.info.isNone()) {
@@ -1018,9 +1014,12 @@ Future DockerContainerizerProcess::_recover(
 
 // Only reap the executor process if the executor can be connected
 // otherwise just set `container->status` to `None()`. This is to
-// avoid reaping an irrelevant process, e.g., after the agent host is
-// rebooted, the executor pid happens to be reused by another process.
-// See MESOS-8125 for details.
+// avoid reaping an irrelevant process, e.g., agent process is stopped
+// for a long time, and during this time executor terminates and its
+// pid happens to be reused by another irrelevant process. When agent
+// is restarted, it still considers this executor not complete (i.e.,
+// `run->completed` is false), so we would reap the irrelevant process
+// if we do not check whether that process can be connected.
 // Note that if both the pid and the port of the executor are reused
 // by another process or two processes respectively after the agent
 // host reboots we will still reap an irrelevant process, but that
@@ -1056,20 +1055,6 @@ Future DockerContainerizerProcess::_recover(
 container->status.future()
   ->onAny(defer(self(), ::reaped, containerId));
 
-if (pids.contains_value(pid)) {
-  // This should (almost) never occur. There is the
-  // possibility that a new executor is launched with the same
-  // pid as one that just exited (highly unlikely) and the
-  // slave dies after the new executor is launched but before
-  // it hears about the termination of the earlier executor
-  // (also unlikely).
-  return Failure(
-  "Detected duplicate pid " + stringify(pid) +
-  " for container " + stringify(containerId));
-}
-
-pids.put(containerId, pid);
-
 const string sandboxDirectory = paths::getExecutorRunPath(
 flags.work_dir,
 state->id,