This is an automated email from the ASF dual-hosted git repository.

gilbert pushed a commit to branch 1.4.x
in repository https://gitbox.apache.org/repos/asf/mesos.git

commit 83d1ae6100421d06dba6887c6c493e103dc90dcf
Author: Andrei Budnik <abud...@mesosphere.com>
AuthorDate: Wed Jan 9 11:09:08 2019 -0800

    Fixed the FD leak if containerizer::_launch() failed or discarded.
    
    For the period between IOSB server is up and container process exec,
    if the containerizer launch returns failure or discarded, the FD used
    for signaling from the container process to the IOSB finish redirect
    will be leaked, which would cause the IOSB stuck at `io::redirect`
    forever. It would block the containerizer from cleaning up this
    container at IOSB.
    
    This issue could be exposed if there are frequent check containers
    launch on an agent with heavy loads.
    
    This patch fixes the issue by handling discard of a `launch` future,
    so the container IO is cleaned up and therefore all FDs are closed.
    
    Review: https://reviews.apache.org/r/69684/
    (cherry picked from commit 6938af6e8edc15b24846adface1eeb98032e3463)
---
 src/slave/containerizer/mesos/containerizer.cpp | 54 +++++++++++++------------
 1 file changed, 28 insertions(+), 26 deletions(-)

diff --git a/src/slave/containerizer/mesos/containerizer.cpp 
b/src/slave/containerizer/mesos/containerizer.cpp
index 73334ff..d7db2b8 100644
--- a/src/slave/containerizer/mesos/containerizer.cpp
+++ b/src/slave/containerizer/mesos/containerizer.cpp
@@ -1130,40 +1130,42 @@ Future<bool> MesosContainerizerProcess::launch(
 
   containers_.put(containerId, container);
 
+  Future<Nothing> _prepare;
+
   // We'll first provision the image for the container, and
   // then provision the images specified in `volumes` using
   // the 'volume/image' isolator.
   if (!containerConfig.has_container_info() ||
       !containerConfig.container_info().mesos().has_image()) {
-    return prepare(containerId, None())
-      .then(defer(self(), [this, containerId] () {
-        return ioSwitchboard->extractContainerIO(containerId);
-      }))
-      .then(defer(self(),
-                  &Self::_launch,
-                  containerId,
-                  lambda::_1,
-                  environment,
-                  pidCheckpointPath));
-  }
-
-  container->provisioning = provisioner->provision(
+    _prepare = prepare(containerId, None());
+  } else {
+    container->provisioning = provisioner->provision(
       containerId,
       containerConfig.container_info().mesos().image());
 
-  return container->provisioning
-    .then(defer(self(),
-                [=](const ProvisionInfo& provisionInfo) -> Future<bool> {
-      return prepare(containerId, provisionInfo)
-        .then(defer(self(), [this, containerId] () {
-          return ioSwitchboard->extractContainerIO(containerId);
-        }))
-        .then(defer(self(),
-                    &Self::_launch,
-                    containerId,
-                    lambda::_1,
-                    environment,
-                    pidCheckpointPath));
+    _prepare = container->provisioning
+      .then(defer(self(), [=](const ProvisionInfo& provisionInfo) {
+        return prepare(containerId, provisionInfo);
+      }));
+  }
+
+  return _prepare
+    .then(defer(self(), [this, containerId] () {
+      return ioSwitchboard->extractContainerIO(containerId);
+    }))
+    .then(defer(self(), [=](const Option<ContainerIO>& containerIO) {
+      return _launch(containerId, containerIO, environment, pidCheckpointPath);
+    }))
+    .onAny(defer(self(), [this, containerId](
+        const Future<Containerizer::LaunchResult>& future) {
+      // We need to clean up the container IO in the case when IOSwitchboard
+      // process has started, but we have not taken ownership of the container
+      // IO by calling `extractContainerIO()`. This may happen if `launch`
+      // future is discarded by the caller of this method. The container IO
+      // stores FDs in the `FDWrapper` struct, which closes these FDs on its
+      // destruction. Otherwise, IOSwitchboard might get stuck trying to read
+      // leaked FDs.
+      ioSwitchboard->extractContainerIO(containerId);
     }));
 }
 

Reply via email to