Repository: mesos
Updated Branches:
  refs/heads/master 30a56be17 -> 0a2957ed0


Make sure the mesos-fetcher exits if the slave terminates.

Review: https://reviews.apache.org/r/24722


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/f66fa52e
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/f66fa52e
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/f66fa52e

Branch: refs/heads/master
Commit: f66fa52e7efd9c10f9256805e45095591d4833a7
Parents: 30a56be
Author: Benjamin Hindman <[email protected]>
Authored: Thu Aug 14 17:22:47 2014 -0700
Committer: Benjamin Hindman <[email protected]>
Committed: Fri Aug 15 16:59:08 2014 -0700

----------------------------------------------------------------------
 src/launcher/fetcher.cpp                        | 46 ++++++++++++++++++++
 src/slave/containerizer/mesos/containerizer.cpp |  2 +
 2 files changed, 48 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/f66fa52e/src/launcher/fetcher.cpp
----------------------------------------------------------------------
diff --git a/src/launcher/fetcher.cpp b/src/launcher/fetcher.cpp
index 50e9918..1e3d516 100644
--- a/src/launcher/fetcher.cpp
+++ b/src/launcher/fetcher.cpp
@@ -20,6 +20,8 @@
 
 #include <mesos/mesos.hpp>
 
+#include <process/io.hpp>
+
 #include <stout/net.hpp>
 #include <stout/option.hpp>
 #include <stout/os.hpp>
@@ -191,10 +193,54 @@ Try<string> fetch(
 }
 
 
+// A helper function for abnormally cancelling the fetching because
+// our parent has died (e.g., the slave).
+process::Future<Nothing> cancel()
+{
+  // We don't easily have a handle on any of the children we've
+  // potentially started since they're hidden behind os::system,
+  // net::download, HDFS, etc., so we just do a killtree on all of our
+  // children.
+  //
+  // TODO(benh): This still isn't sufficient because we might be in
+  // the middle of forking a process. What we really need to do is run
+  // os::killtree "outside" of this process so that we can pause this
+  // process too!
+  Try<os::ProcessTree> pstree = os::pstree(0);
+
+  if (pstree.isSome() && !pstree.get().children.empty()) {
+    foreach (const os::ProcessTree& child, pstree.get().children) {
+      // NOTE: We don't follow groups or sessions because it's
+      // possible we'll end up killing ourselves, or worse, the slave!
+      os::killtree(child.process.pid, 9);
+    }
+  }
+
+  EXIT(1) << "Cancelled fetching because stdin was closed "
+          << "(e.g., because the parent has exited)";
+
+  return Nothing();
+}
+
+
 int main(int argc, char* argv[])
 {
   GOOGLE_PROTOBUF_VERIFY_VERSION;
 
+  // The current semantics of the mesos-fetcher is that it should
+  // terminate if/when its parent terminates. To support this, we read
+  // from stdin and if/when we get back an EOF then we "cancel" any
+  // fetching and exit so we don't become an orphan (which would be
+  // especially bad in the event that calling something like HDFS
+  // ends up hung indefinitely).
+  //
+  // TODO(benh): Introduce a timeout for fetching each URI that can be
+  // set via flags on the slave.
+  //
+  // TODO(benh): Introduce a flag here for changing these semantics.
+  process::io::read(STDIN_FILENO)
+    .then(lambda::bind(&cancel));
+
   CommandInfo commandInfo;
   // Construct URIs from the encoded environment string.
   const std::string& uris = os::getenv("MESOS_EXECUTOR_URIS");

http://git-wip-us.apache.org/repos/asf/mesos/blob/f66fa52e/src/slave/containerizer/mesos/containerizer.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/containerizer.cpp 
b/src/slave/containerizer/mesos/containerizer.cpp
index d0676c5..cdf440d 100644
--- a/src/slave/containerizer/mesos/containerizer.cpp
+++ b/src/slave/containerizer/mesos/containerizer.cpp
@@ -502,6 +502,8 @@ Future<Nothing> MesosContainerizerProcess::fetch(
   LOG(INFO) << "Fetching URIs for container '" << containerId
             << "' using command '" << command << "'";
 
+  // NOTE: It's important that we create a pipe for the mesos-fetcher's
+  // stdin so that the fetcher terminates itself when the slave exits.
   Try<Subprocess> fetcher = subprocess(
       command,
       Subprocess::PIPE(),

Reply via email to