Converted watchdog option into a childhook in libprocess [1/2].

Review: https://reviews.apache.org/r/52120/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/6d58c241
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/6d58c241
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/6d58c241

Branch: refs/heads/master
Commit: 6d58c241716244d29a932440eabed31dccb639cf
Parents: f4be028
Author: Joerg Schad <jo...@mesosphere.io>
Authored: Wed Sep 21 09:50:39 2016 -0700
Committer: Jie Yu <yujie....@gmail.com>
Committed: Wed Sep 21 09:50:39 2016 -0700

----------------------------------------------------------------------
 .../include/process/posix/subprocess.hpp        | 90 +-------------------
 .../include/process/subprocess_base.hpp         | 36 ++++----
 3rdparty/libprocess/src/subprocess.cpp          | 83 +++++++++++++++++-
 .../libprocess/src/tests/subprocess_tests.cpp   |  2 +-
 4 files changed, 98 insertions(+), 113 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/6d58c241/3rdparty/libprocess/include/process/posix/subprocess.hpp
----------------------------------------------------------------------
diff --git a/3rdparty/libprocess/include/process/posix/subprocess.hpp 
b/3rdparty/libprocess/include/process/posix/subprocess.hpp
index a225919..aa4609d 100644
--- a/3rdparty/libprocess/include/process/posix/subprocess.hpp
+++ b/3rdparty/libprocess/include/process/posix/subprocess.hpp
@@ -126,79 +126,6 @@ inline void signalHandler(int signal)
 }
 
 
-// Creates a separate watchdog process to monitor the child process and
-// kill it in case the parent process dies.
-//
-// NOTE: This function needs to be async signal safe. In fact,
-// all the library functions we used in this function are async
-// signal safe.
-inline int watchdogProcess()
-{
-#ifdef __linux__
-  // Send SIGTERM to the current process if the parent (i.e., the
-  // slave) exits.
-  // NOTE:: This function should always succeed because we are passing
-  // in a valid signal.
-  prctl(PR_SET_PDEATHSIG, SIGTERM);
-
-  // Put the current process into a separate process group so that
-  // we can kill it and all its children easily.
-  if (setpgid(0, 0) != 0) {
-    abort();
-  }
-
-  // Install a SIGTERM handler which will kill the current process
-  // group. Since we already setup the death signal above, the
-  // signal handler will be triggered when the parent (e.g., the
-  // slave) exits.
-  if (os::signals::install(SIGTERM, &signalHandler) != 0) {
-    abort();
-  }
-
-  pid_t pid = fork();
-  if (pid == -1) {
-    abort();
-  } else if (pid == 0) {
-    // Child. This is the process that is going to exec the
-    // process if zero is returned.
-
-    // We setup death signal for the process as well in case
-    // someone, though unlikely, accidentally kill the parent of
-    // this process (the bookkeeping process).
-    prctl(PR_SET_PDEATHSIG, SIGKILL);
-
-    // NOTE: We don't need to clear the signal handler explicitly
-    // because the subsequent 'exec' will clear them.
-    return 0;
-  } else {
-    // Parent. This is the bookkeeping process which will wait for
-    // the child process to finish.
-
-    // Close the files to prevent interference on the communication
-    // between the slave and the child process.
-    ::close(STDIN_FILENO);
-    ::close(STDOUT_FILENO);
-    ::close(STDERR_FILENO);
-
-    // Block until the child process finishes.
-    int status = 0;
-    if (waitpid(pid, &status, 0) == -1) {
-      abort();
-    }
-
-    // Forward the exit status if the child process exits normally.
-    if (WIFEXITED(status)) {
-      _exit(WEXITSTATUS(status));
-    }
-
-    abort();
-    UNREACHABLE();
-  }
-#endif
-  return 0;
-}
-
-
 // The main entry of the child process.
 //
 // NOTE: This function has to be async signal safe.
@@ -211,8 +138,7 @@ inline int childMain(
     const OutputFileDescriptors& stderrfds,
     bool blocking,
     int pipes[2],
-    const vector<Subprocess::ChildHook>& child_hooks,
-    const Watchdog watchdog)
+    const vector<Subprocess::ChildHook>& child_hooks)
 {
   // Close parent's end of the pipes.
   if (stdinfds.write.isSome()) {
@@ -283,16 +209,6 @@ inline int childMain(
     }
   }
 
-  // If the child process should die together with its parent we spawn a
-  // separate watchdog process which kills the child when the parent dies.
-  //
-  // NOTE: The watchdog process sets the process group id in order for it and
-  // its child processes to be killed together. We should not (re)set the sid
-  // after this.
-  if (watchdog == MONITOR) {
-    watchdogProcess();
-  }
-
   os::execvpe(path.c_str(), argv, envp);
 
   ABORT("Failed to os::execvpe on path '" + path + "': " + 
os::strerror(errno));
@@ -307,7 +223,6 @@ inline Try<pid_t> cloneChild(
         pid_t(const lambda::function<int()>&)>>& _clone,
     const vector<Subprocess::ParentHook>& parent_hooks,
     const vector<Subprocess::ChildHook>& child_hooks,
-    const Watchdog watchdog,
     const InputFileDescriptors stdinfds,
     const OutputFileDescriptors stdoutfds,
     const OutputFileDescriptors stderrfds)
@@ -368,8 +283,7 @@ inline Try<pid_t> cloneChild(
       stderrfds,
       blocking,
       pipes,
-      child_hooks,
-      watchdog));
+      child_hooks));
 
   delete[] _argv;
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/6d58c241/3rdparty/libprocess/include/process/subprocess_base.hpp
----------------------------------------------------------------------
diff --git a/3rdparty/libprocess/include/process/subprocess_base.hpp 
b/3rdparty/libprocess/include/process/subprocess_base.hpp
index 69d12ce..1d02454 100644
--- a/3rdparty/libprocess/include/process/subprocess_base.hpp
+++ b/3rdparty/libprocess/include/process/subprocess_base.hpp
@@ -34,13 +34,6 @@
 
 namespace process {
 
-// Flag describing whether a new process should be monitored by a separate
-// watch process and be killed in case the parent process dies.
-enum Watchdog {
-  MONITOR,
-  NO_MONITOR,
-};
-
 /**
  * Represents a fork() exec()ed subprocess. Access is provided to the
  * input / output of the process, as well as the exit status. The
@@ -144,8 +137,7 @@ public:
         const Option<lambda::function<
             pid_t(const lambda::function<int()>&)>>& clone,
         const std::vector<Subprocess::ParentHook>& parent_hooks,
-        const std::vector<Subprocess::ChildHook>& child_hooks,
-        const Watchdog watchdog);
+        const std::vector<Subprocess::ChildHook>& child_hooks);
 
     IO(const lambda::function<Try<InputFileDescriptors>()>& _input,
        const lambda::function<Try<OutputFileDescriptors>()>& _output)
@@ -207,6 +199,16 @@ public:
      */
     static ChildHook SETSID();
 
+    /**
+     * `ChildHook` for starting a Supervisor process monitoring
+     *  and killing the child process if the parent process terminates.
+     *
+     * NOTE: The supervisor process sets the process group id in order for it
+     * and its child processes to be killed together. We should not (re)set the
+     * sid after this.
+     */
+    static ChildHook SUPERVISOR();
+
     Try<Nothing> operator()() const { return child_setup(); }
 
   private:
@@ -293,8 +295,7 @@ private:
       const Option<lambda::function<
           pid_t(const lambda::function<int()>&)>>& clone,
       const std::vector<Subprocess::ParentHook>& parent_hooks,
-      const std::vector<Subprocess::ChildHook>& child_hooks,
-      const Watchdog watchdog);
+      const std::vector<Subprocess::ChildHook>& child_hooks);
 
   struct Data
   {
@@ -359,8 +360,6 @@ private:
  *     before the child execs.
  * @param child_hooks Hooks that will be executed in the child
  *     before the child execs but after parent_hooks have executed.
- * @param watchdog Indicator whether the new process should be monitored
- *     and killed if the parent process terminates.
  * @return The subprocess or an error if one occurred.
  */
 // TODO(jmlvanre): Consider removing default argument for
@@ -376,8 +375,7 @@ Try<Subprocess> subprocess(
     const Option<lambda::function<
         pid_t(const lambda::function<int()>&)>>& clone = None(),
     const std::vector<Subprocess::ParentHook>& parent_hooks = {},
-    const std::vector<Subprocess::ChildHook>& child_hooks = {},
-    const Watchdog watchdog = NO_MONITOR);
+    const std::vector<Subprocess::ChildHook>& child_hooks = {});
 
 
 /**
@@ -400,8 +398,6 @@ Try<Subprocess> subprocess(
  *     before the child execs.
  * @param child_hooks Hooks that will be executed in the child
  *     before the child execs but after parent_hooks have executed.
- * @param watchdog Indicator whether the new process should be monitored
- *     and killed if the parent process terminates.
  * @return The subprocess or an error if one occurred.
  */
 // TODO(jmlvanre): Consider removing default argument for
@@ -415,8 +411,7 @@ inline Try<Subprocess> subprocess(
     const Option<lambda::function<
         pid_t(const lambda::function<int()>&)>>& clone = None(),
     const std::vector<Subprocess::ParentHook>& parent_hooks = {},
-    const std::vector<Subprocess::ChildHook>& child_hooks = {},
-    const Watchdog watchdog = NO_MONITOR)
+    const std::vector<Subprocess::ChildHook>& child_hooks = {})
 {
   std::vector<std::string> argv = {os::Shell::arg0, os::Shell::arg1, command};
 
@@ -430,8 +425,7 @@ inline Try<Subprocess> subprocess(
       environment,
       clone,
       parent_hooks,
-      child_hooks,
-      watchdog);
+      child_hooks);
 }
 
 } // namespace process {

http://git-wip-us.apache.org/repos/asf/mesos/blob/6d58c241/3rdparty/libprocess/src/subprocess.cpp
----------------------------------------------------------------------
diff --git a/3rdparty/libprocess/src/subprocess.cpp 
b/3rdparty/libprocess/src/subprocess.cpp
index 123f800..4a8bb2e 100644
--- a/3rdparty/libprocess/src/subprocess.cpp
+++ b/3rdparty/libprocess/src/subprocess.cpp
@@ -85,6 +85,85 @@ Subprocess::ChildHook Subprocess::ChildHook::SETSID()
   });
 }
 
+
+inline void signalHandler(int signal)
+{
+  // Send SIGKILL to every process in the process group of the
+  // calling process.
+  kill(0, SIGKILL);
+  abort();
+}
+
+
+Subprocess::ChildHook Subprocess::ChildHook::SUPERVISOR()
+{
+  return Subprocess::ChildHook([]() -> Try<Nothing> {
+    #ifdef __linux__
+      // Send SIGTERM to the current process if the parent (i.e., the
+      // slave) exits.
+      // NOTE:: This function should always succeed because we are passing
+      // in a valid signal.
+      prctl(PR_SET_PDEATHSIG, SIGTERM);
+
+      // Put the current process into a separate process group so that
+      // we can kill it and all its children easily.
+      if (setpgid(0, 0) != 0) {
+        return Error("Could not start supervisor process.");
+      }
+
+      // Install a SIGTERM handler which will kill the current process
+      // group. Since we already setup the death signal above, the
+      // signal handler will be triggered when the parent (e.g., the
+      // slave) exits.
+      if (os::signals::install(SIGTERM, &signalHandler) != 0) {
+        return Error("Could not start supervisor process.");
+      }
+
+      pid_t pid = fork();
+      if (pid == -1) {
+        return Error("Could not start supervisor process.");
+      } else if (pid == 0) {
+        // Child. This is the process that is going to exec the
+        // process if zero is returned.
+
+        // We setup death signal for the process as well in case
+        // someone, though unlikely, accidentally kill the parent of
+        // this process (the bookkeeping process).
+        prctl(PR_SET_PDEATHSIG, SIGKILL);
+
+        // NOTE: We don't need to clear the signal handler explicitly
+        // because the subsequent 'exec' will clear them.
+        return Nothing();
+      } else {
+        // Parent. This is the bookkeeping process which will wait for
+        // the child process to finish.
+
+        // Close the files to prevent interference on the communication
+        // between the slave and the child process.
+        ::close(STDIN_FILENO);
+        ::close(STDOUT_FILENO);
+        ::close(STDERR_FILENO);
+
+        // Block until the child process finishes.
+        int status = 0;
+        if (waitpid(pid, &status, 0) == -1) {
+          abort();
+        }
+
+        // Forward the exit status if the child process exits normally.
+        if (WIFEXITED(status)) {
+          _exit(WEXITSTATUS(status));
+        }
+
+        abort();
+        UNREACHABLE();
+      }
+    #endif
+    return Nothing();
+  });
+}
+
+
 namespace internal {
 
 static void cleanup(
@@ -129,8 +208,7 @@ Try<Subprocess> subprocess(
     const Option<lambda::function<
         pid_t(const lambda::function<int()>&)>>& _clone,
     const vector<Subprocess::ParentHook>& parent_hooks,
-    const vector<Subprocess::ChildHook>& child_hooks,
-    const Watchdog watchdog)
+    const vector<Subprocess::ChildHook>& child_hooks)
 {
   // File descriptors for redirecting stdin/stdout/stderr.
   // These file descriptors are used for different purposes depending
@@ -207,7 +285,6 @@ Try<Subprocess> subprocess(
         _clone,
         parent_hooks,
         child_hooks,
-        watchdog,
         stdinfds,
         stdoutfds,
         stderrfds);

http://git-wip-us.apache.org/repos/asf/mesos/blob/6d58c241/3rdparty/libprocess/src/tests/subprocess_tests.cpp
----------------------------------------------------------------------
diff --git a/3rdparty/libprocess/src/tests/subprocess_tests.cpp 
b/3rdparty/libprocess/src/tests/subprocess_tests.cpp
index e32f3e5..66ccff9 100644
--- a/3rdparty/libprocess/src/tests/subprocess_tests.cpp
+++ b/3rdparty/libprocess/src/tests/subprocess_tests.cpp
@@ -1003,4 +1003,4 @@ static int setupChdir(const string& directory)
 
 
 // TODO(joerg84): Consider adding tests for setsid, working_directory,
-// and watchdog options.
+// and supervisor childHooks.

Reply via email to