Add ns::pid::destroy() to destroy a pid namespace. All processes in the namespace are signalled with SIGKILL and then reaped. The order of signalling is unspecified, i.e., the init pid is generally not the first pid signalled.
Review: https://reviews.apache.org/r/25966/ Project: http://git-wip-us.apache.org/repos/asf/mesos/repo Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/03421130 Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/03421130 Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/03421130 Branch: refs/heads/master Commit: 03421130e827975214db6fafbcd97258055c5d79 Parents: 3650573 Author: Ian Downes <[email protected]> Authored: Fri Oct 24 11:50:13 2014 -0700 Committer: Ian Downes <[email protected]> Committed: Tue Oct 28 12:04:16 2014 -0700 ---------------------------------------------------------------------- src/linux/ns.hpp | 80 +++++++++++++++++++++++++++++++++++++++++++++ src/tests/ns_tests.cpp | 73 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 153 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/mesos/blob/03421130/src/linux/ns.hpp ---------------------------------------------------------------------- diff --git a/src/linux/ns.hpp b/src/linux/ns.hpp index 60adaa4..ec1a517 100644 --- a/src/linux/ns.hpp +++ b/src/linux/ns.hpp @@ -40,6 +40,10 @@ #include <stout/os/exists.hpp> #include <stout/os/ls.hpp> +#include <process/collect.hpp> +#include <process/future.hpp> +#include <process/reap.hpp> + namespace ns { // Returns all the supported namespaces by the kernel. @@ -223,6 +227,82 @@ inline Try<ino_t> getns(pid_t pid, const std::string& ns) return s.st_ino; } + +namespace pid { + +namespace internal { + +inline Nothing _nothing() { return Nothing(); } + +} // namespace internal { + +inline process::Future<Nothing> destroy(ino_t inode) +{ + // Check we're not trying to kill the root namespace. + Try<ino_t> ns = ns::getns(1, "pid"); + if (ns.isError()) { + return process::Failure(ns.error()); + } + + if (ns.get() == inode) { + return process::Failure("Cannot destroy root pid namespace"); + } + + // Or ourselves. 
+ ns = ns::getns(::getpid(), "pid"); + if (ns.isError()) { + return process::Failure(ns.error()); + } + + if (ns.get() == inode) { + return process::Failure("Cannot destroy own pid namespace"); + } + + // Signal all pids in the namespace, including the init pid if it's + // still running. Once the init pid has been signalled the kernel + // will prevent any new children forking in the namespace and will + // also signal all other pids in the namespace. + Try<std::set<pid_t>> pids = os::pids(); + if (pids.isError()) { + return process::Failure("Failed to list of processes"); + } + + foreach (pid_t pid, pids.get()) { + // Ignore any errors, probably because the process no longer + // exists, and ignorable otherwise. + Try<ino_t> ns = ns::getns(pid, "pid"); + if (ns.isSome() && ns.get() == inode) { + kill(pid, SIGKILL); + } + } + + // Get a new snapshot and do a second pass of the pids to capture + // any pids that are dying so we can reap them. + pids = os::pids(); + if (pids.isError()) { + return process::Failure("Failed to list of processes"); + } + + std::list<process::Future<Option<int>>> futures; + + foreach (pid_t pid, pids.get()) { + Try<ino_t> ns = ns::getns(pid, "pid"); + if (ns.isSome() && ns.get() == inode) { + futures.push_back(process::reap(pid)); + } + + // Ignore any errors, probably because the process no longer + // exists, and ignorable otherwise. + } + + // Wait for all the signalled processes to terminate. The pid + // namespace wil then be empty and will be released by the kernel + // (unless there are additional references). 
+ return process::collect(futures) + .then(lambda::bind(&internal::_nothing)); +} + +} // namespace pid { } // namespace ns { #endif // __LINUX_NS_HPP__ http://git-wip-us.apache.org/repos/asf/mesos/blob/03421130/src/tests/ns_tests.cpp ---------------------------------------------------------------------- diff --git a/src/tests/ns_tests.cpp b/src/tests/ns_tests.cpp index 30218cf..eb385d0 100644 --- a/src/tests/ns_tests.cpp +++ b/src/tests/ns_tests.cpp @@ -214,3 +214,76 @@ TEST(NsTest, ROOT_getns) ASSERT_TRUE(WIFSIGNALED(status)); EXPECT_EQ(SIGKILL, WTERMSIG(status)); } + + +static int childDestroy(void* arg) +{ + // Fork a bunch of children. + ::fork(); + ::fork(); + ::fork(); + + // Parent and all children sleep. + while (true) { sleep(1); } + + ABORT("Error, child should be killed before reaching here"); +} + + +// Test we can destroy a pid namespace, i.e., kill all processes. +TEST(NsTest, ROOT_destroy) +{ + set<string> namespaces = ns::namespaces(); + + if (namespaces.count("pid") == 0) { + // Pid namespace is not available. + return; + } + + Try<int> nstype = ns::nstype("pid"); + ASSERT_SOME(nstype); + + // 8 MiB stack for child. + static unsigned long long stack[(8*1024*1024)/sizeof(unsigned long long)]; + + pid_t pid = clone( + childDestroy, + &stack[sizeof(stack)/sizeof(stack[0]) - 1], // Stack grows down. + SIGCHLD | nstype.get(), + NULL); + + ASSERT_NE(-1, pid); + + Future<Option<int>> status = process::reap(pid); + + // Ensure the child is in a different pid namespace. + Try<ino_t> childNs = ns::getns(pid, "pid"); + ASSERT_SOME(childNs); + + Try<ino_t> ourNs = ns::getns(::getpid(), "pid"); + ASSERT_SOME(ourNs); + + ASSERT_NE(ourNs.get(), childNs.get()); + + // Kill the child. 
+ AWAIT_READY(ns::pid::destroy(childNs.get())); + + AWAIT_READY(status); + ASSERT_SOME(status.get()); + ASSERT_TRUE(WIFSIGNALED(status.get().get())); + EXPECT_EQ(SIGKILL, WTERMSIG(status.get().get())); + + // Finally, verify that no processes are in the child's pid + // namespace, i.e., destroy() also killed all descendants. + Try<set<pid_t>> pids = os::pids(); + ASSERT_SOME(pids); + + foreach (pid_t pid, pids.get()) { + Try<ino_t> otherNs = ns::getns(pid, "pid"); + // pid may have exited since getting the snapshot of pids so + // ignore any error. + if (otherNs.isSome()) { + ASSERT_SOME_NE(childNs.get(), otherNs); + } + } +}
