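Use the pid namespace to destroy a container when available. The Linux launcher checks whether a container is running in a pid namespace and, if so, kills all processes in that namespace rather than relying on the freezer alone. This approach is backwards and forwards compatible: a container started without pid namespace isolation can still be destroyed when the isolation is enabled, and vice versa.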
Review: https://reviews.apache.org/r/25966 Project: http://git-wip-us.apache.org/repos/asf/mesos/repo Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/fa44b0a9 Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/fa44b0a9 Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/fa44b0a9 Branch: refs/heads/master Commit: fa44b0a9f040bff81043b1fd9963efe6dac80379 Parents: 823b992 Author: Ian Downes <[email protected]> Authored: Fri Oct 24 11:57:43 2014 -0700 Committer: Ian Downes <[email protected]> Committed: Tue Oct 28 12:04:16 2014 -0700 ---------------------------------------------------------------------- src/slave/containerizer/linux_launcher.cpp | 32 ++++ src/tests/slave_recovery_tests.cpp | 215 ++++++++++++++++++++++++ 2 files changed, 247 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/mesos/blob/fa44b0a9/src/slave/containerizer/linux_launcher.cpp ---------------------------------------------------------------------- diff --git a/src/slave/containerizer/linux_launcher.cpp b/src/slave/containerizer/linux_launcher.cpp index 7a4ef69..10c1203 100644 --- a/src/slave/containerizer/linux_launcher.cpp +++ b/src/slave/containerizer/linux_launcher.cpp @@ -31,11 +31,14 @@ #include <stout/strings.hpp> #include "linux/cgroups.hpp" +#include "linux/ns.hpp" #include "mesos/resources.hpp" #include "slave/containerizer/linux_launcher.hpp" +#include "slave/containerizer/isolators/namespaces/pid.hpp" + using namespace process; using std::list; @@ -365,6 +368,35 @@ Future<Nothing> LinuxLauncher::destroy(const ContainerID& containerId) pids.erase(containerId); + // Just return if the cgroup was destroyed and the slave didn't receive the + // notification. See comment in recover(). 
+ Try<bool> exists = cgroups::exists(hierarchy, cgroup(containerId)); + if (exists.isError()) { + return Failure("Failed to check existence of freezer cgroup: " + + exists.error()); + } + + if (!exists.get()) { + return Nothing(); + } + + Result<ino_t> containerPidNs = + NamespacesPidIsolatorProcess::getNamespace(containerId); + + if (containerPidNs.isSome()) { + LOG(INFO) << "Using pid namespace to destroy container " << containerId; + + return ns::pid::destroy(containerPidNs.get()) + .then(lambda::bind( + (Future<Nothing>(*)(const string&, + const string&, + const Duration&))(&cgroups::destroy), + hierarchy, + cgroup(containerId), + cgroups::DESTROY_TIMEOUT)); + } + + // Try to clean up using just the freezer cgroup. return cgroups::destroy( hierarchy, cgroup(containerId), http://git-wip-us.apache.org/repos/asf/mesos/blob/fa44b0a9/src/tests/slave_recovery_tests.cpp ---------------------------------------------------------------------- diff --git a/src/tests/slave_recovery_tests.cpp b/src/tests/slave_recovery_tests.cpp index 813e2d6..98e059f 100644 --- a/src/tests/slave_recovery_tests.cpp +++ b/src/tests/slave_recovery_tests.cpp @@ -3490,3 +3490,218 @@ TEST_F(MesosContainerizerSlaveRecoveryTest, CGROUPS_ROOT_PerfRollForward) delete containerizer2.get(); } #endif // __linux__ + + +#ifdef __linux__ +// Test that a container started without namespace/pid isolation can +// be destroyed correctly with namespace/pid isolation enabled. +TEST_F(MesosContainerizerSlaveRecoveryTest, CGROUPS_ROOT_PidNamespaceForward) +{ + Try<PID<Master> > master = this->StartMaster(); + ASSERT_SOME(master); + + // Start a slave using a containerizer without pid namespace + // isolation. 
+ slave::Flags flags = this->CreateSlaveFlags(); + flags.isolation = "cgroups/cpu,cgroups/mem"; + flags.slave_subsystems = ""; + + Try<MesosContainerizer*> containerizer1 = + MesosContainerizer::create(flags, true); + ASSERT_SOME(containerizer1); + + Try<PID<Slave> > slave = this->StartSlave(containerizer1.get(), flags); + ASSERT_SOME(slave); + + MockScheduler sched; + + // Scheduler expectations. + EXPECT_CALL(sched, statusUpdate(_, _)) + .WillRepeatedly(Return()); + + // Enable checkpointing for the framework. + FrameworkInfo frameworkInfo; + frameworkInfo.CopyFrom(DEFAULT_FRAMEWORK_INFO); + frameworkInfo.set_checkpoint(true); + + MesosSchedulerDriver driver( + &sched, frameworkInfo, master.get(), DEFAULT_CREDENTIAL); + + EXPECT_CALL(sched, registered(_, _, _)); + + Future<vector<Offer> > offers1; + EXPECT_CALL(sched, resourceOffers(_, _)) + .WillOnce(FutureArg<1>(&offers1)) + .WillRepeatedly(Return()); // Ignore subsequent offers. + + driver.start(); + + AWAIT_READY(offers1); + EXPECT_NE(0u, offers1.get().size()); + + SlaveID slaveId = offers1.get()[0].slave_id(); + + TaskInfo task1 = createTask( + slaveId, Resources::parse("cpus:0.5;mem:128").get(), "sleep 1000"); + vector<TaskInfo> tasks1; + tasks1.push_back(task1); + + // Message expectations. + Future<Message> registerExecutorMessage = + FUTURE_MESSAGE(Eq(RegisterExecutorMessage().GetTypeName()), _, _); + + driver.launchTasks(offers1.get()[0].id(), tasks1); + + AWAIT_READY(registerExecutorMessage); + + Future<hashset<ContainerID> > containers = containerizer1.get()->containers(); + AWAIT_READY(containers); + ASSERT_EQ(1u, containers.get().size()); + + ContainerID containerId = *(containers.get().begin()); + + // Stop the slave. + this->Stop(slave.get()); + delete containerizer1.get(); + + // Start a slave using a containerizer with pid namespace isolation. 
+ flags.isolation = "cgroups/cpu,cgroups/mem,namespaces/pid"; + + Try<MesosContainerizer*> containerizer2 = + MesosContainerizer::create(flags, true); + ASSERT_SOME(containerizer2); + + Future<vector<Offer> > offers2; + EXPECT_CALL(sched, resourceOffers(_, _)) + .WillOnce(FutureArg<1>(&offers2)) + .WillRepeatedly(Return()); // Ignore subsequent offers. + + slave = this->StartSlave(containerizer2.get(), flags); + ASSERT_SOME(slave); + + AWAIT_READY(offers2); + EXPECT_NE(0u, offers2.get().size()); + + // Set up to wait on the container's termination. + Future<containerizer::Termination> termination = + containerizer2.get()->wait(containerId); + + // Destroy the container. + containerizer2.get()->destroy(containerId); + + AWAIT_READY(termination); + + driver.stop(); + driver.join(); + + this->Shutdown(); + delete containerizer2.get(); +} + + +// Test that a container started with namespace/pid isolation can +// be destroyed correctly without namespace/pid isolation enabled. +TEST_F(MesosContainerizerSlaveRecoveryTest, CGROUPS_ROOT_PidNamespaceBackward) +{ + Try<PID<Master> > master = this->StartMaster(); + ASSERT_SOME(master); + + // Start a slave using a containerizer with pid namespace isolation. + slave::Flags flags = this->CreateSlaveFlags(); + flags.isolation = "cgroups/cpu,cgroups/mem,namespaces/pid"; + flags.slave_subsystems = ""; + + Try<MesosContainerizer*> containerizer1 = + MesosContainerizer::create(flags, true); + ASSERT_SOME(containerizer1); + + Try<PID<Slave> > slave = this->StartSlave(containerizer1.get(), flags); + ASSERT_SOME(slave); + + MockScheduler sched; + + // Scheduler expectations. + EXPECT_CALL(sched, statusUpdate(_, _)) + .WillRepeatedly(Return()); + + // Enable checkpointing for the framework. 
+ FrameworkInfo frameworkInfo; + frameworkInfo.CopyFrom(DEFAULT_FRAMEWORK_INFO); + frameworkInfo.set_checkpoint(true); + + MesosSchedulerDriver driver( + &sched, frameworkInfo, master.get(), DEFAULT_CREDENTIAL); + + EXPECT_CALL(sched, registered(_, _, _)); + + Future<vector<Offer> > offers1; + EXPECT_CALL(sched, resourceOffers(_, _)) + .WillOnce(FutureArg<1>(&offers1)) + .WillRepeatedly(Return()); // Ignore subsequent offers. + + driver.start(); + + AWAIT_READY(offers1); + EXPECT_NE(0u, offers1.get().size()); + + SlaveID slaveId = offers1.get()[0].slave_id(); + + TaskInfo task1 = createTask( + slaveId, Resources::parse("cpus:0.5;mem:128").get(), "sleep 1000"); + vector<TaskInfo> tasks1; + tasks1.push_back(task1); + + // Message expectations. + Future<Message> registerExecutorMessage = + FUTURE_MESSAGE(Eq(RegisterExecutorMessage().GetTypeName()), _, _); + + driver.launchTasks(offers1.get()[0].id(), tasks1); + + AWAIT_READY(registerExecutorMessage); + + Future<hashset<ContainerID> > containers = containerizer1.get()->containers(); + AWAIT_READY(containers); + ASSERT_EQ(1u, containers.get().size()); + + ContainerID containerId = *(containers.get().begin()); + + // Stop the slave. + this->Stop(slave.get()); + delete containerizer1.get(); + + // Start a slave using a containerizer without pid namespace + // isolation. + flags.isolation = "cgroups/cpu,cgroups/mem"; + + Try<MesosContainerizer*> containerizer2 = + MesosContainerizer::create(flags, true); + ASSERT_SOME(containerizer2); + + Future<vector<Offer> > offers2; + EXPECT_CALL(sched, resourceOffers(_, _)) + .WillOnce(FutureArg<1>(&offers2)) + .WillRepeatedly(Return()); // Ignore subsequent offers. + + slave = this->StartSlave(containerizer2.get(), flags); + ASSERT_SOME(slave); + + AWAIT_READY(offers2); + EXPECT_NE(0u, offers2.get().size()); + + // Set up to wait on the container's termination. + Future<containerizer::Termination> termination = + containerizer2.get()->wait(containerId); + + // Destroy the container. 
+ containerizer2.get()->destroy(containerId); + + AWAIT_READY(termination); + + driver.stop(); + driver.join(); + + this->Shutdown(); + delete containerizer2.get(); +} + +#endif // __linux__
