Repository: mesos Updated Branches: refs/heads/master da607079b -> 664788b94
Fix destroying containerizer during isolator prepare. This ensures the prepare phase is completed before destroying the container. Review: https://reviews.apache.org/r/32123 Project: http://git-wip-us.apache.org/repos/asf/mesos/repo Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/664788b9 Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/664788b9 Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/664788b9 Branch: refs/heads/master Commit: 664788b94598a77c936a6d41d71e42f87026f96e Parents: da60707 Author: Timothy Chen <[email protected]> Authored: Tue Apr 21 12:43:34 2015 -0700 Committer: Timothy Chen <[email protected]> Committed: Tue Apr 21 14:25:47 2015 -0700 ---------------------------------------------------------------------- src/slave/containerizer/mesos/containerizer.cpp | 100 +++++++++---- src/slave/containerizer/mesos/containerizer.hpp | 24 +++- src/tests/containerizer_tests.cpp | 142 +++++++++++++++++++ 3 files changed, 229 insertions(+), 37 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/mesos/blob/664788b9/src/slave/containerizer/mesos/containerizer.cpp ---------------------------------------------------------------------- diff --git a/src/slave/containerizer/mesos/containerizer.cpp b/src/slave/containerizer/mesos/containerizer.cpp index e413609..0bd26ea 100644 --- a/src/slave/containerizer/mesos/containerizer.cpp +++ b/src/slave/containerizer/mesos/containerizer.cpp @@ -294,7 +294,11 @@ Future<containerizer::Termination> MesosContainerizer::wait( void MesosContainerizer::destroy(const ContainerID& containerId) { - dispatch(process.get(), &MesosContainerizerProcess::destroy, containerId); + dispatch( + process.get(), + &MesosContainerizerProcess::destroy, + containerId, + true); } @@ -520,7 +524,7 @@ void MesosContainerizerProcess::__launch( << "' of framework '" << executorInfo.framework_id() << "': " << failure; - destroy(containerId); + destroy(containerId, false); } @@ -580,6 +584,8 @@ Future<list<Option<CommandInfo>>> MesosContainerizerProcess::prepare( const string& directory, const Option<string>& user) { + CHECK(containers_.contains(containerId)); + // We prepare the isolators sequentially according to their ordering // to permit basic dependency specification, e.g., preparing a // filesystem isolator before other isolators. @@ -596,6 +602,8 @@ Future<list<Option<CommandInfo>>> MesosContainerizerProcess::prepare( lambda::_1)); } + containers_[containerId]->preparations = f; + return f; } @@ -933,7 +941,9 @@ Future<ResourceStatistics> MesosContainerizerProcess::usage( } -void MesosContainerizerProcess::destroy(const ContainerID& containerId) +void MesosContainerizerProcess::destroy( + const ContainerID& containerId, + bool killed) { if (!containers_.contains(containerId)) { LOG(WARNING) << "Ignoring destroy of unknown container: " << containerId; @@ -950,14 +960,23 @@ void MesosContainerizerProcess::destroy(const ContainerID& containerId) LOG(INFO) << "Destroying container '" << containerId << "'"; if (container->state == PREPARING) { - // We cannot simply terminate the container if it's preparing - // since isolator's prepare doesn't need any cleanup. - containerizer::Termination termination; - termination.set_killed(true); - termination.set_message("Container destroyed while preparing isolators"); - container->promise.set(termination); + VLOG(1) << "Waiting for the isolators to complete preparing before " + << "destroying the container"; - containers_.erase(containerId); + container->state = DESTROYING; + + Future<Option<int>> status = None(); + // We need to wait for the isolators to finish preparing to prevent + // a race that the destroy method calls isolators' cleanup before + // it starts preparing. + container->preparations + .onAny(defer( + self(), + &Self::___destroy, + containerId, + status, + "Container destroyed while preparing isolators", + killed)); return; } @@ -975,27 +994,30 @@ void MesosContainerizerProcess::destroy(const ContainerID& containerId) // Wait for the isolators to finish isolating before we start // to destroy the container. container->isolation - .onAny(defer(self(), &Self::_destroy, containerId)); + .onAny(defer(self(), &Self::_destroy, containerId, killed)); return; } container->state = DESTROYING; - _destroy(containerId); + _destroy(containerId, killed); } -void MesosContainerizerProcess::_destroy(const ContainerID& containerId) +void MesosContainerizerProcess::_destroy( + const ContainerID& containerId, + bool killed) { // Kill all processes then continue destruction. launcher->destroy(containerId) - .onAny(defer(self(), &Self::__destroy, containerId, lambda::_1)); + .onAny(defer(self(), &Self::__destroy, containerId, lambda::_1, killed)); } void MesosContainerizerProcess::__destroy( const ContainerID& containerId, - const Future<Nothing>& future) + const Future<Nothing>& future, + bool killed) { CHECK(containers_.contains(containerId)); @@ -1021,7 +1043,13 @@ void MesosContainerizerProcess::__destroy( // the exit status of the executor when it's ready (it may already // be) and continue the destroy. containers_[containerId]->status - .onAny(defer(self(), &Self::___destroy, containerId, lambda::_1)); + .onAny(defer( + self(), + &Self::___destroy, + containerId, + lambda::_1, + None(), + killed)); } @@ -1064,7 +1092,9 @@ static T reversed(const T& t) void MesosContainerizerProcess::___destroy( const ContainerID& containerId, - const Future<Option<int>>& status) + const Future<Option<int>>& status, + const Option<string>& message, + bool killed) { // We clean up each isolator in the reverse order they were // prepared (see comment in prepare()). @@ -1084,7 +1114,9 @@ void MesosContainerizerProcess::___destroy( &Self::____destroy, containerId, status, - lambda::_1)); + lambda::_1, + message, + killed)); return; } @@ -1093,7 +1125,9 @@ void MesosContainerizerProcess::___destroy( void MesosContainerizerProcess::____destroy( const ContainerID& containerId, const Future<Option<int>>& status, - const Future<list<Future<Nothing>>>& cleanups) + const Future<list<Future<Nothing>>>& cleanups, + Option<string> message, + bool killed) { // This should not occur because we only use the Future<list> to // facilitate chaining. @@ -1120,26 +1154,32 @@ void MesosContainerizerProcess::____destroy( } } - // A container is 'killed' if any isolator limited it. + // If any isolator limited the container then we mark it to killed. // Note: We may not see a limitation in time for it to be // registered. This could occur if the limitation (e.g., an OOM) // killed the executor and we triggered destroy() off the executor // exit. - bool killed = false; - string message; - if (container->limitations.size() > 0) { - killed = true; + if (!killed && container->limitations.size() > 0) { + string message_; foreach (const Limitation& limitation, container->limitations) { - message += limitation.message; + message_ += limitation.message; } - message = strings::trim(message); - } else { + message = strings::trim(message_); + } else if (!killed && message.isNone()) { message = "Executor terminated"; } containerizer::Termination termination; + + // Killed means that the container was either asked to be destroyed + // by the slave or was destroyed because an isolator limited the + // container. termination.set_killed(killed); - termination.set_message(message); + + if (message.isSome()) { + termination.set_message(message.get()); + } + if (status.isReady() && status.get().isSome()) { termination.set_status(status.get().get()); } @@ -1159,7 +1199,7 @@ void MesosContainerizerProcess::reaped(const ContainerID& containerId) LOG(INFO) << "Executor for container '" << containerId << "' has exited"; // The executor has exited so destroy the container. - destroy(containerId); + destroy(containerId, false); } @@ -1187,7 +1227,7 @@ void MesosContainerizerProcess::limited( } // The container has been affected by the limitation so destroy it. - destroy(containerId); + destroy(containerId, true); } http://git-wip-us.apache.org/repos/asf/mesos/blob/664788b9/src/slave/containerizer/mesos/containerizer.hpp ---------------------------------------------------------------------- diff --git a/src/slave/containerizer/mesos/containerizer.hpp b/src/slave/containerizer/mesos/containerizer.hpp index ae61a0f..fb334e5 100644 --- a/src/slave/containerizer/mesos/containerizer.hpp +++ b/src/slave/containerizer/mesos/containerizer.hpp @@ -156,7 +156,7 @@ public: const ContainerID& containerId, int pipeWrite); - virtual void destroy(const ContainerID& containerId); + virtual void destroy(const ContainerID& containerId, bool killed); virtual process::Future<hashset<ContainerID>> containers(); @@ -199,24 +199,29 @@ private: pid_t _pid); // Continues 'destroy()' once isolators has completed. - void _destroy(const ContainerID& containerId); + void _destroy(const ContainerID& containerId, bool killed); // Continues 'destroy()' once all processes have been killed by the launcher. void __destroy( const ContainerID& containerId, - const process::Future<Nothing>& future); + const process::Future<Nothing>& future, + bool killed); // Continues '_destroy()' once we get the exit status of the executor. void ___destroy( const ContainerID& containerId, - const process::Future<Option<int>>& status); + const process::Future<Option<int>>& status, + const Option<std::string>& message, + bool killed); // Continues (and completes) '__destroy()' once all isolators have completed // cleanup. void ____destroy( const ContainerID& containerId, const process::Future<Option<int>>& status, - const process::Future<std::list<process::Future<Nothing>>>& cleanups); + const process::Future<std::list<process::Future<Nothing>>>& cleanups, + Option<std::string> message, + bool killed); // Call back for when an isolator limits a container and impacts the // processes. This will trigger container destruction. @@ -260,8 +265,13 @@ private: process::Future<Option<int>> status; // We keep track of the future that is waiting for all the - // isolator's futures, so that destroy will only start calling - // cleanup after all isolators has finished isolating. + // isolators' prepare futures, so that destroy will only start + // calling cleanup after all isolators has finished preparing. + process::Future<std::list<Option<CommandInfo>>> preparations; + + // We keep track of the future that is waiting for all the + // isolators' isolate futures, so that destroy will only start + // calling cleanup after all isolators has finished isolating. process::Future<std::list<Nothing>> isolation; // We keep track of any limitations received from each isolator so we can http://git-wip-us.apache.org/repos/asf/mesos/blob/664788b9/src/tests/containerizer_tests.cpp ---------------------------------------------------------------------- diff --git a/src/tests/containerizer_tests.cpp b/src/tests/containerizer_tests.cpp index 5991aa6..d100ab9 100644 --- a/src/tests/containerizer_tests.cpp +++ b/src/tests/containerizer_tests.cpp @@ -357,6 +357,70 @@ public: }; +class MockIsolatorProcess : public IsolatorProcess +{ +public: + MockIsolatorProcess() + { + EXPECT_CALL(*this, watch(_)) + .WillRepeatedly(Return(watchPromise.future())); + + EXPECT_CALL(*this, isolate(_, _)) + .WillRepeatedly(Return(Nothing())); + + EXPECT_CALL(*this, cleanup(_)) + .WillRepeatedly(Return(Nothing())); + + EXPECT_CALL(*this, prepare(_, _, _, _)) + .WillRepeatedly(Invoke(this, &MockIsolatorProcess::_prepare)); + } + + MOCK_METHOD1( + recover, + process::Future<Nothing>( + const std::list<mesos::slave::ExecutorRunState>&)); + + MOCK_METHOD4( + prepare, + process::Future<Option<CommandInfo>>( + const ContainerID&, + const ExecutorInfo&, + const std::string&, + const Option<std::string>&)); + + virtual process::Future<Option<CommandInfo>> _prepare( + const ContainerID& containerId, + const ExecutorInfo& executorInfo, + const std::string& directory, + const Option<std::string>& user) + { + return None(); + } + + MOCK_METHOD2( + isolate, + process::Future<Nothing>(const ContainerID&, pid_t)); + + MOCK_METHOD1( + watch, + process::Future<mesos::slave::Limitation>(const ContainerID&)); + + MOCK_METHOD2( + update, + process::Future<Nothing>(const ContainerID&, const Resources&)); + + MOCK_METHOD1( + usage, + process::Future<ResourceStatistics>(const ContainerID&)); + + MOCK_METHOD1( + cleanup, + process::Future<Nothing>(const ContainerID&)); + + Promise<mesos::slave::Limitation> watchPromise; +}; + + // Destroying a mesos containerizer while it is fetching should // complete without waiting for the fetching to finish. TEST_F(MesosContainerizerDestroyTest, DestroyWhileFetching) @@ -412,6 +476,84 @@ TEST_F(MesosContainerizerDestroyTest, DestroyWhileFetching) } +// Destroying a mesos containerizer while it is preparing should +// wait until isolators are finished preparing before destroying. +TEST_F(MesosContainerizerDestroyTest, DestroyWhilePreparing) +{ + slave::Flags flags; + Try<Launcher*> launcher = PosixLauncher::create(flags); + ASSERT_SOME(launcher); + vector<Owned<Isolator>> isolators; + + MockIsolatorProcess* isolatorProcess = new MockIsolatorProcess(); + + Owned<Isolator> isolator( + new Isolator(Owned<IsolatorProcess>((IsolatorProcess*)isolatorProcess))); + + isolators.push_back(isolator); + + Future<Nothing> prepare; + Promise<Option<CommandInfo>> promise; + // Simulate a long prepare from the isolator. + EXPECT_CALL(*isolatorProcess, prepare(_, _, _, _)) + .WillOnce(DoAll(FutureSatisfy(&prepare), + Return(promise.future()))); + + Fetcher fetcher; + + MockMesosContainerizerProcess* process = new MockMesosContainerizerProcess( + flags, + true, + &fetcher, + Owned<Launcher>(launcher.get()), + isolators); + + MesosContainerizer containerizer((Owned<MesosContainerizerProcess>(process))); + + ContainerID containerId; + containerId.set_value("test_container"); + + TaskInfo taskInfo; + CommandInfo commandInfo; + taskInfo.mutable_command()->MergeFrom(commandInfo); + + containerizer.launch( + containerId, + taskInfo, + CREATE_EXECUTOR_INFO("executor", "exit 0"), + os::getcwd(), + None(), + SlaveID(), + process::PID<Slave>(), + false); + + Future<containerizer::Termination> wait = containerizer.wait(containerId); + + AWAIT_READY(prepare); + + containerizer.destroy(containerId); + + // The container should not exit until prepare is complete. + ASSERT_TRUE(wait.isPending()); + + // Need to help the compiler to disambiguate between overloads. + Option<CommandInfo> option = commandInfo; + promise.set(option); + + AWAIT_READY(wait); + + containerizer::Termination termination = wait.get(); + + ASSERT_EQ( + "Container destroyed while preparing isolators", + termination.message()); + + ASSERT_TRUE(termination.killed()); + + ASSERT_FALSE(termination.has_status()); +} + + // This action destroys the container using the real launcher and // waits until the destroy is complete. ACTION_P(InvokeDestroyAndWait, launcher)
