Added masterSlaveLostHook. This patch adds a new masterSlaveLost hook to enable modules to clean up after lost slaves events (as in networking modules, where we want to avoid leaking IPs).
Review: https://reviews.apache.org/r/38575 Project: http://git-wip-us.apache.org/repos/asf/mesos/repo Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/1d86932c Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/1d86932c Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/1d86932c Branch: refs/heads/master Commit: 1d86932ce424f3e7b921cc6a436051a45f704dd0 Parents: f6706e8 Author: Niklas Nielsen <[email protected]> Authored: Wed Sep 23 15:37:21 2015 -0700 Committer: Niklas Q. Nielsen <[email protected]> Committed: Wed Sep 23 15:37:24 2015 -0700 ---------------------------------------------------------------------- include/mesos/hook.hpp | 10 +++++++ src/examples/test_hook_module.cpp | 26 +++++++++++++++++ src/hook/manager.cpp | 12 ++++++++ src/hook/manager.hpp | 2 ++ src/master/master.cpp | 5 ++++ src/tests/hook_tests.cpp | 53 ++++++++++++++++++++++++++++++++++ 6 files changed, 108 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/mesos/blob/1d86932c/include/mesos/hook.hpp ---------------------------------------------------------------------- diff --git a/include/mesos/hook.hpp b/include/mesos/hook.hpp index 2353602..2fe060e 100644 --- a/include/mesos/hook.hpp +++ b/include/mesos/hook.hpp @@ -62,6 +62,16 @@ public: return None(); } + + // This hook is called when an Agent is removed i.e. deemed lost by the + // master. The hook is invoked after all frameworks have been informed about + // the loss. + virtual Try<Nothing> masterSlaveLostHook(const SlaveInfo& slaveInfo) + { + return Nothing(); + } + + // This environment decorator hook is called from within slave when // launching a new executor. A module implementing the hook creates // and returns a set of environment variables. These environment http://git-wip-us.apache.org/repos/asf/mesos/blob/1d86932c/src/examples/test_hook_module.cpp ---------------------------------------------------------------------- diff --git a/src/examples/test_hook_module.cpp b/src/examples/test_hook_module.cpp index 5c4d71a..c09d7dd 100644 --- a/src/examples/test_hook_module.cpp +++ b/src/examples/test_hook_module.cpp @@ -108,6 +108,32 @@ public: return labels; } + virtual Try<Nothing> masterSlaveLostHook(const SlaveInfo& slaveInfo) + { + LOG(INFO) << "Executing 'masterSlaveLostHook' in slave '" + << slaveInfo.id() << "'"; + + // TODO(nnielsen): Add argument to signal(), so we can filter messages from + // the `masterSlaveLostHook` from `slaveRemoveExecutorHook`. + // NOTE: Will not be a problem **as long as** the test doesn't start any + // tasks. + HookProcess hookProcess; + process::spawn(&hookProcess); + Future<Nothing> future = + process::dispatch(hookProcess, &HookProcess::await); + + process::dispatch(hookProcess, &HookProcess::signal); + + // Make sure we don't terminate the process before the message self-send has + // completed. + future.await(); + + process::terminate(hookProcess); + process::wait(hookProcess); + + return Nothing(); + } + // TODO(nnielsen): Split hook tests into multiple modules to avoid // interference. virtual Result<Labels> slaveRunTaskLabelDecorator( http://git-wip-us.apache.org/repos/asf/mesos/blob/1d86932c/src/hook/manager.cpp ---------------------------------------------------------------------- diff --git a/src/hook/manager.cpp b/src/hook/manager.cpp index 691976e..52d53f0 100644 --- a/src/hook/manager.cpp +++ b/src/hook/manager.cpp @@ -133,6 +133,18 @@ Labels HookManager::masterLaunchTaskLabelDecorator( } +void HookManager::masterSlaveLostHook(const SlaveInfo& slaveInfo) +{ + foreachpair (const string& name, Hook* hook, availableHooks) { + Try<Nothing> result = hook->masterSlaveLostHook(slaveInfo); + if (result.isError()) { + LOG(WARNING) << "Master slave-lost hook failed for module '" + << name << "': " << result.error(); + } + } +} + + Labels HookManager::slaveRunTaskLabelDecorator( const TaskInfo& taskInfo, const ExecutorInfo& executorInfo, http://git-wip-us.apache.org/repos/asf/mesos/blob/1d86932c/src/hook/manager.hpp ---------------------------------------------------------------------- diff --git a/src/hook/manager.hpp b/src/hook/manager.hpp index a517c05..d35a762 100644 --- a/src/hook/manager.hpp +++ b/src/hook/manager.hpp @@ -45,6 +45,8 @@ public: const FrameworkInfo& frameworkInfo, const SlaveInfo& slaveInfo); + static void masterSlaveLostHook(const SlaveInfo& slaveInfo); + static Labels slaveRunTaskLabelDecorator( const TaskInfo& taskInfo, const ExecutorInfo& executorInfo, http://git-wip-us.apache.org/repos/asf/mesos/blob/1d86932c/src/master/master.cpp ---------------------------------------------------------------------- diff --git a/src/master/master.cpp b/src/master/master.cpp index 5ca1941..0a40bc3 100644 --- a/src/master/master.cpp +++ b/src/master/master.cpp @@ -5998,6 +5998,11 @@ void Master::_removeSlave( message.mutable_slave_id()->MergeFrom(slaveInfo.id()); framework->send(message); } + + // Finally, notify the `SlaveLost` hooks. + if (HookManager::hooksAvailable()) { + HookManager::masterSlaveLostHook(slaveInfo); + } } http://git-wip-us.apache.org/repos/asf/mesos/blob/1d86932c/src/tests/hook_tests.cpp ---------------------------------------------------------------------- diff --git a/src/tests/hook_tests.cpp b/src/tests/hook_tests.cpp index f85d917..c9d35fb 100644 --- a/src/tests/hook_tests.cpp +++ b/src/tests/hook_tests.cpp @@ -18,6 +18,7 @@ #include <mesos/module.hpp> +#include <process/clock.hpp> #include <process/future.hpp> #include <process/gmock.hpp> #include <process/pid.hpp> @@ -61,6 +62,7 @@ using mesos::internal::slave::Fetcher; using mesos::internal::slave::MesosContainerizer; using mesos::internal::slave::Slave; +using process::Clock; using process::Future; using process::PID; using process::Shared; @@ -71,6 +73,7 @@ using std::vector; using testing::_; using testing::DoAll; +using testing::Eq; using testing::Return; using testing::SaveArg; @@ -212,6 +215,56 @@ TEST_F(HookTest, VerifyMasterLaunchTaskHook) } +// This test forces a `SlaveLost` event. When this happens, we expect the +// `masterSlaveLostHook` to be invoked and await an internal libprocess event +// to trigger in the module code. +TEST_F(HookTest, MasterSlaveLostHookTest) +{ + Future<HookExecuted> hookFuture = FUTURE_PROTOBUF(HookExecuted(), _, _); + + DROP_MESSAGES(Eq(PingSlaveMessage().GetTypeName()), _, _); + + master::Flags masterFlags = CreateMasterFlags(); + + // Speed up timeout cycles. + masterFlags.slave_ping_timeout = Seconds(1); + masterFlags.max_slave_ping_timeouts = 1; + + Try<PID<Master>> master = StartMaster(masterFlags); + ASSERT_SOME(master); + + MockExecutor exec(DEFAULT_EXECUTOR_ID); + + TestContainerizer containerizer(&exec); + + Future<SlaveRegisteredMessage> slaveRegisteredMessage = + FUTURE_PROTOBUF(SlaveRegisteredMessage(), _, _); + + // Start a mock Agent since we aren't testing the slave hooks. + Try<PID<Slave>> slave = StartSlave(&containerizer); + ASSERT_SOME(slave); + + // Make sure Agent is up and running. + AWAIT_READY(slaveRegisteredMessage); + + // Forward clock slave timeout. + Duration totalTimeout = + masterFlags.slave_ping_timeout * masterFlags.max_slave_ping_timeouts; + + Clock::pause(); + Clock::advance(totalTimeout); + Clock::settle(); + Clock::resume(); + + // `masterSlaveLostHook()` should be called from within module code. + AWAIT_READY(hookFuture); + + // TODO(nnielsen): Verify hook signal type. + + Shutdown(); // Must shutdown before 'containerizer' gets deallocated. +} + + // Test that the environment decorator hook adds a new environment // variable to the executor runtime. // Test hook adds a new environment variable "FOO" to the executor
