Added a test for executor shutdown grace period. Review: https://reviews.apache.org/r/44994/
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/201ab8f3 Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/201ab8f3 Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/201ab8f3 Branch: refs/heads/master Commit: 201ab8f3abc83e9235afef551ef3d858ca1e8224 Parents: d322aa9 Author: Alexander Rukletsov <ruklet...@gmail.com> Authored: Thu Mar 24 17:30:13 2016 +0100 Committer: Alexander Rukletsov <al...@apache.org> Committed: Thu Mar 24 18:21:01 2016 +0100 ---------------------------------------------------------------------- src/slave/slave.hpp | 14 ++--- src/tests/slave_tests.cpp | 139 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 146 insertions(+), 7 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/mesos/blob/201ab8f3/src/slave/slave.hpp ---------------------------------------------------------------------- diff --git a/src/slave/slave.hpp b/src/slave/slave.hpp index bc68cf1..3ba335f 100644 --- a/src/slave/slave.hpp +++ b/src/slave/slave.hpp @@ -398,6 +398,13 @@ public: // Returns the resource usage information for all executors. process::Future<ResourceUsage> usage(); + // Handle the second phase of shutting down an executor for those + // executors that have not properly shutdown within a timeout. + void shutdownExecutorTimeout( + const FrameworkID& frameworkId, + const ExecutorID& executorId, + const ContainerID& containerId); + private: void _authenticate(); void authenticationTimeout(process::Future<bool> future); @@ -409,13 +416,6 @@ private: // exited. void _shutdownExecutor(Framework* framework, Executor* executor); - // Handle the second phase of shutting down an executor for those - // executors that have not properly shutdown within a timeout. - void shutdownExecutorTimeout( - const FrameworkID& frameworkId, - const ExecutorID& executorId, - const ContainerID& containerId); - // Inner class used to namespace HTTP route handlers (see // slave/http.cpp for implementations). class Http http://git-wip-us.apache.org/repos/asf/mesos/blob/201ab8f3/src/tests/slave_tests.cpp ---------------------------------------------------------------------- diff --git a/src/tests/slave_tests.cpp b/src/tests/slave_tests.cpp index 5f47a71..69d4894 100644 --- a/src/tests/slave_tests.cpp +++ b/src/tests/slave_tests.cpp @@ -3261,6 +3261,145 @@ TEST_F(SlaveTest, HTTPSchedulerSlaveRestart) driver.join(); } + +// Ensures that if `ExecutorInfo.shutdown_grace_period` is set, it +// overrides the default value from the agent flag, is observed by +// executor, and is enforced by the agent. +TEST_F(SlaveTest, ExecutorShutdownGracePeriod) +{ + master::Flags masterFlags = CreateMasterFlags(); + Try<Owned<cluster::Master>> master = StartMaster(masterFlags); + ASSERT_SOME(master); + + MockExecutor exec(DEFAULT_EXECUTOR_ID); + + TestContainerizer containerizer(&exec); + + Owned<MasterDetector> detector = master.get()->createDetector(); + + slave::Flags agentFlags = CreateSlaveFlags(); + Try<Owned<cluster::Slave>> slave = + StartSlave(detector.get(), &containerizer, agentFlags); + ASSERT_SOME(slave); + + MockScheduler sched; + MesosSchedulerDriver driver( + &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL); + + // We need framework's ID to shutdown the executor later on. + Future<FrameworkID> frameworkId; + EXPECT_CALL(sched, registered(&driver, _, _)) + .WillOnce(FutureArg<1>(&frameworkId)); + + Future<vector<Offer>> offers; + EXPECT_CALL(sched, resourceOffers(&driver, _)) + .WillOnce(FutureArg<1>(&offers)); + + driver.start(); + + AWAIT_READY(offers); + EXPECT_NE(0u, offers->size()); + Offer offer = offers.get()[0]; + + // Customize executor shutdown grace period to be larger than the + // default agent flag value, so that we can check it is respected. + Duration customGracePeriod = agentFlags.executor_shutdown_grace_period * 2; + + ExecutorInfo executorInfo(DEFAULT_EXECUTOR_INFO); + executorInfo.mutable_shutdown_grace_period()->set_nanoseconds( + customGracePeriod.ns()); + + TaskInfo task; + task.set_name(""); + task.mutable_task_id()->set_value("2"); + task.mutable_slave_id()->MergeFrom(offer.slave_id()); + task.mutable_resources()->MergeFrom(offer.resources()); + task.mutable_executor()->MergeFrom(executorInfo); + + Future<TaskStatus> statusRunning; + EXPECT_CALL(sched, statusUpdate(&driver, _)) + .WillOnce(FutureArg<1>(&statusRunning)); + + EXPECT_CALL(exec, registered(_, _, _, _)) + .Times(1); + + Future<TaskInfo> receivedTask; + EXPECT_CALL(exec, launchTask(_, _)) + .WillOnce(DoAll(SendStatusUpdateFromTask(TASK_RUNNING), + FutureArg<1>(&receivedTask))); + + driver.launchTasks(offer.id(), {task}); + + AWAIT_READY(statusRunning); + EXPECT_EQ(TASK_RUNNING, statusRunning->state()); + EXPECT_EQ(customGracePeriod.ns(), + receivedTask->executor().shutdown_grace_period().nanoseconds()); + + // If executor is asked to shutdown but fails to do so within the grace + // shutdown period, the shutdown is enforced by the agent. The agent + // adjusts its timeout according to `ExecutorInfo.shutdown_grace_period`. + // + // NOTE: Executors relying on the executor driver have a built-in suicide + // mechanism (`ShutdownProcess`), that kills the OS process where the + // executor is running after the grace period ends. This mechanism is + // disabled in tests, hence we do not observe crashes induced by this test. + // The test containerizer only accepts "local" executors and it considers + // them "terminated" only once destroy is called. + + EXPECT_CALL(exec, shutdown(_)) + .Times(AtMost(1)) + .WillOnce(Return()); + + // Once the grace period ends, the agent forcibly shuts down the executor. + Future<Nothing> executorShutdownTimeout = + FUTURE_DISPATCH(slave.get()->pid, &Slave::shutdownExecutorTimeout); + + Future<TaskStatus> statusFailed; + EXPECT_CALL(sched, statusUpdate(&driver, _)) + .WillOnce(FutureArg<1>(&statusFailed)); + + Future<ExecutorID> lostExecutorId; + EXPECT_CALL(sched, executorLost(&driver, DEFAULT_EXECUTOR_ID, _, _)) + .WillOnce(FutureArg<1>(&lostExecutorId)); + + // Ask executor to shutdown. There is no support in the scheduler + // driver for shutting down executors, hence we have to spoof it. + AWAIT_READY(frameworkId); + ShutdownExecutorMessage shutdownMessage; + shutdownMessage.mutable_executor_id()->CopyFrom(DEFAULT_EXECUTOR_ID); + shutdownMessage.mutable_framework_id()->CopyFrom(frameworkId.get()); + post(master.get()->pid, slave.get()->pid, shutdownMessage); + + // Ensure the `ShutdownExecutorMessage` message is + // received by the agent before we start the timer. + Clock::pause(); + Clock::settle(); + Clock::advance(agentFlags.executor_shutdown_grace_period); + Clock::settle(); + + // The executor shutdown timeout should not have fired, since the + // `ExecutorInfo` contains a grace period larger than the agent flag. + EXPECT_TRUE(executorShutdownTimeout.isPending()); + + // Trigger the shutdown grace period from the `ExecutorInfo` + // (note that is is 2x the agent flag). + Clock::advance(agentFlags.executor_shutdown_grace_period); + + AWAIT_READY(executorShutdownTimeout); + + AWAIT_READY(statusFailed); + EXPECT_EQ(TASK_FAILED, statusFailed->state()); + EXPECT_EQ(TaskStatus::REASON_EXECUTOR_TERMINATED, + statusFailed->reason()); + + AWAIT_EXPECT_EQ(DEFAULT_EXECUTOR_ID, lostExecutorId); + + Clock::resume(); + + driver.stop(); + driver.join(); +} + } // namespace tests { } // namespace internal { } // namespace mesos {