Added a test to verify that task launch order is preserved. The agent should launch tasks in the same order in which they are received. In the task launch path, there are currently two asynchronous steps which may complete out of order: unschedule GC and task authorization.
This test simulates the reordering of the completion of the unschedule GC step and verifies that, despite the reordering, tasks will still launch in their original order. Review: https://reviews.apache.org/r/66145/ Project: http://git-wip-us.apache.org/repos/asf/mesos/repo Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/5dfa4a5b Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/5dfa4a5b Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/5dfa4a5b Branch: refs/heads/master Commit: 5dfa4a5b06df860a1ecb29702ff0e245432ceb42 Parents: 028ea54 Author: Meng Zhu <[email protected]> Authored: Thu Apr 5 17:44:35 2018 -0700 Committer: Greg Mann <[email protected]> Committed: Thu Apr 5 17:58:09 2018 -0700 ---------------------------------------------------------------------- src/tests/slave_tests.cpp | 148 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 148 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/mesos/blob/5dfa4a5b/src/tests/slave_tests.cpp ---------------------------------------------------------------------- diff --git a/src/tests/slave_tests.cpp b/src/tests/slave_tests.cpp index 95d4143..14d39a2 100644 --- a/src/tests/slave_tests.cpp +++ b/src/tests/slave_tests.cpp @@ -141,6 +141,7 @@ using testing::_; using testing::AtMost; using testing::DoAll; using testing::Eq; +using testing::StrEq; using testing::Invoke; using testing::InvokeWithoutArgs; using testing::Return; @@ -4928,6 +4929,153 @@ TEST_F(SlaveTest, RemoveExecutorUponFailedTaskGroupLaunch) } +// This test ensures that tasks using the same executor are successfully +// launched in the order in which the agent receives the RunTask(Group)Message, +// even when we manually reorder the completion of the asynchronous unschedule +// GC step. See MESOS-8624. +TEST_F(SlaveTest, LaunchTasksReorderUnscheduleGC) +{ + Clock::pause(); + + master::Flags masterFlags = CreateMasterFlags(); + Try<Owned<cluster::Master>> master = StartMaster(masterFlags); + ASSERT_SOME(master); + + Owned<MasterDetector> detector = master.get()->createDetector(); + MockGarbageCollector mockGarbageCollector; + slave::Flags slaveFlags = CreateSlaveFlags(); + + // Start a mock slave. + Try<Owned<cluster::Slave>> slave = + StartSlave(detector.get(), &mockGarbageCollector, slaveFlags, true); + + ASSERT_SOME(slave); + ASSERT_NE(nullptr, slave.get()->mock()); + + slave.get()->start(); + + auto scheduler = std::make_shared<v1::MockHTTPScheduler>(); + + EXPECT_CALL(*scheduler, connected(_)) + .WillOnce(v1::scheduler::SendSubscribe(v1::DEFAULT_FRAMEWORK_INFO)); + + Future<v1::scheduler::Event::Subscribed> subscribed; + EXPECT_CALL(*scheduler, subscribed(_, _)) + .WillOnce(FutureArg<1>(&subscribed)); + + EXPECT_CALL(*scheduler, heartbeat(_)) + .WillRepeatedly(Return()); // Ignore heartbeats. + + Future<v1::scheduler::Event::Offers> offers; + EXPECT_CALL(*scheduler, offers(_, _)) + .WillOnce(FutureArg<1>(&offers)) + .WillRepeatedly(Return()); // Ignore subsequent offers. + + v1::scheduler::TestMesos mesos( + master.get()->pid, ContentType::PROTOBUF, scheduler); + + // Advance the clock to trigger both agent registration and a batch + // allocation. + Clock::advance(slaveFlags.registration_backoff_factor); + Clock::advance(masterFlags.allocation_interval); + + AWAIT_READY(subscribed); + + AWAIT_READY(offers); + ASSERT_FALSE(offers->offers().empty()); + + const v1::Offer& offer = offers->offers(0); + const v1::AgentID& agentId = offer.agent_id(); + + v1::Resources resources = + v1::Resources::parse("cpus:0.1;mem:32;disk:32").get(); + + v1::FrameworkID frameworkId(subscribed->framework_id()); + + v1::ExecutorInfo executorInfo = v1::createExecutorInfo( + "default", None(), resources, v1::ExecutorInfo::DEFAULT, frameworkId); + + // Create two separate task groups that use the same executor. + v1::TaskInfo taskInfo1 = v1::createTask(agentId, resources, "sleep 1000"); + v1::TaskGroupInfo taskGroup1 = v1::createTaskGroupInfo({taskInfo1}); + + v1::TaskInfo taskInfo2 = v1::createTask(agentId, resources, "sleep 1000"); + v1::TaskGroupInfo taskGroup2 = v1::createTaskGroupInfo({taskInfo2}); + + v1::Offer::Operation launchGroup1 = + v1::LAUNCH_GROUP(executorInfo, taskGroup1); + v1::Offer::Operation launchGroup2 = + v1::LAUNCH_GROUP(executorInfo, taskGroup2); + + // The `unschedule()` function is used to prevent premature garbage + // collection when the executor directory already exists due to a + // previously-launched task. Simulate this scenario by creating the + // executor directory manually. + string path = paths::getExecutorPath( + slaveFlags.work_dir, + devolve(agentId), + devolve(frameworkId), + devolve(executorInfo.executor_id())); + + Try<Nothing> mkdir = os::mkdir(path, true); + CHECK_SOME(mkdir); + + Promise<bool> promise1; + + // Catch the unschedule GC step and reorder the task group launches by + // pausing the processing of `taskGroup1` while allowing the processing + // of `taskGroup1` to continue. + EXPECT_CALL(mockGarbageCollector, unschedule(StrEq(path))) + .WillOnce(Return(promise1.future())) + .WillRepeatedly(Return(true)); + + Future<v1::scheduler::Event::Update> taskStarting1, taskStarting2; + Future<v1::scheduler::Event::Update> taskRunning1, taskRunning2; + EXPECT_CALL(*scheduler, update(_, TaskStatusUpdateTaskIdEq(taskInfo1))) + .WillOnce(DoAll( + FutureArg<1>(&taskStarting1), + v1::scheduler::SendAcknowledge(frameworkId, agentId))) + .WillOnce(DoAll( + FutureArg<1>(&taskRunning1), + v1::scheduler::SendAcknowledge(frameworkId, agentId))); + EXPECT_CALL(*scheduler, update(_, TaskStatusUpdateTaskIdEq(taskInfo2))) + .WillOnce(DoAll( + FutureArg<1>(&taskStarting2), + v1::scheduler::SendAcknowledge(frameworkId, agentId))) + .WillOnce(DoAll( + FutureArg<1>(&taskRunning2), + v1::scheduler::SendAcknowledge(frameworkId, agentId))); + + // Launch the two task groups. + mesos.send( + v1::createCallAccept(frameworkId, offer, {launchGroup1, launchGroup2})); + + // Settle the clock to finish the processing of `taskGroup2`. + Clock::settle(); + + ASSERT_TRUE(taskStarting2.isPending()); + + // Resume the processing of `taskGroup1`. + promise1.set(true); + + // If taskgroup2 tries to launch the executor first (i.e. if the order is + // not corrected by the agent), taskgroup2 will be subsequently dropped. The + // successful launch of both tasks verifies that the agent enforces the task + // launch order. + AWAIT_READY(taskStarting1); + AWAIT_READY(taskStarting2); + + ASSERT_EQ(v1::TASK_STARTING, taskStarting1->status().state()); + ASSERT_EQ(v1::TASK_STARTING, taskStarting2->status().state()); + + AWAIT_READY(taskRunning1); + AWAIT_READY(taskRunning2); + + ASSERT_EQ(v1::TASK_RUNNING, taskRunning1->status().state()); + ASSERT_EQ(v1::TASK_RUNNING, taskRunning2->status().state()); +} + + // This test ensures that agent sends ExitedExecutorMessage when the task // fails to launch due to task authorization failure and that master's executor // bookkeeping entry is removed.
