Added a persistent volume test for slave recovery. Review: https://reviews.apache.org/r/36683
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/a0304ca5 Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/a0304ca5 Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/a0304ca5 Branch: refs/heads/master Commit: a0304ca5ca888e4ecd3d83422983aed1ed295739 Parents: 7f29a72 Author: Jie Yu <[email protected]> Authored: Tue Jul 21 23:00:16 2015 -0700 Committer: Jie Yu <[email protected]> Committed: Wed Jul 22 15:48:40 2015 -0700 ---------------------------------------------------------------------- src/tests/persistent_volume_tests.cpp | 123 +++++++++++++++++++++++++++++ 1 file changed, 123 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/mesos/blob/a0304ca5/src/tests/persistent_volume_tests.cpp ---------------------------------------------------------------------- diff --git a/src/tests/persistent_volume_tests.cpp b/src/tests/persistent_volume_tests.cpp index c5f72d5..8b791ac 100644 --- a/src/tests/persistent_volume_tests.cpp +++ b/src/tests/persistent_volume_tests.cpp @@ -591,6 +591,129 @@ TEST_F(PersistentVolumeTest, AccessPersistentVolume) Shutdown(); } + +// This test verifies that persistent volumes are recovered properly +// after the slave restarts. The idea is to launch a command which +// keeps testing if the persistent volume exists, and fails if it does +// not. So the framework should not receive a TASK_FAILED after the +// slave finishes recovery. +TEST_F(PersistentVolumeTest, SlaveRecovery) +{ + FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO; + frameworkInfo.set_role("role1"); + frameworkInfo.set_checkpoint(true); + + Try<PID<Master>> master = StartMaster(MasterFlags({frameworkInfo})); + ASSERT_SOME(master); + + slave::Flags slaveFlags = CreateSlaveFlags(); + + slaveFlags.resources = "cpus:2;mem:1024;disk(role1):1024"; + + Try<PID<Slave>> slave = StartSlave(slaveFlags); + ASSERT_SOME(slave); + + MockScheduler sched; + MesosSchedulerDriver driver( + &sched, frameworkInfo, master.get(), DEFAULT_CREDENTIAL); + + Future<FrameworkID> frameworkId; + EXPECT_CALL(sched, registered(&driver, _, _)) + .WillOnce(FutureArg<1>(&frameworkId)); + + Future<vector<Offer>> offers; + EXPECT_CALL(sched, resourceOffers(&driver, _)) + .WillOnce(FutureArg<1>(&offers)) + .WillRepeatedly(Return()); // Ignore subsequent offers. + + driver.start(); + + AWAIT_READY(frameworkId); + + AWAIT_READY(offers); + EXPECT_FALSE(offers.get().empty()); + + Offer offer = offers.get()[0]; + + Resources volume = createPersistentVolume( + Megabytes(64), + "role1", + "id1", + "path1"); + + // Create a task which writes a file in the persistent volume. + Resources taskResources = + Resources::parse("cpus:1;mem:128;disk(role1):32").get() + volume; + + TaskInfo task = createTask( + offer.slave_id(), + taskResources, + "while true; do test -d path1; done"); + + Future<TaskStatus> status1; + Future<TaskStatus> status2; + EXPECT_CALL(sched, statusUpdate(&driver, _)) + .WillOnce(FutureArg<1>(&status1)) + .WillOnce(FutureArg<1>(&status2)); + + Future<Nothing> ack = + FUTURE_DISPATCH(_, &Slave::_statusUpdateAcknowledgement); + + driver.acceptOffers( + {offer.id()}, + {CREATE(volume), LAUNCH({task})}); + + AWAIT_READY(status1); + EXPECT_EQ(task.task_id(), status1.get().task_id()); + EXPECT_EQ(TASK_RUNNING, status1.get().state()); + + // Wait for the ACK to be checkpointed. + AWAIT_READY(ack); + + // Restart the slave. + Stop(slave.get()); + + Future<SlaveReregisteredMessage> slaveReregisteredMessage = + FUTURE_PROTOBUF(SlaveReregisteredMessage(), _, _); + + Future<Nothing> _recover = FUTURE_DISPATCH(_, &Slave::_recover); + + slave = StartSlave(slaveFlags); + ASSERT_SOME(slave); + + Clock::pause(); + + AWAIT_READY(_recover); + + // Wait for slave to schedule reregister timeout. + Clock::settle(); + + // Ensure the slave considers itself recovered. + Clock::advance(slave::EXECUTOR_REREGISTER_TIMEOUT); + + Clock::resume(); + + // Wait for the slave to re-register. + AWAIT_READY(slaveReregisteredMessage); + + // The framework should not receive a TASK_FAILED here since the + // persistent volume shouldn't be affected even if slave restarts. + ASSERT_TRUE(status2.isPending()); + + // NOTE: We kill the task and wait for TASK_KILLED here to make sure + // any pending status updates are received by the framework. + driver.killTask(task.task_id()); + + AWAIT_READY(status2); + EXPECT_EQ(task.task_id(), status2.get().task_id()); + EXPECT_EQ(TASK_KILLED, status2.get().state()); + + driver.stop(); + driver.join(); + + Shutdown(); +} + } // namespace tests { } // namespace internal { } // namespace mesos {
