Repository: mesos Updated Branches: refs/heads/master 89186759a -> b2f73095f
Remove the checkpoint variable entirely from slave/flags.hpp. As a number of tests rely on the checkpointing flag to be false, a few tests had to be adapted. Removed the following test as the tested logic is specific to (old) non-checkpointing slaves: SlaveRecoveryTest.NonCheckpointingSlave: This test checks whether a non-checkpointing slave is not scheduled to a checkpointing framework. It can be removed as all slaves are now checkpointing slaves. Review: https://reviews.apache.org/r/31539 Project: http://git-wip-us.apache.org/repos/asf/mesos/repo Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/b2f73095 Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/b2f73095 Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/b2f73095 Branch: refs/heads/master Commit: b2f73095fd168a75c2754f26d5368f4cff414752 Parents: 8918675 Author: Joerg Schad <[email protected]> Authored: Mon Mar 23 17:03:28 2015 -0700 Committer: Adam B <[email protected]> Committed: Mon Mar 23 18:53:19 2015 -0700 ---------------------------------------------------------------------- include/mesos/mesos.proto | 2 + src/slave/flags.hpp | 12 ++--- src/slave/slave.cpp | 28 +++++------ src/tests/disk_quota_tests.cpp | 1 - src/tests/docker_containerizer_tests.cpp | 10 ---- src/tests/fault_tolerance_tests.cpp | 4 +- src/tests/gc_tests.cpp | 10 ++-- src/tests/master_allocator_tests.cpp | 5 +- src/tests/master_authorization_tests.cpp | 22 ++++----- src/tests/master_tests.cpp | 39 +++++---------- src/tests/master_validation_tests.cpp | 1 - src/tests/mesos.cpp | 3 -- src/tests/partition_tests.cpp | 5 +- src/tests/persistent_volume_tests.cpp | 3 -- src/tests/reconciliation_tests.cpp | 3 +- src/tests/slave_recovery_tests.cpp | 67 +------------------------- src/tests/status_update_manager_tests.cpp | 25 +++------- 17 files changed, 62 insertions(+), 178 deletions(-) ---------------------------------------------------------------------- 
http://git-wip-us.apache.org/repos/asf/mesos/blob/b2f73095/include/mesos/mesos.proto ---------------------------------------------------------------------- diff --git a/include/mesos/mesos.proto b/include/mesos/mesos.proto index faa56cc..3c592d5 100644 --- a/include/mesos/mesos.proto +++ b/include/mesos/mesos.proto @@ -315,6 +315,8 @@ message SlaveInfo { repeated Resource resources = 3; repeated Attribute attributes = 5; optional SlaveID id = 6; + // TODO(joerg84): Remove checkpoint field as with 0.22.0 + // slave checkpointing is enabled for all slaves (MESOS-2317). optional bool checkpoint = 7 [default = false]; } http://git-wip-us.apache.org/repos/asf/mesos/blob/b2f73095/src/slave/flags.hpp ---------------------------------------------------------------------- diff --git a/src/slave/flags.hpp b/src/slave/flags.hpp index dbaf5f5..3da71af 100644 --- a/src/slave/flags.hpp +++ b/src/slave/flags.hpp @@ -47,7 +47,6 @@ class Flags : public logging::Flags { public: Flags() - : checkpoint(true) { add(&Flags::hostname, "hostname", @@ -180,17 +179,14 @@ public: "reconnect: Reconnect with any old live executors.\n" "cleanup : Kill any old live executors and exit.\n" " Use this option when doing an incompatible slave\n" - " or executor upgrade!).\n" - "NOTE: If checkpointed slave doesn't exist, no recovery is performed\n" - " and the slave registers with the master as a new slave.", + " or executor upgrade!).", "reconnect"); add(&Flags::recovery_timeout, "recovery_timeout", "Amount of time alloted for the slave to recover. 
If the slave takes\n" "longer than recovery_timeout to recover, any executors that are\n" - "waiting to reconnect to the slave will self-terminate.\n" - "NOTE: This flag is only applicable when checkpoint is enabled.\n", + "waiting to reconnect to the slave will self-terminate.\n", RECOVERY_TIMEOUT); add(&Flags::strict, @@ -482,9 +478,7 @@ public: double gc_disk_headroom; Duration disk_watch_interval; Duration resource_monitoring_interval; - // TODO(cmaloney): Remove checkpoint variable entirely, fixing tests - // which depend upon it. See MESOS-444 for more details. - bool checkpoint; + std::string recover; Duration recovery_timeout; bool strict; http://git-wip-us.apache.org/repos/asf/mesos/blob/b2f73095/src/slave/slave.cpp ---------------------------------------------------------------------- diff --git a/src/slave/slave.cpp b/src/slave/slave.cpp index f1f2100..c7e65a6 100644 --- a/src/slave/slave.cpp +++ b/src/slave/slave.cpp @@ -325,15 +325,15 @@ void Slave::initialize() info.set_port(self().address.port); info.mutable_resources()->CopyFrom(resources.get()); info.mutable_attributes()->CopyFrom(attributes); - info.set_checkpoint(flags.checkpoint); + // Checkpointing of slaves is always enabled. + info.set_checkpoint(true); LOG(INFO) << "Slave hostname: " << info.hostname(); - LOG(INFO) << "Slave checkpoint: " << stringify(flags.checkpoint); - if (!flags.checkpoint) { - LOG(WARNING) << "Disabling checkpointing is deprecated and the --checkpoint" - " flag will be removed in a future release. Please avoid" - " using this flag"; - } + // Checkpointing of slaves is always enabled. + // We keep this line to be compatible with + // older monitoring tools. + // TODO(joerg84): Delete after 0.23. + LOG(INFO) << "Slave checkpoint: " << stringify(true); statusUpdateManager->initialize(defer(self(), &Slave::forward, lambda::_1)); @@ -786,16 +786,14 @@ void Slave::registered(const UPID& from, const SlaveID& slaveId) info.mutable_id()->CopyFrom(slaveId); // Store the slave id. 
- if (flags.checkpoint) { - // Create the slave meta directory. - paths::createSlaveDirectory(metaDir, slaveId); + // Create the slave meta directory. + paths::createSlaveDirectory(metaDir, slaveId); - // Checkpoint slave info. - const string& path = paths::getSlaveInfoPath(metaDir, slaveId); + // Checkpoint slave info. + const string& path = paths::getSlaveInfoPath(metaDir, slaveId); - VLOG(1) << "Checkpointing SlaveInfo to '" << path << "'"; - CHECK_SOME(state::checkpoint(path, info)); - } + VLOG(1) << "Checkpointing SlaveInfo to '" << path << "'"; + CHECK_SOME(state::checkpoint(path, info)); // If we don't get a ping from the master, trigger a // re-registration. This needs to be done once registered, http://git-wip-us.apache.org/repos/asf/mesos/blob/b2f73095/src/tests/disk_quota_tests.cpp ---------------------------------------------------------------------- diff --git a/src/tests/disk_quota_tests.cpp b/src/tests/disk_quota_tests.cpp index 9c3a881..ca5ea86 100644 --- a/src/tests/disk_quota_tests.cpp +++ b/src/tests/disk_quota_tests.cpp @@ -413,7 +413,6 @@ TEST_F(DiskQuotaTest, SlaveRecovery) ASSERT_SOME(master); slave::Flags flags = CreateSlaveFlags(); - flags.checkpoint = true; flags.isolation = "posix/cpu,posix/mem,posix/disk"; flags.container_disk_watch_interval = Milliseconds(1); http://git-wip-us.apache.org/repos/asf/mesos/blob/b2f73095/src/tests/docker_containerizer_tests.cpp ---------------------------------------------------------------------- diff --git a/src/tests/docker_containerizer_tests.cpp b/src/tests/docker_containerizer_tests.cpp index 06cd3d8..fdd706a 100644 --- a/src/tests/docker_containerizer_tests.cpp +++ b/src/tests/docker_containerizer_tests.cpp @@ -1777,11 +1777,6 @@ TEST_F(DockerContainerizerTest, ROOT_DOCKER_SlaveRecoveryTaskContainer) slave::Flags flags = CreateSlaveFlags(); - // Setup recovery slave flags. 
- flags.checkpoint = true; - flags.recover = "reconnect"; - flags.strict = true; - MockDocker* mockDocker = new MockDocker(tests::flags.docker); Shared<Docker> docker(mockDocker); @@ -1950,11 +1945,6 @@ TEST_F(DockerContainerizerTest, slave::Flags flags = CreateSlaveFlags(); - // Setup recovery slave flags. - flags.checkpoint = true; - flags.recover = "reconnect"; - flags.strict = true; - MockDocker* mockDocker = new MockDocker(tests::flags.docker); Shared<Docker> docker(mockDocker); http://git-wip-us.apache.org/repos/asf/mesos/blob/b2f73095/src/tests/fault_tolerance_tests.cpp ---------------------------------------------------------------------- diff --git a/src/tests/fault_tolerance_tests.cpp b/src/tests/fault_tolerance_tests.cpp index 9ac75b1..a637c32 100644 --- a/src/tests/fault_tolerance_tests.cpp +++ b/src/tests/fault_tolerance_tests.cpp @@ -120,7 +120,9 @@ TEST_F(FaultToleranceTest, SlaveLost) EXPECT_CALL(sched, slaveLost(&driver, offers.get()[0].slave_id())) .WillOnce(FutureSatisfy(&slaveLost)); - ShutdownSlaves(); + // Stop the checkpointing slave with explicit shutdown message + // so that the master does not wait for it to reconnect. + Stop(slave.get(), true); AWAIT_READY(offerRescinded); AWAIT_READY(slaveLost); http://git-wip-us.apache.org/repos/asf/mesos/blob/b2f73095/src/tests/gc_tests.cpp ---------------------------------------------------------------------- diff --git a/src/tests/gc_tests.cpp b/src/tests/gc_tests.cpp index deaa6b1..5666cd1 100644 --- a/src/tests/gc_tests.cpp +++ b/src/tests/gc_tests.cpp @@ -249,6 +249,8 @@ TEST_F(GarbageCollectorTest, Prune) class GarbageCollectorIntegrationTest : public MesosTest {}; +// This test ensures that garbage collection removes +// the slave working directory after a slave restart. 
TEST_F(GarbageCollectorIntegrationTest, Restart) { Try<PID<Master> > master = StartMaster(); @@ -313,8 +315,6 @@ TEST_F(GarbageCollectorIntegrationTest, Restart) ASSERT_TRUE(os::exists(slaveDir)); - Clock::pause(); - EXPECT_CALL(exec, shutdown(_)) .Times(AtMost(1)); @@ -325,10 +325,14 @@ TEST_F(GarbageCollectorIntegrationTest, Restart) EXPECT_CALL(sched, slaveLost(_, _)) .WillOnce(FutureSatisfy(&slaveLost)); - Stop(slave.get()); + // Stop the slave with explicit shutdown as otherwise with + // checkpointing the master will wait for the slave to reconnect. + Stop(slave.get(), true); AWAIT_READY(slaveLost); + Clock::pause(); + Future<Nothing> schedule = FUTURE_DISPATCH(_, &GarbageCollectorProcess::schedule); http://git-wip-us.apache.org/repos/asf/mesos/blob/b2f73095/src/tests/master_allocator_tests.cpp ---------------------------------------------------------------------- diff --git a/src/tests/master_allocator_tests.cpp b/src/tests/master_allocator_tests.cpp index a432d02..03a1bb8 100644 --- a/src/tests/master_allocator_tests.cpp +++ b/src/tests/master_allocator_tests.cpp @@ -511,7 +511,6 @@ TYPED_TEST(MasterAllocatorTest, FrameworkExited) TestContainerizer containerizer(execs); slave::Flags flags = this->CreateSlaveFlags(); - flags.resources = Some("cpus:3;mem:1024"); EXPECT_CALL(this->allocator, addSlave(_, _, _, _)); @@ -704,7 +703,9 @@ TYPED_TEST(MasterAllocatorTest, SlaveLost) EXPECT_CALL(sched, slaveLost(_, _)); - this->ShutdownSlaves(); + // Stop the checkpointing slave with explicit shutdown message + // so that the master does not wait for it to reconnect. 
+ this->Stop(slave1.get(), true); AWAIT_READY(removeSlave); http://git-wip-us.apache.org/repos/asf/mesos/blob/b2f73095/src/tests/master_authorization_tests.cpp ---------------------------------------------------------------------- diff --git a/src/tests/master_authorization_tests.cpp b/src/tests/master_authorization_tests.cpp index ff706ed..ac79303 100644 --- a/src/tests/master_authorization_tests.cpp +++ b/src/tests/master_authorization_tests.cpp @@ -341,8 +341,9 @@ TEST_F(MasterAuthorizationTest, SlaveRemoved) EXPECT_CALL(sched, slaveLost(&driver, _)) .WillOnce(FutureSatisfy(&slaveLost)); - // Now stop the slave. - Stop(slave.get()); + // Stop the slave with explicit shutdown as otherwise with + // checkpointing the master will wait for the slave to reconnect. + Stop(slave.get(), true); AWAIT_READY(slaveLost); @@ -385,16 +386,12 @@ TEST_F(MasterAuthorizationTest, SlaveRemoved) TEST_F(MasterAuthorizationTest, SlaveDisconnected) { MockAuthorizer authorizer; - Try<PID<Master> > master = StartMaster(&authorizer); + Try<PID<Master>> master = StartMaster(&authorizer); ASSERT_SOME(master); MockExecutor exec(DEFAULT_EXECUTOR_ID); - // Create a checkpointing slave so that a disconnected slave is not - // immediately removed. - slave::Flags flags = CreateSlaveFlags(); - flags.checkpoint = true; - Try<PID<Slave> > slave = StartSlave(&exec, flags); + Try<PID<Slave> > slave = StartSlave(&exec); ASSERT_SOME(slave); MockScheduler sched; @@ -436,8 +433,9 @@ TEST_F(MasterAuthorizationTest, SlaveDisconnected) Future<Nothing> deactivateSlave = FUTURE_DISPATCH(_, &MesosAllocatorProcess::deactivateSlave); - // Now stop the slave. - Stop(slave.get()); + // Stop the checkpointing slave with explicit shutdown message + // so that the master does not wait for it to reconnect. 
+ Stop(slave.get(), true); AWAIT_READY(deactivateSlave); @@ -465,10 +463,10 @@ TEST_F(MasterAuthorizationTest, SlaveDisconnected) EXPECT_EQ(1u, stats.values["master/tasks_lost"]); EXPECT_EQ(1u, stats.values.count( - "master/task_lost/source_master/reason_slave_disconnected")); + "master/task_lost/source_master/reason_slave_removed")); EXPECT_EQ( 1u, - stats.values["master/task_lost/source_master/reason_slave_disconnected"]); + stats.values["master/task_lost/source_master/reason_slave_removed"]); driver.stop(); driver.join(); http://git-wip-us.apache.org/repos/asf/mesos/blob/b2f73095/src/tests/master_tests.cpp ---------------------------------------------------------------------- diff --git a/src/tests/master_tests.cpp b/src/tests/master_tests.cpp index e69348b..78e775d 100644 --- a/src/tests/master_tests.cpp +++ b/src/tests/master_tests.cpp @@ -456,9 +456,8 @@ TEST_F(MasterTest, KillUnknownTaskSlaveInTransition) MockExecutor exec(DEFAULT_EXECUTOR_ID); - // Start a checkpointing slave. + // Reuse slaveFlags so both StartSlave() use the same work_dir. slave::Flags slaveFlags = CreateSlaveFlags(); - slaveFlags.checkpoint = true; Try<PID<Slave> > slave = StartSlave(&exec, slaveFlags); ASSERT_SOME(slave); @@ -1301,7 +1300,6 @@ TEST_F(MasterTest, LaunchAcrossSlavesTest) Resources twoSlaves = fullSlave + fullSlave; slave::Flags flags = CreateSlaveFlags(); - flags.resources = Option<string>(stringify(fullSlave)); Try<PID<Slave> > slave1 = StartSlave(&containerizer, flags); @@ -1331,7 +1329,11 @@ TEST_F(MasterTest, LaunchAcrossSlavesTest) .WillOnce(FutureArg<1>(&offers2)) .WillRepeatedly(Return()); // Ignore subsequent offers. - Try<PID<Slave> > slave2 = StartSlave(&containerizer, flags); + // Create new Flags as we require another work_dir for checkpoints. 
+ slave::Flags flags2 = CreateSlaveFlags(); + flags2.resources = Option<string>(stringify(fullSlave)); + + Try<PID<Slave>> slave2 = StartSlave(&containerizer, flags2); ASSERT_SOME(slave2); AWAIT_READY(offers2); @@ -1681,13 +1683,9 @@ TEST_F(MasterTest, RecoveredSlaveDoesNotReregister) Future<SlaveRegisteredMessage> slaveRegisteredMessage = FUTURE_PROTOBUF(SlaveRegisteredMessage(), master.get(), _); + // Reuse slaveFlags so both StartSlave() use the same work_dir. slave::Flags slaveFlags = this->CreateSlaveFlags(); - // Setup recovery slave flags. - slaveFlags.checkpoint = true; - slaveFlags.recover = "reconnect"; - slaveFlags.strict = true; - Try<PID<Slave> > slave = StartSlave(slaveFlags); ASSERT_SOME(slave); @@ -1761,13 +1759,9 @@ TEST_F(MasterTest, NonStrictRegistryWriteOnly) Future<SlaveRegisteredMessage> slaveRegisteredMessage = FUTURE_PROTOBUF(SlaveRegisteredMessage(), master.get(), _); + // Reuse slaveFlags so both StartSlave() use the same work_dir. slave::Flags slaveFlags = this->CreateSlaveFlags(); - // Setup recovery slave flags. - slaveFlags.checkpoint = true; - slaveFlags.recover = "reconnect"; - slaveFlags.strict = true; - Try<PID<Slave> > slave = StartSlave(slaveFlags); ASSERT_SOME(slave); @@ -1916,11 +1910,9 @@ TEST_F(MasterTest, CancelRecoveredSlaveRemoval) Future<SlaveRegisteredMessage> slaveRegisteredMessage = FUTURE_PROTOBUF(SlaveRegisteredMessage(), master.get(), _); - // Start a slave with checkpointing. + // Reuse slaveFlags so both StartSlave() use the same work_dir. slave::Flags slaveFlags = CreateSlaveFlags(); - slaveFlags.checkpoint = true; - slaveFlags.recover = "reconnect"; - slaveFlags.strict = true; + Try<PID<Slave> > slave = StartSlave(slaveFlags); ASSERT_SOME(slave); @@ -2011,13 +2003,9 @@ TEST_F(MasterTest, RecoveredSlaveReregisters) Future<SlaveRegisteredMessage> slaveRegisteredMessage = FUTURE_PROTOBUF(SlaveRegisteredMessage(), master.get(), _); + // Reuse slaveFlags so both StartSlave() use the same work_dir. 
slave::Flags slaveFlags = this->CreateSlaveFlags(); - // Setup recovery slave flags. - slaveFlags.checkpoint = true; - slaveFlags.recover = "reconnect"; - slaveFlags.strict = true; - Try<PID<Slave> > slave = StartSlave(slaveFlags); ASSERT_SOME(slave); @@ -2960,10 +2948,7 @@ TEST_F(MasterTest, SlaveActiveEndpoint) Future<process::Message> slaveRegisteredMessage = FUTURE_MESSAGE(Eq(SlaveRegisteredMessage().GetTypeName()), _, _); - // Start a checkpointing slave. - slave::Flags flags = CreateSlaveFlags(); - flags.checkpoint = true; - Try<PID<Slave>> slave = StartSlave(flags); + Try<PID<Slave>> slave = StartSlave(); ASSERT_SOME(slave); AWAIT_READY(slaveRegisteredMessage); http://git-wip-us.apache.org/repos/asf/mesos/blob/b2f73095/src/tests/master_validation_tests.cpp ---------------------------------------------------------------------- diff --git a/src/tests/master_validation_tests.cpp b/src/tests/master_validation_tests.cpp index c874292..4f2ad58 100644 --- a/src/tests/master_validation_tests.cpp +++ b/src/tests/master_validation_tests.cpp @@ -206,7 +206,6 @@ TEST_F(CreateOperationValidationTest, InsufficientDiskResource) ASSERT_SOME(master); slave::Flags slaveFlags = CreateSlaveFlags(); - slaveFlags.checkpoint = true; slaveFlags.resources = "disk(role1):1024"; Try<PID<Slave>> slave = StartSlave(slaveFlags); http://git-wip-us.apache.org/repos/asf/mesos/blob/b2f73095/src/tests/mesos.cpp ---------------------------------------------------------------------- diff --git a/src/tests/mesos.cpp b/src/tests/mesos.cpp index c8f43d2..11e8833 100644 --- a/src/tests/mesos.cpp +++ b/src/tests/mesos.cpp @@ -165,9 +165,6 @@ slave::Flags MesosTest::CreateSlaveFlags() flags.credential = path; - // TODO(vinod): Consider making this true and fixing the tests. 
- flags.checkpoint = false; - flags.resources = "cpus:2;mem:1024;disk:1024;ports:[31000-32000]"; flags.registration_backoff_factor = Milliseconds(10); http://git-wip-us.apache.org/repos/asf/mesos/blob/b2f73095/src/tests/partition_tests.cpp ---------------------------------------------------------------------- diff --git a/src/tests/partition_tests.cpp b/src/tests/partition_tests.cpp index bb96aed..1018e47 100644 --- a/src/tests/partition_tests.cpp +++ b/src/tests/partition_tests.cpp @@ -434,10 +434,7 @@ TEST_F(PartitionTest, OneWayPartitionMasterToSlave) // Ensure a ping reaches the slave. Future<Message> ping = FUTURE_MESSAGE(Eq("PING"), _, _); - // Start a checkpointing slave. - slave::Flags flags = CreateSlaveFlags(); - flags.checkpoint = true; - Try<PID<Slave> > slave = StartSlave(flags); + Try<PID<Slave>> slave = StartSlave(); ASSERT_SOME(slave); AWAIT_READY(slaveRegisteredMessage); http://git-wip-us.apache.org/repos/asf/mesos/blob/b2f73095/src/tests/persistent_volume_tests.cpp ---------------------------------------------------------------------- diff --git a/src/tests/persistent_volume_tests.cpp b/src/tests/persistent_volume_tests.cpp index b617117..c5f72d5 100644 --- a/src/tests/persistent_volume_tests.cpp +++ b/src/tests/persistent_volume_tests.cpp @@ -182,7 +182,6 @@ TEST_F(PersistentVolumeTest, ResourcesCheckpointing) ASSERT_SOME(master); slave::Flags slaveFlags = CreateSlaveFlags(); - slaveFlags.checkpoint = true; slaveFlags.resources = "disk(role1):1024"; Try<PID<Slave>> slave = StartSlave(slaveFlags); @@ -413,8 +412,6 @@ TEST_F(PersistentVolumeTest, IncompatibleCheckpointedResources) ASSERT_SOME(master); slave::Flags slaveFlags = CreateSlaveFlags(); - - slaveFlags.checkpoint = true; slaveFlags.resources = "disk(role1):1024"; MockExecutor exec(DEFAULT_EXECUTOR_ID); http://git-wip-us.apache.org/repos/asf/mesos/blob/b2f73095/src/tests/reconciliation_tests.cpp ---------------------------------------------------------------------- diff --git 
a/src/tests/reconciliation_tests.cpp b/src/tests/reconciliation_tests.cpp index acd7002..6042d8c 100644 --- a/src/tests/reconciliation_tests.cpp +++ b/src/tests/reconciliation_tests.cpp @@ -383,9 +383,8 @@ TEST_F(ReconciliationTest, SlaveInTransition) Try<PID<Master> > master = StartMaster(); ASSERT_SOME(master); - // Start a checkpointing slave. + // Reuse slaveFlags so both StartSlave() use the same work_dir. slave::Flags slaveFlags = CreateSlaveFlags(); - slaveFlags.checkpoint = true; Future<SlaveRegisteredMessage> slaveRegisteredMessage = FUTURE_PROTOBUF(SlaveRegisteredMessage(), _, _); http://git-wip-us.apache.org/repos/asf/mesos/blob/b2f73095/src/tests/slave_recovery_tests.cpp ---------------------------------------------------------------------- diff --git a/src/tests/slave_recovery_tests.cpp b/src/tests/slave_recovery_tests.cpp index 53adae0..87f4a6a 100644 --- a/src/tests/slave_recovery_tests.cpp +++ b/src/tests/slave_recovery_tests.cpp @@ -134,14 +134,7 @@ class SlaveRecoveryTest : public ContainerizerTest<T> public: virtual slave::Flags CreateSlaveFlags() { - slave::Flags flags = ContainerizerTest<T>::CreateSlaveFlags(); - - // Setup recovery slave flags. - flags.checkpoint = true; - flags.recover = "reconnect"; - flags.strict = true; - - return flags; + return ContainerizerTest<T>::CreateSlaveFlags(); } }; @@ -1237,64 +1230,6 @@ TYPED_TEST(SlaveRecoveryTest, NonCheckpointingFramework) } -// This test ensures that a non-checkpointing slave's resources are not offered -// to a framework that requires checkpointing. -TYPED_TEST(SlaveRecoveryTest, NonCheckpointingSlave) -{ - Try<PID<Master> > master = this->StartMaster(); - ASSERT_SOME(master); - - // Disable checkpointing for the slave. 
- slave::Flags flags = this->CreateSlaveFlags(); - flags.checkpoint = false; - - - Future<RegisterSlaveMessage> registerSlaveMessage = - FUTURE_PROTOBUF(RegisterSlaveMessage(), _, _); - - Fetcher fetcher; - - Try<TypeParam*> containerizer = TypeParam::create(flags, true, &fetcher); - ASSERT_SOME(containerizer); - - Try<PID<Slave> > slave = this->StartSlave(containerizer.get(), flags); - ASSERT_SOME(slave); - - AWAIT_READY(registerSlaveMessage); - - MockScheduler sched; - - // Enable checkpointing for the framework. - FrameworkInfo frameworkInfo; - frameworkInfo.CopyFrom(DEFAULT_FRAMEWORK_INFO); - frameworkInfo.set_checkpoint(true); - - MesosSchedulerDriver driver( - &sched, frameworkInfo, master.get(), DEFAULT_CREDENTIAL); - - Future<Nothing> registered; - EXPECT_CALL(sched, registered(_, _, _)) - .WillOnce(FutureSatisfy(&registered)); - - EXPECT_CALL(sched, resourceOffers(_, _)) - .Times(0); // No offers should be received! - - Clock::pause(); - - driver.start(); - - // Wait for scheduler to register. We do a Clock::settle() here - // to ensure that no offers are received by the scheduler. - AWAIT_READY(registered); - Clock::settle(); - - driver.stop(); - driver.join(); - - this->Shutdown(); - delete containerizer.get(); -} - // Scheduler asks a restarted slave to kill a task that has been // running before the slave restarted. 
This test ensures that a // restarted slave is able to communicate with all components http://git-wip-us.apache.org/repos/asf/mesos/blob/b2f73095/src/tests/status_update_manager_tests.cpp ---------------------------------------------------------------------- diff --git a/src/tests/status_update_manager_tests.cpp b/src/tests/status_update_manager_tests.cpp index 216a22e..36dab42 100644 --- a/src/tests/status_update_manager_tests.cpp +++ b/src/tests/status_update_manager_tests.cpp @@ -97,8 +97,9 @@ TEST_F(StatusUpdateManagerTest, CheckpointStatusUpdate) MockExecutor exec(DEFAULT_EXECUTOR_ID); + // Require flags to retrieve work_dir when recovering + // the checkpointed data. slave::Flags flags = CreateSlaveFlags(); - flags.checkpoint = true; Try<PID<Slave> > slave = StartSlave(&exec, flags); ASSERT_SOME(slave); @@ -192,7 +193,6 @@ TEST_F(StatusUpdateManagerTest, RetryStatusUpdate) MockExecutor exec(DEFAULT_EXECUTOR_ID); slave::Flags flags = CreateSlaveFlags(); - flags.checkpoint = true; Try<PID<Slave> > slave = StartSlave(&exec, flags); ASSERT_SOME(slave); @@ -266,10 +266,7 @@ TEST_F(StatusUpdateManagerTest, IgnoreDuplicateStatusUpdateAck) MockExecutor exec(DEFAULT_EXECUTOR_ID); - slave::Flags flags = CreateSlaveFlags(); - flags.checkpoint = true; - - Try<PID<Slave> > slave = StartSlave(&exec, flags); + Try<PID<Slave> > slave = StartSlave(&exec); ASSERT_SOME(slave); FrameworkInfo frameworkInfo; // Bug in gcc 4.1.*, must assign on next line. @@ -382,10 +379,7 @@ TEST_F(StatusUpdateManagerTest, IgnoreUnexpectedStatusUpdateAck) MockExecutor exec(DEFAULT_EXECUTOR_ID); - slave::Flags flags = CreateSlaveFlags(); - flags.checkpoint = true; - - Try<PID<Slave> > slave = StartSlave(&exec, flags); + Try<PID<Slave> > slave = StartSlave(&exec); ASSERT_SOME(slave); FrameworkInfo frameworkInfo; // Bug in gcc 4.1.*, must assign on next line. 
@@ -474,10 +468,7 @@ TEST_F(StatusUpdateManagerTest, DuplicateTerminalUpdateBeforeAck) MockExecutor exec(DEFAULT_EXECUTOR_ID); - slave::Flags flags = CreateSlaveFlags(); - flags.checkpoint = true; - - Try<PID<Slave> > slave = StartSlave(&exec, flags); + Try<PID<Slave> > slave = StartSlave(&exec); ASSERT_SOME(slave); FrameworkInfo frameworkInfo; // Bug in gcc 4.1.*, must assign on next line. @@ -587,7 +578,6 @@ TEST_F(StatusUpdateManagerTest, DuplicateTerminalUpdateAfterAck) MockExecutor exec(DEFAULT_EXECUTOR_ID); slave::Flags flags = CreateSlaveFlags(); - flags.checkpoint = true; Try<PID<Slave> > slave = StartSlave(&exec, flags); ASSERT_SOME(slave); @@ -685,10 +675,7 @@ TEST_F(StatusUpdateManagerTest, DuplicateUpdateBeforeAck) MockExecutor exec(DEFAULT_EXECUTOR_ID); - slave::Flags flags = CreateSlaveFlags(); - flags.checkpoint = true; - - Try<PID<Slave> > slave = StartSlave(&exec, flags); + Try<PID<Slave> > slave = StartSlave(&exec); ASSERT_SOME(slave); FrameworkInfo frameworkInfo; // Bug in gcc 4.1.*, must assign on next line.
