Maintenance Primitives: Set offer `unavailability` for maintenance. Review: https://reviews.apache.org/r/37172
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/ee1eb2ba Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/ee1eb2ba Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/ee1eb2ba Branch: refs/heads/master Commit: ee1eb2ba6b17cba66ad99f4e6344416c2d2709d2 Parents: 9e7ee6b Author: Joris Van Remoortere <[email protected]> Authored: Tue Aug 25 18:39:35 2015 -0400 Committer: Joris Van Remoortere <[email protected]> Committed: Mon Sep 14 13:58:37 2015 -0400 ---------------------------------------------------------------------- include/mesos/type_utils.hpp | 24 ++++++++ include/mesos/v1/mesos.hpp | 24 ++++++++ src/master/master.cpp | 8 +++ src/tests/master_maintenance_tests.cpp | 96 +++++++++++++++++++++++++++++ 4 files changed, 152 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/mesos/blob/ee1eb2ba/include/mesos/type_utils.hpp ---------------------------------------------------------------------- diff --git a/include/mesos/type_utils.hpp b/include/mesos/type_utils.hpp index 4fb0037..64c2a86 100644 --- a/include/mesos/type_utils.hpp +++ b/include/mesos/type_utils.hpp @@ -108,6 +108,18 @@ inline bool operator==(const TaskID& left, const TaskID& right) } +inline bool operator==(const TimeInfo& left, const TimeInfo& right) +{ + return left.nanoseconds() == right.nanoseconds(); +} + + +inline bool operator==(const DurationInfo& left, const DurationInfo& right) +{ + return left.nanoseconds() == right.nanoseconds(); +} + + inline bool operator==(const ContainerID& left, const std::string& right) { return left.value() == right; @@ -183,6 +195,18 @@ inline bool operator!=(const SlaveID& left, const SlaveID& right) } +inline bool operator!=(const TimeInfo& left, const TimeInfo& right) +{ + return !(left == right); +} + + +inline bool operator!=(const DurationInfo& left, const DurationInfo& right) +{ + return !(left == right); +} + + inline bool operator<(const ContainerID& left, const ContainerID& right) { return left.value() < right.value(); http://git-wip-us.apache.org/repos/asf/mesos/blob/ee1eb2ba/include/mesos/v1/mesos.hpp ---------------------------------------------------------------------- diff --git a/include/mesos/v1/mesos.hpp b/include/mesos/v1/mesos.hpp index 0d695f3..f8f9617 100644 --- a/include/mesos/v1/mesos.hpp +++ b/include/mesos/v1/mesos.hpp @@ -89,6 +89,18 @@ inline bool operator==(const TaskID& left, const TaskID& right) } +inline bool operator==(const TimeInfo& left, const TimeInfo& right) +{ + return left.nanoseconds() == right.nanoseconds(); +} + + +inline bool operator==(const DurationInfo& left, const DurationInfo& right) +{ + return left.nanoseconds() == right.nanoseconds(); +} + + inline bool operator==(const ContainerID& left, const std::string& right) { return left.value() == right; @@ -125,6 +137,18 @@ inline bool operator==(const TaskID& left, const std::string& right) } +inline bool operator!=(const TimeInfo& left, const TimeInfo& right) +{ + return !(left == right); +} + + +inline bool operator!=(const DurationInfo& left, const DurationInfo& right) +{ + return !(left == right); +} + + inline bool operator!=(const ContainerID& left, const ContainerID& right) { return left.value() != right.value(); http://git-wip-us.apache.org/repos/asf/mesos/blob/ee1eb2ba/src/master/master.cpp ---------------------------------------------------------------------- diff --git a/src/master/master.cpp b/src/master/master.cpp index 31fc83d..5c2f032 100644 --- a/src/master/master.cpp +++ b/src/master/master.cpp @@ -4654,6 +4654,14 @@ void Master::offer(const FrameworkID& frameworkId, } } + // If the slave in this offer is planned to be unavailable due to + // maintenance in the future, then set the Unavailability. + CHECK(machines.contains(slave->machineId)); + if (machines[slave->machineId].info.has_unavailability()) { + offer->mutable_unavailability()->CopyFrom( + machines[slave->machineId].info.unavailability()); + } + offers[offer->id()] = offer; framework->addOffer(offer); http://git-wip-us.apache.org/repos/asf/mesos/blob/ee1eb2ba/src/tests/master_maintenance_tests.cpp ---------------------------------------------------------------------- diff --git a/src/tests/master_maintenance_tests.cpp b/src/tests/master_maintenance_tests.cpp index fb8dca3..5811446 100644 --- a/src/tests/master_maintenance_tests.cpp +++ b/src/tests/master_maintenance_tests.cpp @@ -63,6 +63,7 @@ using mesos::internal::protobuf::maintenance::createUnavailability; using mesos::internal::protobuf::maintenance::createWindow; using std::string; +using std::vector; using testing::DoAll; @@ -90,10 +91,18 @@ public: unavailability = createUnavailability(Clock::now()); } + virtual slave::Flags CreateSlaveFlags() + { + slave::Flags slaveFlags = MesosTest::CreateSlaveFlags(); + slaveFlags.hostname = maintenanceHostname; + return slaveFlags; + } // Default headers for all POST's to maintenance endpoints. hashmap<string, string> headers; + const string maintenanceHostname = "maintenance-host"; + // Some generic `MachineID`s that can be used in this test. MachineID machine1; MachineID machine2; @@ -291,6 +300,93 @@ TEST_F(MasterMaintenanceTest, FailToUnscheduleDeactivatedMachines) } +// Test ensures that an offer will have an `unavailability` set if the +// slave is scheduled to go down for maintenance. +TEST_F(MasterMaintenanceTest, PendingUnavailabilityTest) +{ + Try<PID<Master>> master = StartMaster(); + ASSERT_SOME(master); + + MockExecutor exec(DEFAULT_EXECUTOR_ID); + + Try<PID<Slave>> slave = StartSlave(&exec); + ASSERT_SOME(slave); + + MockScheduler sched; + MesosSchedulerDriver driver( + &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL); + + EXPECT_CALL(sched, registered(&driver, _, _)) + .Times(1); + + // Intercept offers sent to the scheduler. + Future<vector<Offer>> normalOffers; + Future<vector<Offer>> unavailabilityOffers; + EXPECT_CALL(sched, resourceOffers(&driver, _)) + .WillOnce(FutureArg<1>(&normalOffers)) + .WillOnce(FutureArg<1>(&unavailabilityOffers)) + .WillRepeatedly(Return()); // Ignore subsequent offers. + + // Start the test. + driver.start(); + + // Wait for some normal offers. + AWAIT_READY(normalOffers); + EXPECT_NE(0u, normalOffers.get().size()); + + // Check that unavailability is not set. + foreach (const Offer& offer, normalOffers.get()) { + EXPECT_FALSE(offer.has_unavailability()); + + // We have a few seconds between allocations (by default). That should + // be enough time to post a schedule before the next allocation. + driver.declineOffer(offer.id()); + } + + // Schedule this slave for maintenance. + MachineID machine; + machine.set_hostname("maintenance-host"); + machine.set_ip(stringify(slave.get().address.ip)); + + // TODO(jmlvanre): Replace Time(0.0) with `Clock::now()` once JSON double + // conversion is fixed. For now using a rounded time avoids the issue. + const Time start = Time::create(0.0).get() + Seconds(60); + const Duration duration = Seconds(120); + const Unavailability unavailability = createUnavailability(start, duration); + + // Post a valid schedule with one machine. + maintenance::Schedule schedule = createSchedule({ + createWindow({machine}, unavailability)}); + + Future<Response> response = process::http::post( + master.get(), + "maintenance/schedule", + headers, + stringify(JSON::Protobuf(schedule))); + + AWAIT_EXPECT_RESPONSE_STATUS_EQ(OK().status, response); + + // Speed up the test by not waiting until the next allocation. + driver.reviveOffers(); + + // Wait for some offers. + AWAIT_READY(unavailabilityOffers); + EXPECT_NE(0u, unavailabilityOffers.get().size()); + + // Check that each offer has an unavailability. + foreach (const Offer& offer, unavailabilityOffers.get()) { + EXPECT_TRUE(offer.has_unavailability()); + EXPECT_EQ(unavailability.start(), offer.unavailability().start()); + EXPECT_EQ(unavailability.duration(), offer.unavailability().duration()); + } + + driver.stop(); + driver.join(); + + Shutdown(); // Must shutdown before 'containerizer' gets deallocated. +} + + // Posts valid and invalid machines to the maintenance start endpoint. TEST_F(MasterMaintenanceTest, BringDownMachines) {
