Repository: mesos Updated Branches: refs/heads/master 57385ec45 -> ce9c75d3e
Maintenance Primitives: Prevent Slave registration from DOWN machine. Review: https://reviews.apache.org/r/37623 Project: http://git-wip-us.apache.org/repos/asf/mesos/repo Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/ce9c75d3 Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/ce9c75d3 Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/ce9c75d3 Branch: refs/heads/master Commit: ce9c75d3eefe370e0ca87a294e96c6d2ae6cb566 Parents: 147420e Author: Joris Van Remoortere <[email protected]> Authored: Sun Aug 30 14:32:46 2015 -0400 Committer: Joris Van Remoortere <[email protected]> Committed: Mon Sep 14 13:58:37 2015 -0400 ---------------------------------------------------------------------- include/mesos/type_utils.hpp | 17 ++++++++++++ include/mesos/v1/mesos.hpp | 17 ++++++++++++ src/master/master.cpp | 36 ++++++++++++++++++++++++++ src/tests/master_maintenance_tests.cpp | 40 ++++++++++++++++++++++++++++- 4 files changed, 109 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/mesos/blob/ce9c75d3/include/mesos/type_utils.hpp ---------------------------------------------------------------------- diff --git a/include/mesos/type_utils.hpp b/include/mesos/type_utils.hpp index 64c2a86..6cedf07 100644 --- a/include/mesos/type_utils.hpp +++ b/include/mesos/type_utils.hpp @@ -319,6 +319,23 @@ inline std::ostream& operator<<(std::ostream& stream, const TaskID& taskId) } +inline std::ostream& operator<<( + std::ostream& stream, + const MachineID& machineId) +{ + if (machineId.has_hostname() && machineId.has_ip()) { + return stream << machineId.hostname() << " (" << machineId.ip() << ")"; + } + + // If only a hostname is present. + if (machineId.has_hostname()) { + return stream << machineId.hostname(); + } else { // If there is no hostname, then there is an IP. 
+ return stream << "(" << machineId.ip() << ")"; + } +} + + inline std::ostream& operator<<(std::ostream& stream, const TaskInfo& task) { return stream << task.DebugString(); http://git-wip-us.apache.org/repos/asf/mesos/blob/ce9c75d3/include/mesos/v1/mesos.hpp ---------------------------------------------------------------------- diff --git a/include/mesos/v1/mesos.hpp b/include/mesos/v1/mesos.hpp index f8f9617..260e112 100644 --- a/include/mesos/v1/mesos.hpp +++ b/include/mesos/v1/mesos.hpp @@ -255,6 +255,23 @@ inline std::ostream& operator<<( } +inline std::ostream& operator<<( + std::ostream& stream, + const MachineID& machineId) +{ + if (machineId.has_hostname() && machineId.has_ip()) { + return stream << machineId.hostname() << " (" << machineId.ip() << ")"; + } + + // If only a hostname is present. + if (machineId.has_hostname()) { + return stream << machineId.hostname(); + } else { // If there is no hostname, then there is an IP. + return stream << "(" << machineId.ip() << ")"; + } +} + + inline std::ostream& operator<<(std::ostream& stream, const MasterInfo& master) { return stream << master.DebugString(); http://git-wip-us.apache.org/repos/asf/mesos/blob/ce9c75d3/src/master/master.cpp ---------------------------------------------------------------------- diff --git a/src/master/master.cpp b/src/master/master.cpp index 61236b3..f26271c 100644 --- a/src/master/master.cpp +++ b/src/master/master.cpp @@ -3746,6 +3746,24 @@ void Master::registerSlave( return; } + MachineID machineId; + machineId.set_hostname(slaveInfo.hostname()); + machineId.set_ip(stringify(from.address.ip)); + + // Slaves are not allowed to register while the machine they are on is in + // `DOWN` mode. 
+ if (machines.contains(machineId) && + machines[machineId].info.mode() == MachineInfo::DOWN) { + LOG(WARNING) << "Refusing registration of slave at " << from + << " because the machine '" << machineId << "' that it is " + << "running on is `DOWN`"; + + ShutdownMessage message; + message.set_message("Machine is `DOWN`"); + send(from, message); + return; + } + // Check if this slave is already registered (because it retries). if (slaves.registered.contains(from)) { Slave* slave = slaves.registered.get(from); @@ -3909,6 +3927,24 @@ void Master::reregisterSlave( return; } + MachineID machineId; + machineId.set_hostname(slaveInfo.hostname()); + machineId.set_ip(stringify(from.address.ip)); + + // Slaves are not allowed to register while the machine they are on is in + // `DOWN` mode. + if (machines.contains(machineId) && + machines[machineId].info.mode() == MachineInfo::DOWN) { + LOG(WARNING) << "Refusing re-registration of slave at " << from + << " because the machine '" << machineId << "' that it is " + << "running on is `DOWN`"; + + ShutdownMessage message; + message.set_message("Machine is `DOWN`"); + send(from, message); + return; + } + if (slaves.removed.get(slaveInfo.id()).isSome()) { // To compensate for the case where a non-strict registrar is // being used, we explicitly deny removed slaves from http://git-wip-us.apache.org/repos/asf/mesos/blob/ce9c75d3/src/tests/master_maintenance_tests.cpp ---------------------------------------------------------------------- diff --git a/src/tests/master_maintenance_tests.cpp b/src/tests/master_maintenance_tests.cpp index 6ae502d..4478505 100644 --- a/src/tests/master_maintenance_tests.cpp +++ b/src/tests/master_maintenance_tests.cpp @@ -661,7 +661,7 @@ TEST_F(MasterMaintenanceTest, EnterMaintenanceMode) AWAIT_EXPECT_RESPONSE_STATUS_EQ(OK().status, response); - // Wait for the slave to be shut down. + // Wait for the slave to be told to shut down. AWAIT_READY(shutdownMessage); // Verify that we received a TASK_LOST. 
@@ -671,6 +671,44 @@ TEST_F(MasterMaintenanceTest, EnterMaintenanceMode) // Verify that the framework received the slave lost message. AWAIT_READY(slaveLost); + // Wait on the agent to terminate so that it wipes out its latest symlink. + // This way when we launch a new agent it will register with a new agent id. + wait(slave.get()); + + // Ensure that the slave gets shut down immediately if it tries to register + // from a machine that is under maintenance. + shutdownMessage = FUTURE_PROTOBUF(ShutdownMessage(), master.get(), _); + EXPECT_TRUE(shutdownMessage.isPending()); + + slave = StartSlave(); + ASSERT_SOME(slave); + + AWAIT_READY(shutdownMessage); + + // Wait on the agent to terminate so that it wipes out its latest symlink. + // This way when we launch a new agent it will register with a new agent id. + wait(slave.get()); + + // Stop maintenance. + response = + process::http::post( + master.get(), + "machine/up", + headers, + stringify(JSON::Protobuf(createMachineList({machine})))); + + AWAIT_EXPECT_RESPONSE_STATUS_EQ(OK().status, response); + + // Capture the registration message. + Future<SlaveRegisteredMessage> slaveRegisteredMessage = + FUTURE_PROTOBUF(SlaveRegisteredMessage(), _, _); + + // Start the agent again. + slave = StartSlave(); + + // Wait for agent registration. + AWAIT_READY(slaveRegisteredMessage); + driver.stop(); driver.join();
