Repository: mesos
Updated Branches:
  refs/heads/master 57385ec45 -> ce9c75d3e


Maintenance Primitives: Prevent Slave registration from DOWN machine.

Review: https://reviews.apache.org/r/37623


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/ce9c75d3
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/ce9c75d3
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/ce9c75d3

Branch: refs/heads/master
Commit: ce9c75d3eefe370e0ca87a294e96c6d2ae6cb566
Parents: 147420e
Author: Joris Van Remoortere <[email protected]>
Authored: Sun Aug 30 14:32:46 2015 -0400
Committer: Joris Van Remoortere <[email protected]>
Committed: Mon Sep 14 13:58:37 2015 -0400

----------------------------------------------------------------------
 include/mesos/type_utils.hpp           | 17 ++++++++++++
 include/mesos/v1/mesos.hpp             | 17 ++++++++++++
 src/master/master.cpp                  | 36 ++++++++++++++++++++++++++
 src/tests/master_maintenance_tests.cpp | 40 ++++++++++++++++++++++++++++-
 4 files changed, 109 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/ce9c75d3/include/mesos/type_utils.hpp
----------------------------------------------------------------------
diff --git a/include/mesos/type_utils.hpp b/include/mesos/type_utils.hpp
index 64c2a86..6cedf07 100644
--- a/include/mesos/type_utils.hpp
+++ b/include/mesos/type_utils.hpp
@@ -319,6 +319,23 @@ inline std::ostream& operator<<(std::ostream& stream, const TaskID& taskId)
 }
 
 
+inline std::ostream& operator<<(
+    std::ostream& stream,
+    const MachineID& machineId)
+{
+  if (machineId.has_hostname() && machineId.has_ip()) {
+    return stream << machineId.hostname() << " (" << machineId.ip() << ")";
+  }
+
+  // If only a hostname is present.
+  if (machineId.has_hostname()) {
+    return stream << machineId.hostname();
+  } else { // If there is no hostname, then there is an IP.
+    return stream << "(" << machineId.ip() << ")";
+  }
+}
+
+
 inline std::ostream& operator<<(std::ostream& stream, const TaskInfo& task)
 {
   return stream << task.DebugString();

http://git-wip-us.apache.org/repos/asf/mesos/blob/ce9c75d3/include/mesos/v1/mesos.hpp
----------------------------------------------------------------------
diff --git a/include/mesos/v1/mesos.hpp b/include/mesos/v1/mesos.hpp
index f8f9617..260e112 100644
--- a/include/mesos/v1/mesos.hpp
+++ b/include/mesos/v1/mesos.hpp
@@ -255,6 +255,23 @@ inline std::ostream& operator<<(
 }
 
 
+inline std::ostream& operator<<(
+    std::ostream& stream,
+    const MachineID& machineId)
+{
+  if (machineId.has_hostname() && machineId.has_ip()) {
+    return stream << machineId.hostname() << " (" << machineId.ip() << ")";
+  }
+
+  // If only a hostname is present.
+  if (machineId.has_hostname()) {
+    return stream << machineId.hostname();
+  } else { // If there is no hostname, then there is an IP.
+    return stream << "(" << machineId.ip() << ")";
+  }
+}
+
+
 inline std::ostream& operator<<(std::ostream& stream, const MasterInfo& master)
 {
   return stream << master.DebugString();

http://git-wip-us.apache.org/repos/asf/mesos/blob/ce9c75d3/src/master/master.cpp
----------------------------------------------------------------------
diff --git a/src/master/master.cpp b/src/master/master.cpp
index 61236b3..f26271c 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -3746,6 +3746,24 @@ void Master::registerSlave(
     return;
   }
 
+  MachineID machineId;
+  machineId.set_hostname(slaveInfo.hostname());
+  machineId.set_ip(stringify(from.address.ip));
+
+  // Slaves are not allowed to register while the machine they are on is in
+  // `DOWN` mode.
+  if (machines.contains(machineId) &&
+      machines[machineId].info.mode() == MachineInfo::DOWN) {
+    LOG(WARNING) << "Refusing registration of slave at " << from
+                 << " because the machine '" << machineId << "' that it is "
+                 << "running on is `DOWN`";
+
+    ShutdownMessage message;
+    message.set_message("Machine is `DOWN`");
+    send(from, message);
+    return;
+  }
+
   // Check if this slave is already registered (because it retries).
   if (slaves.registered.contains(from)) {
     Slave* slave = slaves.registered.get(from);
@@ -3909,6 +3927,24 @@ void Master::reregisterSlave(
     return;
   }
 
+  MachineID machineId;
+  machineId.set_hostname(slaveInfo.hostname());
+  machineId.set_ip(stringify(from.address.ip));
+
+  // Slaves are not allowed to register while the machine they are on is in
+  // `DOWN` mode.
+  if (machines.contains(machineId) &&
+      machines[machineId].info.mode() == MachineInfo::DOWN) {
+    LOG(WARNING) << "Refusing re-registration of slave at " << from
+                 << " because the machine '" << machineId << "' that it is "
+                 << "running on is `DOWN`";
+
+    ShutdownMessage message;
+    message.set_message("Machine is `DOWN`");
+    send(from, message);
+    return;
+  }
+
   if (slaves.removed.get(slaveInfo.id()).isSome()) {
     // To compensate for the case where a non-strict registrar is
     // being used, we explicitly deny removed slaves from

http://git-wip-us.apache.org/repos/asf/mesos/blob/ce9c75d3/src/tests/master_maintenance_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/master_maintenance_tests.cpp b/src/tests/master_maintenance_tests.cpp
index 6ae502d..4478505 100644
--- a/src/tests/master_maintenance_tests.cpp
+++ b/src/tests/master_maintenance_tests.cpp
@@ -661,7 +661,7 @@ TEST_F(MasterMaintenanceTest, EnterMaintenanceMode)
 
   AWAIT_EXPECT_RESPONSE_STATUS_EQ(OK().status, response);
 
-  // Wait for the slave to be shut down.
+  // Wait for the slave to be told to shut down.
   AWAIT_READY(shutdownMessage);
 
   // Verify that we received a TASK_LOST.
@@ -671,6 +671,44 @@ TEST_F(MasterMaintenanceTest, EnterMaintenanceMode)
   // Verify that the framework received the slave lost message.
   AWAIT_READY(slaveLost);
 
+  // Wait on the agent to terminate so that it wipes out its latest symlink.
+  // This way when we launch a new agent it will register with a new agent id.
+  wait(slave.get());
+
+  // Ensure that the slave gets shut down immediately if it tries to register
+  // from a machine that is under maintenance.
+  shutdownMessage = FUTURE_PROTOBUF(ShutdownMessage(), master.get(), _);
+  EXPECT_TRUE(shutdownMessage.isPending());
+
+  slave = StartSlave();
+  ASSERT_SOME(slave);
+
+  AWAIT_READY(shutdownMessage);
+
+  // Wait on the agent to terminate so that it wipes out its latest symlink.
+  // This way when we launch a new agent it will register with a new agent id.
+  wait(slave.get());
+
+  // Stop maintenance.
+  response =
+    process::http::post(
+        master.get(),
+        "machine/up",
+        headers,
+        stringify(JSON::Protobuf(createMachineList({machine}))));
+
+  AWAIT_EXPECT_RESPONSE_STATUS_EQ(OK().status, response);
+
+  // Capture the registration message.
+  Future<SlaveRegisteredMessage> slaveRegisteredMessage =
+    FUTURE_PROTOBUF(SlaveRegisteredMessage(), _, _);
+
+  // Start the agent again.
+  slave = StartSlave();
+
+  // Wait for agent registration.
+  AWAIT_READY(slaveRegisteredMessage);
+
   driver.stop();
   driver.join();
 

Reply via email to