Repository: mesos
Updated Branches:
  refs/heads/1.5.x 2440c7310 -> eee8f2124


Prevented master from asking agents to shutdown on auth failures.

The Mesos master sends a `ShutdownMessage` to an agent if there is an
authentication or an authorization error during agent (re)registration.

Upon receipt of this message, the agent kills alls its tasks and commits
suicide. This means that transient auth errors can lead to whole agents
being killed along with its tasks.

This patch prevents the master from sending a `ShutdownMessage` in these
cases.

Review: https://reviews.apache.org/r/67791/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/b8bbe0d4
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/b8bbe0d4
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/b8bbe0d4

Branch: refs/heads/1.5.x
Commit: b8bbe0d49d68932e36a76c36e4357ac3fdb5d244
Parents: 2440c73
Author: Gastón Kleiman <gas...@mesosphere.io>
Authored: Tue Jul 10 08:04:15 2018 -0700
Committer: Greg Mann <gregorywm...@gmail.com>
Committed: Tue Jul 10 09:07:27 2018 -0700

----------------------------------------------------------------------
 src/master/master.cpp                    | 14 -------
 src/tests/authentication_tests.cpp       | 33 +++++++++++++---
 src/tests/master_authorization_tests.cpp | 55 ++++++++++++++++++++-------
 3 files changed, 69 insertions(+), 33 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/b8bbe0d4/src/master/master.cpp
----------------------------------------------------------------------
diff --git a/src/master/master.cpp b/src/master/master.cpp
index 15893a7..05724f1 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -6123,9 +6123,6 @@ void Master::registerSlave(
     // without authentication.
     LOG(WARNING) << "Refusing registration of agent at " << from
                  << " because it is not authenticated";
-    ShutdownMessage message;
-    message.set_message("Agent is not authenticated");
-    send(from, message);
     return;
   }
 
@@ -6200,10 +6197,6 @@ void Master::_registerSlave(
                  << " (" << slaveInfo.hostname() << ")"
                  << ": " << authorizationError.get();
 
-    ShutdownMessage message;
-    message.set_message(authorizationError.get());
-    send(pid, message);
-
     slaves.registering.erase(pid);
     return;
   }
@@ -6447,9 +6440,6 @@ void Master::reregisterSlave(
     // re-register without authentication.
     LOG(WARNING) << "Refusing re-registration of agent at " << from
                  << " because it is not authenticated";
-    ShutdownMessage message;
-    message.set_message("Agent is not authenticated");
-    send(from, message);
     return;
   }
 
@@ -6552,10 +6542,6 @@ void Master::_reregisterSlave(
                  << " at " << pid << " (" << slaveInfo.hostname() << ")"
                  << ": " << authorizationError.get();
 
-    ShutdownMessage message;
-    message.set_message(authorizationError.get());
-    send(pid, message);
-
     slaves.reregistering.erase(slaveInfo.id());
     return;
   }

http://git-wip-us.apache.org/repos/asf/mesos/blob/b8bbe0d4/src/tests/authentication_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/authentication_tests.cpp 
b/src/tests/authentication_tests.cpp
index 3c6124f..36944bc 100644
--- a/src/tests/authentication_tests.cpp
+++ b/src/tests/authentication_tests.cpp
@@ -79,14 +79,21 @@ TEST_F(AuthenticationTest, UnauthenticatedFramework)
 
 
 // This test verifies that an unauthenticated slave is
-// denied registration by the master.
+// denied registration by the master, but not shut down.
 TEST_F(AuthenticationTest, UnauthenticatedSlave)
 {
   Try<Owned<cluster::Master>> master = StartMaster();
   ASSERT_SOME(master);
 
-  Future<ShutdownMessage> shutdownMessage =
-    FUTURE_PROTOBUF(ShutdownMessage(), _, _);
+  // Previously, agents were shut down when registration failed due to
+  // authorization. We verify that this no longer occurs.
+  EXPECT_NO_FUTURE_PROTOBUFS(ShutdownMessage(), _, _);
+
+  // We verify that the agent isn't allowed to register.
+  EXPECT_NO_FUTURE_PROTOBUFS(SlaveRegisteredMessage(), _, _);
+
+  Future<RegisterSlaveMessage> registerSlaveMessage1 =
+    FUTURE_PROTOBUF(RegisterSlaveMessage(), _, _);
 
   // Start the slave without credentials.
   slave::Flags flags = CreateSlaveFlags();
@@ -96,9 +103,23 @@ TEST_F(AuthenticationTest, UnauthenticatedSlave)
   Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), flags);
   ASSERT_SOME(slave);
 
-  // Slave should get error message from the master.
-  AWAIT_READY(shutdownMessage);
-  ASSERT_NE("", shutdownMessage->message());
+  AWAIT_READY(registerSlaveMessage1);
+
+  Future<RegisterSlaveMessage> registerSlaveMessage2 =
+    FUTURE_PROTOBUF(RegisterSlaveMessage(), _, _);
+
+  // Advance the clock to trigger another registration attempt.
+  Clock::pause();
+  Clock::advance(slave::REGISTER_RETRY_INTERVAL_MAX);
+  Clock::settle();
+  Clock::resume();
+
+  AWAIT_READY(registerSlaveMessage2);
+
+  // Settle to make sure neither `SlaveRegisteredMessage` nor
+  // `ShutdownMessage` are sent.
+  Clock::pause();
+  Clock::settle();
 }
 
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/b8bbe0d4/src/tests/master_authorization_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/master_authorization_tests.cpp 
b/src/tests/master_authorization_tests.cpp
index 676543a..3c28e27 100644
--- a/src/tests/master_authorization_tests.cpp
+++ b/src/tests/master_authorization_tests.cpp
@@ -2359,10 +2359,12 @@ TEST_F(MasterAuthorizationTest, 
AuthorizedToRegisterAndReregisterAgent)
 }
 
 
-// This test verifies that the agent is shut down by the master if
-// it is not authorized to register.
+// This test verifies that an agent without the right ACLs
+// is not allowed to register and is not shut down.
 TEST_F(MasterAuthorizationTest, UnauthorizedToRegisterAgent)
 {
+  Clock::pause();
+
   // Set up ACLs that disallows the agent's principal to register.
   ACLs acls;
   mesos::ACL::RegisterAgent* acl = acls.add_register_agents();
@@ -2375,14 +2377,31 @@ TEST_F(MasterAuthorizationTest, 
UnauthorizedToRegisterAgent)
   Try<Owned<cluster::Master>> master = StartMaster(flags);
   ASSERT_SOME(master);
 
-  Future<Message> shutdownMessage =
-    FUTURE_MESSAGE(Eq(ShutdownMessage ().GetTypeName()), _, _);
+  // Previously, agents were shut down when registration failed due to
+  // authorization. We verify that this no longer occurs.
+  EXPECT_NO_FUTURE_PROTOBUFS(ShutdownMessage(), _, _);
+
+  // We verify that the agent isn't allowed to register.
+  EXPECT_NO_FUTURE_PROTOBUFS(SlaveRegisteredMessage(), _, _);
+
+  Future<RegisterSlaveMessage> registerSlaveMessage =
+    FUTURE_PROTOBUF(RegisterSlaveMessage(), _, _);
 
   Owned<MasterDetector> detector = master.get()->createDetector();
-  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get());
+  slave::Flags slaveFlags = CreateSlaveFlags();
+
+  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), slaveFlags);
   ASSERT_SOME(slave);
 
-  AWAIT_READY(shutdownMessage);
+  // Advance the clock to trigger the registration attempt.
+  Clock::advance(slaveFlags.registration_backoff_factor);
+  Clock::settle();
+
+  AWAIT_READY(registerSlaveMessage);
+
+  // Settle to make sure neither `SlaveRegisteredMessage` nor
+  // `ShutdownMessage` are sent.
+  Clock::settle();
 }
 
 
@@ -2402,12 +2421,14 @@ TEST_F(MasterAuthorizationTest, 
UnauthorizedToReregisterAgent)
   Try<Owned<cluster::Master>> master = StartMaster(flags);
   ASSERT_SOME(master);
 
-  slave::Flags slaveFlags = CreateSlaveFlags();
-  StandaloneMasterDetector detector(master.get()->pid);
+  // Previously, agents were shut down when registration failed due to
+  // authorization. We verify that this no longer occurs.
+  EXPECT_NO_FUTURE_PROTOBUFS(ShutdownMessage(), _, _);
 
-  Future<Message> slaveRegisteredMessage =
-    FUTURE_MESSAGE(Eq(SlaveRegisteredMessage().GetTypeName()), _, _);
+  Future<SlaveRegisteredMessage> slaveRegisteredMessage =
+    FUTURE_PROTOBUF(SlaveRegisteredMessage(), _, _);
 
+  StandaloneMasterDetector detector(master.get()->pid);
   Try<Owned<cluster::Slave>> slave = StartSlave(&detector);
   ASSERT_SOME(slave);
 
@@ -2420,15 +2441,23 @@ TEST_F(MasterAuthorizationTest, 
UnauthorizedToReregisterAgent)
   acl->mutable_agents()->set_type(ACL::Entity::NONE);
   flags.acls = acls;
 
-  Future<Message> shutdownMessage =
-    FUTURE_MESSAGE(Eq(ShutdownMessage().GetTypeName()), _, _);
+  // The agent should try but not be able to reregister.
+  Future<ReregisterSlaveMessage> reregisterSlaveMessage =
+    FUTURE_PROTOBUF(ReregisterSlaveMessage(), _, _);
+
+  EXPECT_NO_FUTURE_PROTOBUFS(SlaveReregisteredMessage(), _, _);
 
   master = StartMaster(flags);
   ASSERT_SOME(master);
 
   detector.appoint(master.get()->pid);
 
-  AWAIT_READY(shutdownMessage);
+  AWAIT_READY(reregisterSlaveMessage);
+
+  // Settle to make sure neither `SlaveReregisteredMessage` nor
+  // `ShutdownMessage` are sent.
+  Clock::pause();
+  Clock::settle();
 }
 
 

Reply via email to