Repository: mesos
Updated Branches:
  refs/heads/1.4.x ee18cf95f -> 5f6d4cdc7


Prevented master from asking agents to shutdown on auth failures.

The Mesos master sends a `ShutdownMessage` to an agent if there is an
authentication or an authorization error during agent (re)registration.

Upon receipt of this message, the agent kills alls its tasks and commits
suicide. This means that transient auth errors can lead to whole agents
being killed along with its tasks.

This patch prevents the master from sending a `ShutdownMessage` in these
cases.

Review: https://reviews.apache.org/r/67791


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/9cca4544
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/9cca4544
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/9cca4544

Branch: refs/heads/1.4.x
Commit: 9cca4544f3aeab08dde02d63cfd710520dd88512
Parents: ee18cf9
Author: Gastón Kleiman <gaston.klei...@gmail.com>
Authored: Fri Jun 29 16:13:01 2018 -0700
Committer: Greg Mann <gregorywm...@gmail.com>
Committed: Tue Jul 10 13:44:35 2018 -0700

----------------------------------------------------------------------
 src/master/master.cpp                    | 14 -------
 src/tests/authentication_tests.cpp       | 31 ++++++++++++---
 src/tests/master_authorization_tests.cpp | 57 ++++++++++++++++++++-------
 3 files changed, 69 insertions(+), 33 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/9cca4544/src/master/master.cpp
----------------------------------------------------------------------
diff --git a/src/master/master.cpp b/src/master/master.cpp
index 2f13fb1..f654300 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -5732,9 +5732,6 @@ void Master::registerSlave(
     // without authentication.
     LOG(WARNING) << "Refusing registration of agent at " << from
                  << " because it is not authenticated";
-    ShutdownMessage message;
-    message.set_message("Agent is not authenticated");
-    send(from, message);
     return;
   }
 
@@ -5807,10 +5804,6 @@ void Master::_registerSlave(
                  << " (" << slaveInfo.hostname() << ")"
                  << ": " << authorizationError.get();
 
-    ShutdownMessage message;
-    message.set_message(authorizationError.get());
-    send(pid, message);
-
     slaves.registering.erase(pid);
     return;
   }
@@ -6031,9 +6024,6 @@ void Master::reregisterSlave(
     // re-register without authentication.
     LOG(WARNING) << "Refusing re-registration of agent at " << from
                  << " because it is not authenticated";
-    ShutdownMessage message;
-    message.set_message("Agent is not authenticated");
-    send(from, message);
     return;
   }
 
@@ -6116,10 +6106,6 @@ void Master::_reregisterSlave(
                  << " at " << pid << " (" << slaveInfo.hostname() << ")"
                  << ": " << authorizationError.get();
 
-    ShutdownMessage message;
-    message.set_message(authorizationError.get());
-    send(pid, message);
-
     slaves.reregistering.erase(slaveInfo.id());
     return;
   }

http://git-wip-us.apache.org/repos/asf/mesos/blob/9cca4544/src/tests/authentication_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/authentication_tests.cpp 
b/src/tests/authentication_tests.cpp
index b04be76..842badf 100644
--- a/src/tests/authentication_tests.cpp
+++ b/src/tests/authentication_tests.cpp
@@ -89,8 +89,15 @@ TEST_F_TEMP_DISABLED_ON_WINDOWS(AuthenticationTest, 
UnauthenticatedSlave)
   Try<Owned<cluster::Master>> master = StartMaster();
   ASSERT_SOME(master);
 
-  Future<ShutdownMessage> shutdownMessage =
-    FUTURE_PROTOBUF(ShutdownMessage(), _, _);
+  // Previously, agents were shut down when registration failed due to
+  // authorization. We verify that this no longer occurs.
+  EXPECT_NO_FUTURE_PROTOBUFS(ShutdownMessage(), _, _);
+
+  // We verify that the agent isn't allowed to register.
+  EXPECT_NO_FUTURE_PROTOBUFS(SlaveRegisteredMessage(), _, _);
+
+  Future<RegisterSlaveMessage> registerSlaveMessage1 =
+    FUTURE_PROTOBUF(RegisterSlaveMessage(), _, _);
 
   // Start the slave without credentials.
   slave::Flags flags = CreateSlaveFlags();
@@ -100,9 +107,23 @@ TEST_F_TEMP_DISABLED_ON_WINDOWS(AuthenticationTest, 
UnauthenticatedSlave)
   Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), flags);
   ASSERT_SOME(slave);
 
-  // Slave should get error message from the master.
-  AWAIT_READY(shutdownMessage);
-  ASSERT_NE("", shutdownMessage->message());
+  AWAIT_READY(registerSlaveMessage1);
+
+  Future<RegisterSlaveMessage> registerSlaveMessage2 =
+    FUTURE_PROTOBUF(RegisterSlaveMessage(), _, _);
+
+  // Advance the clock to trigger another registration attempt.
+  Clock::pause();
+  Clock::advance(slave::REGISTER_RETRY_INTERVAL_MAX);
+  Clock::settle();
+  Clock::resume();
+
+  AWAIT_READY(registerSlaveMessage2);
+
+  // Settle to make sure neither `SlaveRegisteredMessage` nor
+  // `ShutdownMessage` are sent.
+  Clock::pause();
+  Clock::settle();
 }
 
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/9cca4544/src/tests/master_authorization_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/master_authorization_tests.cpp 
b/src/tests/master_authorization_tests.cpp
index 4b446e8..f99faeb 100644
--- a/src/tests/master_authorization_tests.cpp
+++ b/src/tests/master_authorization_tests.cpp
@@ -2375,10 +2375,12 @@ TEST_F(MasterAuthorizationTest, 
AuthorizedToRegisterAndReregisterAgent)
 }
 
 
-// This test verifies that the agent is shut down by the master if
-// it is not authorized to register.
+// This test verifies that an agent without the right ACLs
+// is not allowed to register and is not shut down.
 TEST_F(MasterAuthorizationTest, UnauthorizedToRegisterAgent)
 {
+  Clock::pause();
+
   // Set up ACLs that disallows the agent's principal to register.
   ACLs acls;
   mesos::ACL::RegisterAgent* acl = acls.add_register_agents();
@@ -2391,14 +2393,31 @@ TEST_F(MasterAuthorizationTest, 
UnauthorizedToRegisterAgent)
   Try<Owned<cluster::Master>> master = StartMaster(flags);
   ASSERT_SOME(master);
 
-  Future<Message> shutdownMessage =
-    FUTURE_MESSAGE(Eq(ShutdownMessage ().GetTypeName()), _, _);
+  // Previously, agents were shut down when registration failed due to
+  // authorization. We verify that this no longer occurs.
+  EXPECT_NO_FUTURE_PROTOBUFS(ShutdownMessage(), _, _);
+
+  // We verify that the agent isn't allowed to register.
+  EXPECT_NO_FUTURE_PROTOBUFS(SlaveRegisteredMessage(), _, _);
+
+  Future<RegisterSlaveMessage> registerSlaveMessage =
+    FUTURE_PROTOBUF(RegisterSlaveMessage(), _, _);
 
   Owned<MasterDetector> detector = master.get()->createDetector();
-  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get());
+  slave::Flags slaveFlags = CreateSlaveFlags();
+
+  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), slaveFlags);
   ASSERT_SOME(slave);
 
-  AWAIT_READY(shutdownMessage);
+  // Advance the clock to trigger the registration attempt.
+  Clock::advance(slaveFlags.registration_backoff_factor);
+  Clock::settle();
+
+  AWAIT_READY(registerSlaveMessage);
+
+  // Settle to make sure neither `SlaveRegisteredMessage` nor
+  // `ShutdownMessage` are sent.
+  Clock::settle();
 }
 
 
@@ -2418,12 +2437,14 @@ TEST_F(MasterAuthorizationTest, 
UnauthorizedToReregisterAgent)
   Try<Owned<cluster::Master>> master = StartMaster(flags);
   ASSERT_SOME(master);
 
-  slave::Flags slaveFlags = CreateSlaveFlags();
-  StandaloneMasterDetector detector(master.get()->pid);
+  // Previously, agents were shut down when registration failed due to
+  // authorization. We verify that this no longer occurs.
+  EXPECT_NO_FUTURE_PROTOBUFS(ShutdownMessage(), _, _);
 
-  Future<Message> slaveRegisteredMessage =
-    FUTURE_MESSAGE(Eq(SlaveRegisteredMessage().GetTypeName()), _, _);
+  Future<SlaveRegisteredMessage> slaveRegisteredMessage =
+    FUTURE_PROTOBUF(SlaveRegisteredMessage(), _, _);
 
+  StandaloneMasterDetector detector(master.get()->pid);
   Try<Owned<cluster::Slave>> slave = StartSlave(&detector);
   ASSERT_SOME(slave);
 
@@ -2432,19 +2453,27 @@ TEST_F(MasterAuthorizationTest, 
UnauthorizedToReregisterAgent)
   // Master fails over.
   master->reset();
 
-  // The new master doesn't allow this agent principal to re-register.
+  // The new master doesn't allow this agent principal to reregister.
   acl->mutable_agents()->set_type(ACL::Entity::NONE);
   flags.acls = acls;
 
-  Future<Message> shutdownMessage =
-    FUTURE_MESSAGE(Eq(ShutdownMessage().GetTypeName()), _, _);
+  // The agent should try but not be able to re-register.
+  Future<ReregisterSlaveMessage> reregisterSlaveMessage =
+    FUTURE_PROTOBUF(ReregisterSlaveMessage(), _, _);
+
+  EXPECT_NO_FUTURE_PROTOBUFS(SlaveReregisteredMessage(), _, _);
 
   master = StartMaster(flags);
   ASSERT_SOME(master);
 
   detector.appoint(master.get()->pid);
 
-  AWAIT_READY(shutdownMessage);
+  AWAIT_READY(reregisterSlaveMessage);
+
+  // Settle to make sure neither `SlaveReregisteredMessage` nor
+  // `ShutdownMessage` are sent.
+  Clock::pause();
+  Clock::settle();
 }
 
 

Reply via email to