Repository: mesos
Updated Branches:
  refs/heads/1.6.x 052af588c -> ae82dd5cc


Prevented master from asking agents to shutdown on auth failures.

The Mesos master sends a `ShutdownMessage` to an agent if there is an
authentication or an authorization error during agent (re)registration.

Upon receipt of this message, the agent kills alls its tasks and commits
suicide. This means that transient auth errors can lead to whole agents
being killed along with its tasks.

This patch prevents the master from sending a `ShutdownMessage` in these
cases.

Review: https://reviews.apache.org/r/67791/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/778430e1
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/778430e1
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/778430e1

Branch: refs/heads/1.6.x
Commit: 778430e1368f22e3038a1de02b17f099ce0dab1d
Parents: 052af58
Author: Gastón Kleiman <gas...@mesosphere.io>
Authored: Tue Jul 10 08:04:15 2018 -0700
Committer: Greg Mann <gregorywm...@gmail.com>
Committed: Tue Jul 10 09:02:57 2018 -0700

----------------------------------------------------------------------
 src/master/master.cpp                    |  16 ----
 src/tests/authentication_tests.cpp       |  33 ++++++--
 src/tests/master_authorization_tests.cpp | 105 ++++++++++++++++++++------
 3 files changed, 111 insertions(+), 43 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/778430e1/src/master/master.cpp
----------------------------------------------------------------------
diff --git a/src/master/master.cpp b/src/master/master.cpp
index 69411b5..42d64d1 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -6571,10 +6571,6 @@ void Master::registerSlave(
     // without authentication.
     LOG(WARNING) << "Refusing registration of agent at " << from
                  << " because it is not authenticated";
-
-    ShutdownMessage message;
-    message.set_message("Agent is not authenticated");
-    send(from, message);
     return;
   }
 
@@ -6660,10 +6656,6 @@ void Master::_registerSlave(
                  << " (" << slaveInfo.hostname() << ")"
                  << ": " << authorizationError.get();
 
-    ShutdownMessage message;
-    message.set_message(authorizationError.get());
-    send(pid, message);
-
     slaves.registering.erase(pid);
     return;
   }
@@ -6907,10 +6899,6 @@ void Master::reregisterSlave(
     // reregister without authentication.
     LOG(WARNING) << "Refusing re-registration of agent at " << from
                  << " because it is not authenticated";
-
-    ShutdownMessage message;
-    message.set_message("Agent is not authenticated");
-    send(from, message);
     return;
   }
 
@@ -7025,10 +7013,6 @@ void Master::_reregisterSlave(
                  << " at " << pid << " (" << slaveInfo.hostname() << ")"
                  << ": " << authorizationError.get();
 
-    ShutdownMessage message;
-    message.set_message(authorizationError.get());
-    send(pid, message);
-
     slaves.reregistering.erase(slaveInfo.id());
     return;
   }

http://git-wip-us.apache.org/repos/asf/mesos/blob/778430e1/src/tests/authentication_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/authentication_tests.cpp 
b/src/tests/authentication_tests.cpp
index bd46cbc..c9a8f85 100644
--- a/src/tests/authentication_tests.cpp
+++ b/src/tests/authentication_tests.cpp
@@ -79,14 +79,21 @@ TEST_F(AuthenticationTest, UnauthenticatedFramework)
 
 
 // This test verifies that an unauthenticated slave is
-// denied registration by the master.
+// denied registration by the master, but not shut down.
 TEST_F(AuthenticationTest, UnauthenticatedSlave)
 {
   Try<Owned<cluster::Master>> master = StartMaster();
   ASSERT_SOME(master);
 
-  Future<ShutdownMessage> shutdownMessage =
-    FUTURE_PROTOBUF(ShutdownMessage(), _, _);
+  // Previously, agents were shut down when registration failed due to
+  // authorization. We verify that this no longer occurs.
+  EXPECT_NO_FUTURE_PROTOBUFS(ShutdownMessage(), _, _);
+
+  // We verify that the agent isn't allowed to register.
+  EXPECT_NO_FUTURE_PROTOBUFS(SlaveRegisteredMessage(), _, _);
+
+  Future<RegisterSlaveMessage> registerSlaveMessage1 =
+    FUTURE_PROTOBUF(RegisterSlaveMessage(), _, _);
 
   // Start the slave without credentials.
   slave::Flags flags = CreateSlaveFlags();
@@ -96,9 +103,23 @@ TEST_F(AuthenticationTest, UnauthenticatedSlave)
   Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), flags);
   ASSERT_SOME(slave);
 
-  // Slave should get error message from the master.
-  AWAIT_READY(shutdownMessage);
-  ASSERT_NE("", shutdownMessage->message());
+  AWAIT_READY(registerSlaveMessage1);
+
+  Future<RegisterSlaveMessage> registerSlaveMessage2 =
+    FUTURE_PROTOBUF(RegisterSlaveMessage(), _, _);
+
+  // Advance the clock to trigger another registration attempt.
+  Clock::pause();
+  Clock::advance(slave::REGISTER_RETRY_INTERVAL_MAX);
+  Clock::settle();
+  Clock::resume();
+
+  AWAIT_READY(registerSlaveMessage2);
+
+  // Settle to make sure neither `SlaveRegisteredMessage` nor
+  // `ShutdownMessage` are sent.
+  Clock::pause();
+  Clock::settle();
 }
 
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/778430e1/src/tests/master_authorization_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/master_authorization_tests.cpp 
b/src/tests/master_authorization_tests.cpp
index 80b9d49..f65b621 100644
--- a/src/tests/master_authorization_tests.cpp
+++ b/src/tests/master_authorization_tests.cpp
@@ -2390,10 +2390,12 @@ TEST_F(MasterAuthorizationTest, 
AuthorizedToRegisterAndReregisterAgent)
 }
 
 
-// This test verifies that the agent is shut down by the master if
-// it is not authorized to register.
+// This test verifies that an agent without the right ACLs
+// is not allowed to register and is not shut down.
 TEST_F(MasterAuthorizationTest, UnauthorizedToRegisterAgent)
 {
+  Clock::pause();
+
   // Set up ACLs that disallows the agent's principal to register.
   ACLs acls;
   mesos::ACL::RegisterAgent* acl = acls.add_register_agents();
@@ -2406,14 +2408,31 @@ TEST_F(MasterAuthorizationTest, 
UnauthorizedToRegisterAgent)
   Try<Owned<cluster::Master>> master = StartMaster(flags);
   ASSERT_SOME(master);
 
-  Future<Message> shutdownMessage =
-    FUTURE_MESSAGE(Eq(ShutdownMessage().GetTypeName()), _, _);
+  // Previously, agents were shut down when registration failed due to
+  // authorization. We verify that this no longer occurs.
+  EXPECT_NO_FUTURE_PROTOBUFS(ShutdownMessage(), _, _);
+
+  // We verify that the agent isn't allowed to register.
+  EXPECT_NO_FUTURE_PROTOBUFS(SlaveRegisteredMessage(), _, _);
+
+  Future<RegisterSlaveMessage> registerSlaveMessage =
+    FUTURE_PROTOBUF(RegisterSlaveMessage(), _, _);
 
   Owned<MasterDetector> detector = master.get()->createDetector();
-  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get());
+  slave::Flags slaveFlags = CreateSlaveFlags();
+
+  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), slaveFlags);
   ASSERT_SOME(slave);
 
-  AWAIT_READY(shutdownMessage);
+  // Advance the clock to trigger the registration attempt.
+  Clock::advance(slaveFlags.registration_backoff_factor);
+  Clock::settle();
+
+  AWAIT_READY(registerSlaveMessage);
+
+  // Settle to make sure neither `SlaveRegisteredMessage` nor
+  // `ShutdownMessage` are sent.
+  Clock::settle();
 }
 
 
@@ -2433,12 +2452,14 @@ TEST_F(MasterAuthorizationTest, 
UnauthorizedToReregisterAgent)
   Try<Owned<cluster::Master>> master = StartMaster(flags);
   ASSERT_SOME(master);
 
-  slave::Flags slaveFlags = CreateSlaveFlags();
-  StandaloneMasterDetector detector(master.get()->pid);
+  // Previously, agents were shut down when registration failed due to
+  // authorization. We verify that this no longer occurs.
+  EXPECT_NO_FUTURE_PROTOBUFS(ShutdownMessage(), _, _);
 
-  Future<Message> slaveRegisteredMessage =
-    FUTURE_MESSAGE(Eq(SlaveRegisteredMessage().GetTypeName()), _, _);
+  Future<SlaveRegisteredMessage> slaveRegisteredMessage =
+    FUTURE_PROTOBUF(SlaveRegisteredMessage(), _, _);
 
+  StandaloneMasterDetector detector(master.get()->pid);
   Try<Owned<cluster::Slave>> slave = StartSlave(&detector);
   ASSERT_SOME(slave);
 
@@ -2451,15 +2472,23 @@ TEST_F(MasterAuthorizationTest, 
UnauthorizedToReregisterAgent)
   acl->mutable_agents()->set_type(ACL::Entity::NONE);
   flags.acls = acls;
 
-  Future<Message> shutdownMessage =
-    FUTURE_MESSAGE(Eq(ShutdownMessage().GetTypeName()), _, _);
+  // The agent should try but not be able to reregister.
+  Future<ReregisterSlaveMessage> reregisterSlaveMessage =
+    FUTURE_PROTOBUF(ReregisterSlaveMessage(), _, _);
+
+  EXPECT_NO_FUTURE_PROTOBUFS(SlaveReregisteredMessage(), _, _);
 
   master = StartMaster(flags);
   ASSERT_SOME(master);
 
   detector.appoint(master.get()->pid);
 
-  AWAIT_READY(shutdownMessage);
+  AWAIT_READY(reregisterSlaveMessage);
+
+  // Settle to make sure neither `SlaveReregisteredMessage` nor
+  // `ShutdownMessage` are sent.
+  Clock::pause();
+  Clock::settle();
 }
 
 
@@ -2512,10 +2541,12 @@ TEST_F(MasterAuthorizationTest, RetryRegisterAgent)
 }
 
 
-// This test verifies that the agent is shut down by the master if it is
-// not authorized to statically reserve any resources.
+// This test verifies that the agent is not allowed to register if it tries to
+// statically reserve resources, but it is not allowed to.
 TEST_F(MasterAuthorizationTest, UnauthorizedToStaticallyReserveResources)
 {
+  Clock::pause();
+
   // Set up ACLs so that the agent can (re)register.
   ACLs acls;
 
@@ -2538,8 +2569,15 @@ TEST_F(MasterAuthorizationTest, 
UnauthorizedToStaticallyReserveResources)
   Try<Owned<cluster::Master>> master = StartMaster(masterFlags);
   ASSERT_SOME(master);
 
-  Future<Message> shutdownMessage =
-    FUTURE_MESSAGE(Eq(ShutdownMessage().GetTypeName()), _, _);
+  // Previously, agents were shut down when registration failed due to
+  // authorization. We verify that this no longer occurs.
+  EXPECT_NO_FUTURE_PROTOBUFS(ShutdownMessage(), _, _);
+
+  // We verify that the agent isn't allowed to register.
+  EXPECT_NO_FUTURE_PROTOBUFS(SlaveRegisteredMessage(), _, _);
+
+  Future<RegisterSlaveMessage> registerSlaveMessage =
+    FUTURE_PROTOBUF(RegisterSlaveMessage(), _, _);
 
   Owned<MasterDetector> detector = master.get()->createDetector();
 
@@ -2549,7 +2587,15 @@ TEST_F(MasterAuthorizationTest, 
UnauthorizedToStaticallyReserveResources)
   Try<Owned<cluster::Slave>> agent = StartSlave(detector.get(), agentFlags);
   ASSERT_SOME(agent);
 
-  AWAIT_READY(shutdownMessage);
+  // Advance the clock to trigger the registration attempt.
+  Clock::advance(agentFlags.registration_backoff_factor);
+  Clock::settle();
+
+  AWAIT_READY(registerSlaveMessage);
+
+  // Settle to make sure neither `SlaveRegisteredMessage` nor
+  // `ShutdownMessage` are sent.
+  Clock::settle();
 }
 
 
@@ -2557,6 +2603,8 @@ TEST_F(MasterAuthorizationTest, 
UnauthorizedToStaticallyReserveResources)
 // not authorized to statically reserve certain roles.
 TEST_F(MasterAuthorizationTest, UnauthorizedToStaticallyReserveRole)
 {
+  Clock::pause();
+
   // Set up ACLs so that the agent can (re)register.
   ACLs acls;
 
@@ -2587,8 +2635,15 @@ TEST_F(MasterAuthorizationTest, 
UnauthorizedToStaticallyReserveRole)
   Try<Owned<cluster::Master>> master = StartMaster(masterFlags);
   ASSERT_SOME(master);
 
-  Future<Message> shutdownMessage =
-    FUTURE_MESSAGE(Eq(ShutdownMessage().GetTypeName()), _, _);
+  // Previously, agents were shut down when registration failed due to
+  // authorization. We verify that this no longer occurs.
+  EXPECT_NO_FUTURE_PROTOBUFS(ShutdownMessage(), _, _);
+
+  // We verify that the agent isn't allowed to register.
+  EXPECT_NO_FUTURE_PROTOBUFS(SlaveRegisteredMessage(), _, _);
+
+  Future<RegisterSlaveMessage> registerSlaveMessage =
+    FUTURE_PROTOBUF(RegisterSlaveMessage(), _, _);
 
   Owned<MasterDetector> detector = master.get()->createDetector();
 
@@ -2598,7 +2653,15 @@ TEST_F(MasterAuthorizationTest, 
UnauthorizedToStaticallyReserveRole)
   Try<Owned<cluster::Slave>> agent = StartSlave(detector.get(), agentFlags);
   ASSERT_SOME(agent);
 
-  AWAIT_READY(shutdownMessage);
+  // Advance the clock to trigger the registration attempt.
+  Clock::advance(agentFlags.registration_backoff_factor);
+  Clock::settle();
+
+  AWAIT_READY(registerSlaveMessage);
+
+  // Settle to make sure neither `SlaveRegisteredMessage` nor
+  // `ShutdownMessage` are sent.
+  Clock::settle();
 }
 
 

Reply via email to