Fixed races between "unreachable" and "unregister" slave transitions.

Now that we update the registry before updating the master's state when
performing these transitions, it is possible for the master to already
be marking a slave unreachable when an `UnregisterSlaveMessage` is
received, or vice versa. Detect these situations and ignore whichever
transition is triggered second.

Review: https://reviews.apache.org/r/51707/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/81fe779b
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/81fe779b
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/81fe779b

Branch: refs/heads/master
Commit: 81fe779b46e48eff2fde51e8d21402cb7eee006b
Parents: 5e66002
Author: Neil Conway <neil.con...@gmail.com>
Authored: Mon Sep 19 15:48:36 2016 -0700
Committer: Vinod Kone <vinodk...@gmail.com>
Committed: Mon Sep 19 15:48:36 2016 -0700

----------------------------------------------------------------------
 src/master/master.cpp     |  24 ++++-
 src/tests/slave_tests.cpp | 229 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 252 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/81fe779b/src/master/master.cpp
----------------------------------------------------------------------
diff --git a/src/master/master.cpp b/src/master/master.cpp
index 3095f8a..38ca425 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -5839,6 +5839,12 @@ void Master::markUnreachable(const SlaveID& slaveId)
     return;
   }
 
+  if (slaves.removing.contains(slaveId)) {
+    LOG(WARNING) << "Not marking agent " << slaveId
+                 << " unreachable because it is unregistering";
+    return;
+  }
+
   LOG(INFO) << "Marking agent " << *slave
             << " unreachable: health check timed out";
 
@@ -7284,10 +7290,26 @@ void Master::removeSlave(
 {
   CHECK_NOTNULL(slave);
 
-  LOG(INFO) << "Removing agent " << *slave << ": " << message;
+  // It would be better to remove the slave here instead of continuing
+  // to mark it unreachable, but probably not worth the complexity.
+  if (slaves.markingUnreachable.contains(slave->id)) {
+    LOG(WARNING) << "Ignoring removal of agent " << *slave
+                 << " that is in the process of being marked unreachable";
+    return;
+  }
+
+  // This should not be possible, but we protect against it anyway for
+  // the sake of paranoia.
+  if (slaves.removing.contains(slave->id)) {
+    LOG(WARNING) << "Ignoring removal of agent " << *slave
+                 << " that is in the process of being removed";
+    return;
+  }
 
   slaves.removing.insert(slave->id);
 
+  LOG(INFO) << "Removing agent " << *slave << ": " << message;
+
   // Remove this slave from the registrar. Note that we update the
   // registry BEFORE we update the master's in-memory state; this
   // means that until the registry operation has completed, the slave

http://git-wip-us.apache.org/repos/asf/mesos/blob/81fe779b/src/tests/slave_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/slave_tests.cpp b/src/tests/slave_tests.cpp
index aa30118..8ad3547 100644
--- a/src/tests/slave_tests.cpp
+++ b/src/tests/slave_tests.cpp
@@ -2886,6 +2886,235 @@ TEST_F(SlaveTest, HealthCheckUnregisterRace)
 }
 
 
+// This test checks that the master behaves correctly when a slave
+// fails health checks and is in the process of being marked
+// unreachable in the registry, but concurrently the slave unregisters
+// from the master.
+TEST_F(SlaveTest, UnreachableThenUnregisterRace)
+{
+  master::Flags masterFlags = CreateMasterFlags();
+  Try<Owned<cluster::Master>> master = StartMaster(masterFlags);
+  ASSERT_SOME(master);
+
+  // Set these expectations up before we spawn the slave so that we
+  // don't miss the first PING.
+  Future<Message> ping = FUTURE_MESSAGE(
+      Eq(PingSlaveMessage().GetTypeName()), _, _);
+
+  // Drop all the PONGs to simulate slave partition.
+  DROP_PROTOBUFS(PongSlaveMessage(), _, _);
+
+  Owned<MasterDetector> detector = master.get()->createDetector();
+  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get());
+  ASSERT_SOME(slave);
+
+  MockScheduler sched;
+  MesosSchedulerDriver driver(
+      &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL);
+
+  EXPECT_CALL(sched, registered(&driver, _, _));
+
+  Future<Nothing> resourceOffers;
+  EXPECT_CALL(sched, resourceOffers(&driver, _))
+    .WillOnce(FutureSatisfy(&resourceOffers))
+    .WillRepeatedly(Return()); // Ignore subsequent offers.
+
+  driver.start();
+
+  // Need to make sure the framework AND slave have registered with
+  // master. Waiting for resource offers should accomplish both.
+  AWAIT_READY(resourceOffers);
+
+  Clock::pause();
+
+  EXPECT_CALL(sched, offerRescinded(&driver, _))
+    .Times(AtMost(1));
+
+  Future<Nothing> slaveLost;
+  EXPECT_CALL(sched, slaveLost(&driver, _))
+    .WillOnce(FutureSatisfy(&slaveLost));
+
+  // Now advance through the PINGs.
+  size_t pings = 0;
+  while (true) {
+    AWAIT_READY(ping);
+    pings++;
+    if (pings == masterFlags.max_agent_ping_timeouts) {
+      break;
+    }
+    ping = FUTURE_MESSAGE(Eq(PingSlaveMessage().GetTypeName()), _, _);
+    Clock::advance(masterFlags.agent_ping_timeout);
+  }
+
+  // Intercept the next registry operation. This operation should be
+  // attempting to mark the slave unreachable.
+  Future<Owned<master::Operation>> markUnreachable;
+  Promise<bool> markUnreachableContinue;
+  EXPECT_CALL(*master.get()->registrar.get(), apply(_))
+    .WillOnce(DoAll(FutureArg<0>(&markUnreachable),
+                    Return(markUnreachableContinue.future())));
+
+  Clock::advance(masterFlags.agent_ping_timeout);
+
+  AWAIT_READY(markUnreachable);
+  EXPECT_NE(
+      nullptr,
+      dynamic_cast<master::MarkSlaveUnreachable*>(
+          markUnreachable.get().get()));
+
+  // Cause the slave to shutdown gracefully by sending it SIGUSR1.
+  // This should result in the slave sending `UnregisterSlaveMessage`
+  // to the master. Normally, the master would then remove the slave
+  // from the registry, but since the slave is already being marked
+  // unreachable, the master should ignore the unregister message.
+  Future<UnregisterSlaveMessage> unregisterSlaveMessage =
+    FUTURE_PROTOBUF(
+        UnregisterSlaveMessage(),
+        slave.get()->pid,
+        master.get()->pid);
+
+  EXPECT_CALL(*master.get()->registrar.get(), apply(_))
+    .Times(0);
+
+  kill(getpid(), SIGUSR1);
+
+  AWAIT_READY(unregisterSlaveMessage);
+
+  // Apply the registry operation to mark the slave unreachable, then
+  // pass the result back to the master to allow it to continue.
+  Future<bool> applyUnreachable =
+    master.get()->registrar->unmocked_apply(markUnreachable.get());
+
+  AWAIT_READY(applyUnreachable);
+  markUnreachableContinue.set(applyUnreachable.get());
+
+  AWAIT_READY(slaveLost);
+
+  Clock::resume();
+
+  driver.stop();
+  driver.join();
+}
+
+
+// This test checks that the master behaves correctly when a slave is
+// in the process of unregistering from the master when it is marked
+// unreachable.
+TEST_F(SlaveTest, UnregisterThenUnreachableRace)
+{
+  master::Flags masterFlags = CreateMasterFlags();
+  Try<Owned<cluster::Master>> master = StartMaster(masterFlags);
+  ASSERT_SOME(master);
+
+  // Set these expectations up before we spawn the slave so that we
+  // don't miss the first PING.
+  Future<Message> ping = FUTURE_MESSAGE(
+      Eq(PingSlaveMessage().GetTypeName()), _, _);
+
+  // Drop all the PONGs to simulate slave partition.
+  DROP_PROTOBUFS(PongSlaveMessage(), _, _);
+
+  Owned<MasterDetector> detector = master.get()->createDetector();
+  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get());
+  ASSERT_SOME(slave);
+
+  MockScheduler sched;
+  MesosSchedulerDriver driver(
+      &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL);
+
+  EXPECT_CALL(sched, registered(&driver, _, _));
+
+  Future<vector<Offer>> resourceOffers;
+  EXPECT_CALL(sched, resourceOffers(&driver, _))
+    .WillOnce(FutureArg<1>(&resourceOffers))
+    .WillRepeatedly(Return()); // Ignore subsequent offers.
+
+  driver.start();
+
+  // Need to make sure the framework AND slave have registered with
+  // master. Waiting for resource offers should accomplish both.
+  AWAIT_READY(resourceOffers);
+
+  ASSERT_EQ(1u, resourceOffers.get().size());
+  SlaveID slaveId = resourceOffers.get()[0].slave_id();
+
+  Clock::pause();
+
+  // Simulate the slave shutting down gracefully. This might happen
+  // normally if the slave receives SIGUSR1. However, we don't use
+  // that approach here, because that would also result in an `exited`
+  // event at the master; we want to test the case where the slave
+  // begins to shutdown but the socket hasn't been closed yet. Hence,
+  // we spoof the `UnregisterSlaveMessage`.
+  //
+  // When the master receives the `UnregisterSlaveMessage`, it should
+  // attempt to remove the slave from the registry.
+  Future<Owned<master::Operation>> removeSlave;
+  Promise<bool> removeSlaveContinue;
+  EXPECT_CALL(*master.get()->registrar.get(), apply(_))
+    .WillOnce(DoAll(FutureArg<0>(&removeSlave),
+                    Return(removeSlaveContinue.future())));
+
+  process::dispatch(master.get()->pid,
+                    &Master::unregisterSlave,
+                    slave.get()->pid,
+                    slaveId);
+
+  AWAIT_READY(removeSlave);
+  EXPECT_NE(
+      nullptr,
+      dynamic_cast<master::RemoveSlave*>(removeSlave.get().get()));
+
+  // Next, cause the slave to fail health checks; master will attempt
+  // to mark it unreachable.
+  size_t pings = 0;
+  while (true) {
+    AWAIT_READY(ping);
+    pings++;
+    if (pings == masterFlags.max_agent_ping_timeouts) {
+      break;
+    }
+    ping = FUTURE_MESSAGE(Eq(PingSlaveMessage().GetTypeName()), _, _);
+    Clock::advance(masterFlags.agent_ping_timeout);
+  }
+
+  // We expect the `SlaveObserver` to dispatch a message to the master
+  // to mark the slave unreachable. The master should ignore this
+  // request because the slave is already being removed.
+  Future<Nothing> unreachableDispatch =
+    FUTURE_DISPATCH(master.get()->pid, &Master::markUnreachable);
+
+  EXPECT_CALL(*master.get()->registrar.get(), apply(_))
+    .Times(0);
+
+  Clock::advance(masterFlags.agent_ping_timeout);
+
+  AWAIT_READY(unreachableDispatch);
+
+  EXPECT_CALL(sched, offerRescinded(&driver, _))
+    .Times(AtMost(1));
+
+  Future<Nothing> slaveLost;
+  EXPECT_CALL(sched, slaveLost(&driver, _))
+    .WillOnce(FutureSatisfy(&slaveLost));
+
+  // Apply the registry operation to remove the slave, then pass the
+  // result back to the master to allow it to continue.
+  Future<bool> applyRemove =
+    master.get()->registrar->unmocked_apply(removeSlave.get());
+
+  AWAIT_READY(applyRemove);
+  removeSlaveContinue.set(applyRemove.get());
+
+  AWAIT_READY(slaveLost);
+
+  Clock::resume();
+
+  driver.stop();
+  driver.join();
+}
+
+
 // This test ensures that a killTask() can happen between runTask()
 // and _run() and then gets "handled properly". This means that
 // the task never gets started, but also does not get lost. The end

Reply via email to