Stopped sending checkpoint resources message on agent re-registration.

Given that resource-provider-capable agents will send an update slave
message to the master during re-registration, there is no need for the
master to send a checkpoint resources message to such agents anymore.
Legacy agents that are not resource provider capable still receive it.

This also makes the code more consistent, because the agent should be
the source of truth. It also eliminates the possible retry incurred by
this message, which was never the intention.

Review: https://reviews.apache.org/r/62879


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/33d1ff17
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/33d1ff17
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/33d1ff17

Branch: refs/heads/master
Commit: 33d1ff1798f8cbf83b4e5f7bc79dbf8e231dff1f
Parents: e9ac9f8
Author: Jie Yu <[email protected]>
Authored: Tue Oct 10 20:17:53 2017 -0700
Committer: Jie Yu <[email protected]>
Committed: Sun Oct 29 15:57:28 2017 +0100

----------------------------------------------------------------------
 src/master/master.cpp | 62 ++++++++++++++++++++++++++--------------------
 1 file changed, 35 insertions(+), 27 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/33d1ff17/src/master/master.cpp
----------------------------------------------------------------------
diff --git a/src/master/master.cpp b/src/master/master.cpp
index c118b9d..5b2c9a0 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -6318,41 +6318,49 @@ void Master::_reregisterSlave(
 
     slaves.reregistering.erase(slaveInfo.id());
 
-    // Send checkpointed resources to the agent. This is important for
+    // If the agent is not resource provider capable (legacy agent),
+    // send checkpointed resources to the agent. This is important for
     // the cases where the master didn't fail over. In that case, the
     // master might have already applied an operation that the agent
     // didn't see (e.g., due to a breaking connection). This message
     // will sync the state between the master and the agent about
     // checkpointed resources.
-    CheckpointResourcesMessage message;
-
-    message.mutable_resources()->CopyFrom(slave->checkpointedResources);
-
-    if (!slave->capabilities.reservationRefinement) {
-      // If the agent is not refinement-capable, don't send it
-      // checkpointed resources that contain refined reservations. This
-      // might occur if a reservation refinement is created but never
-      // reaches the agent (e.g., due to network partition), and then
-      // the agent is downgraded before the partition heals.
-      //
-      // TODO(neilc): It would probably be better to prevent the agent
-      // from re-registering in this scenario.
-      Try<Nothing> result = downgradeResources(message.mutable_resources());
-      if (result.isError()) {
-        LOG(WARNING) << "Not sending updated checkpointed resouces "
-                     << slave->checkpointedResources
-                     << " with refined reservations, since agent " << *slave
-                     << " is not RESERVATION_REFINEMENT-capable.";
-
-        return;
+    //
+    // New agents that are resource provider capable will always
+    // update the master with total resources during re-registration.
+    // Therefore, no need to send checkpointed resources to the new
+    // agent in this case.
+    if (!slave->capabilities.resourceProvider) {
+      CheckpointResourcesMessage message;
+
+      message.mutable_resources()->CopyFrom(slave->checkpointedResources);
+
+      if (!slave->capabilities.reservationRefinement) {
+        // If the agent is not refinement-capable, don't send it
+        // checkpointed resources that contain refined reservations. This
+        // might occur if a reservation refinement is created but never
+        // reaches the agent (e.g., due to network partition), and then
+        // the agent is downgraded before the partition heals.
+        //
+        // TODO(neilc): It would probably be better to prevent the agent
+        // from re-registering in this scenario.
+        Try<Nothing> result = downgradeResources(message.mutable_resources());
+        if (result.isError()) {
+          LOG(WARNING) << "Not sending updated checkpointed resouces "
+                       << slave->checkpointedResources
+                       << " with refined reservations, since agent " << *slave
+                       << " is not RESERVATION_REFINEMENT-capable.";
+
+          return;
+        }
       }
-    }
 
-    LOG(INFO) << "Sending updated checkpointed resources "
-              << slave->checkpointedResources
-              << " to agent " << *slave;
+      LOG(INFO) << "Sending updated checkpointed resources "
+                << slave->checkpointedResources
+                << " to agent " << *slave;
 
-    send(slave->pid, message);
+      send(slave->pid, message);
+    }
 
     return;
   }

Reply via email to