Made the agent verify resource compatibility in checkpointResources().

When handling a CheckpointResourcesMessage, the agent should first make
sure the new checkpointed resources are compatible with the agent's
resources (as specified through the '--resources' flag) before it syncs
local disk state.

Review: https://reviews.apache.org/r/51866


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/b24d99b6
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/b24d99b6
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/b24d99b6

Branch: refs/heads/master
Commit: b24d99b68bae1445a5948b68c0130d7c1c08ca71
Parents: e65933c
Author: Jiang Yan Xu <xuj...@apple.com>
Authored: Tue Sep 13 14:53:34 2016 -0700
Committer: Jiang Yan Xu <xuj...@apple.com>
Committed: Mon Sep 19 14:59:11 2016 -0700

----------------------------------------------------------------------
 src/slave/slave.cpp | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/b24d99b6/src/slave/slave.cpp
----------------------------------------------------------------------
diff --git a/src/slave/slave.cpp b/src/slave/slave.cpp
index 7f99e46..4ced3e7 100644
--- a/src/slave/slave.cpp
+++ b/src/slave/slave.cpp
@@ -2736,6 +2736,19 @@ void Slave::checkpointResources(const vector<Resource>& _checkpointedResources)
     return;
   }
 
+  // This is a sanity check to verify that the new checkpointed
+  // resources are compatible with the agent resources specified
+  // through the '--resources' command line flag. The resources
+  // should be guaranteed compatible by the master.
+  Try<Resources> totalResources = applyCheckpointedResources(
+      info.resources(),
+      newCheckpointedResources);
+
+  CHECK_SOME(totalResources)
+    << "Failed to apply checkpointed resources "
+    << newCheckpointedResources << " to agent's resources "
+    << info.resources();
+
   // Store the target checkpoint resources. We commit the checkpoint
   // only after all operations are successful. If any of the operations
   // fail, the agent exits and the update to checkpointed resources
@@ -5051,8 +5064,11 @@ Future<Nothing> Slave::recover(const Result<state::State>& state)
     }
 
     // This is to verify that the checkpointed resources are
-    // compatible with the slave resources specified through the
-    // '--resources' command line flag.
+    // compatible with the agent resources specified through the
+    // '--resources' command line flag. The compatibility has been
+    // verified by the old agent but the flag may have changed during
+    // agent restart in an incompatible way and the operator may need
+    // to either fix the flag or the checkpointed resources.
     Try<Resources> totalResources = applyCheckpointedResources(
         info.resources(), checkpointedResources);
 

Reply via email to