This is an automated email from the ASF dual-hosted git repository.

josephwu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/mesos.git

commit 584c0be3e1868a251aede732eece2d30344f2f6a
Author: Meng Zhu <[email protected]>
AuthorDate: Wed Mar 6 11:03:28 2019 -0800

    Added minimum capability check during master recovery.
    
    Upon recovery, the master will compare its own capabilities against the
    list of strings read from the registry.  If the master is missing any
    of the capabilities read from the registry, the master will refuse to
    recover and try to provide remediation steps instead.
    
    Modifications to the registry's list will be added in future, within
    the logic for other RegistryOperations.  For example, if support for
    Quota limit bursting (MESOS-8068) is added to a future master,
    setting a quota limit should cause the UpdateQuota RegistryOperation
    to add the appropriate minimum capability.  A RemoveQuota
    RegistryOperation would remove the minimum capability if there are no
    other quota limits in use.
    
    Also adds a dedicated test.
    
    Review: https://reviews.apache.org/r/67762/
---
 include/mesos/mesos.proto    |  7 +++++++
 include/mesos/v1/mesos.proto |  7 +++++++
 src/master/master.cpp        | 38 ++++++++++++++++++++++++++++++++++++++
 src/master/master.hpp        |  5 +++++
 src/tests/master_tests.cpp   | 25 +++++++++++++++++++++++++
 5 files changed, 82 insertions(+)

diff --git a/include/mesos/mesos.proto b/include/mesos/mesos.proto
index d373a1d..0998732 100644
--- a/include/mesos/mesos.proto
+++ b/include/mesos/mesos.proto
@@ -910,6 +910,13 @@ message MasterInfo {
     enum Type {
       UNKNOWN = 0;
 
+      // NOTE: When the master starts to use a new capability that
+      // may prevent compatible downgrade, remember to add the
+      // capability to `Registry::MinimumCapability`. Conversely,
+      // the added minimum capability should be removed if the capability
+      // is deemed to be no longer required for compatible downgrade.
+      // See MESOS-8878 for more details.
+
       // The master can handle slaves whose state
       // changes after reregistering.
       AGENT_UPDATE = 1;
diff --git a/include/mesos/v1/mesos.proto b/include/mesos/v1/mesos.proto
index e53596f..3656aa7 100644
--- a/include/mesos/v1/mesos.proto
+++ b/include/mesos/v1/mesos.proto
@@ -908,6 +908,13 @@ message MasterInfo {
     enum Type {
       UNKNOWN = 0;
 
+      // NOTE: When the master starts to use a new capability that
+      // may prevent compatible downgrade, remember to add the
+      // capability to `Registry::MinimumCapability`. Conversely,
+      // the added minimum capability should be removed if the capability
+      // is deemed to be no longer required for compatible downgrade.
+      // See MESOS-8878 for more details.
+
       // The master can handle slaves whose state
       // changes after reregistering.
       AGENT_UPDATE = 1;
diff --git a/src/master/master.cpp b/src/master/master.cpp
index 3bf84b8..b9db4ff 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -390,6 +390,32 @@ Master::Master(
 Master::~Master() {}
 
 
+hashset<string> Master::misingMinimumCapabilities(
+    const MasterInfo& masterInfo, const Registry& registry)
+{
+  if (registry.minimum_capabilities().size() == 0) {
+    return hashset<string>();
+  }
+
+  hashset<string> minimumCapabilities, masterCapabilities;
+
+  foreach (
+      const Registry::MinimumCapability& minimumCapability,
+      registry.minimum_capabilities()) {
+    minimumCapabilities.insert(minimumCapability.capability());
+  }
+
+  foreach (
+      const MasterInfo::Capability& masterCapability,
+      masterInfo.capabilities()) {
+    masterCapabilities.insert(
+        MasterInfo::Capability::Type_Name(masterCapability.type()));
+  }
+
+  return minimumCapabilities - masterCapabilities;
+}
+
+
 // TODO(vinod): Update this interface to return failed futures when
 // capacity is reached.
 struct BoundedRateLimiter
@@ -1647,6 +1673,18 @@ Future<Nothing> Master::recover()
 
 Future<Nothing> Master::_recover(const Registry& registry)
 {
+  hashset<string> missingCapabilities =
+    misingMinimumCapabilities(info_, registry);
+
+  if (!missingCapabilities.empty()) {
+    LOG(ERROR) << "Master is missing the following minimum capabilities: "
+               << strings::join<hashset<string>>(", ", missingCapabilities)
+               << ". See the following documentation for steps to safely "
+               << "recover from this state: "
+               << "http://mesos.apache.org/documentation/latest/downgrades";
+    EXIT(EXIT_FAILURE);
+  }
+
   foreach (const Registry::Slave& slave, registry.slaves().slaves()) {
     SlaveInfo slaveInfo = slave.info();
 
diff --git a/src/master/master.hpp b/src/master/master.hpp
index aceab34..90e0814 100644
--- a/src/master/master.hpp
+++ b/src/master/master.hpp
@@ -365,6 +365,11 @@ public:
 
   ~Master() override;
 
+  // Compare this master's capabilities with registry's minimum capability.
+  // Return the set of capabilities missing from this master.
+  static hashset<std::string> misingMinimumCapabilities(
+      const MasterInfo& masterInfo, const Registry& registry);
+
   // Message handlers.
   void submitScheduler(
       const std::string& name);
diff --git a/src/tests/master_tests.cpp b/src/tests/master_tests.cpp
index 5ae8e1c..5a92683 100644
--- a/src/tests/master_tests.cpp
+++ b/src/tests/master_tests.cpp
@@ -2945,6 +2945,31 @@ TEST_F(MasterTest, RegistryUpdateAfterMasterFailover)
 }
 
 
+TEST_F(MasterTest, RecoverWithMinimumCapability)
+{
+  Try<Owned<cluster::Master>> master = StartMaster(CreateMasterFlags());
+  ASSERT_SOME(master);
+
+  Registry registry;
+
+  registry.add_minimum_capabilities()->set_capability(
+      MasterInfo::Capability::Type_Name(MasterInfo::Capability::AGENT_UPDATE));
+
+  EXPECT_TRUE(
+      Master::misingMinimumCapabilities(master.get()->getMasterInfo(), registry)
+        .empty());
+
+  registry.add_minimum_capabilities()->set_capability("SUPER_POWER");
+
+  hashset<string> result =
+    Master::misingMinimumCapabilities(master->get()->getMasterInfo(), registry);
+
+  hashset<string> expected = {"SUPER_POWER"};
+
+  EXPECT_EQ(expected, result);
+}
+
+
 // This test ensures that when a slave is recovered from the registry
 // but does not reregister with the master, it is marked unreachable
 // in the registry, the framework is informed that the slave is lost,

Reply via email to