This is an automated email from the ASF dual-hosted git repository.

mzhu pushed a commit to branch 1.6.x
in repository https://gitbox.apache.org/repos/asf/mesos.git

commit 5d4f2cf4d3f6c64698f730dd6cc18dd1c26df990
Author: Meng Zhu <[email protected]>
AuthorDate: Wed Mar 6 11:03:28 2019 -0800

    Added minimum capability check during master recovery.
    
    Upon recovery, the master will compare its own capabilities against the
    list of strings read from the registry.  If the master is missing any
    of the capabilities read from the registry, the master will refuse to
    recover and try to provide remediation steps instead.
    
    Modifications to the registry's list will be added in future, within
    the logic for other RegistryOperations.  For example, if support for
    Quota limit bursting (MESOS-8068) is added to a future master,
    setting a quota limit should cause the UpdateQuota RegistryOperation
    to add the appropriate minimum capability.  A RemoveQuota
    RegistryOperation would remove the minimum capability if there are no
    other quota limits in use.
    
    Also adds a dedicated test.
    
    Review: https://reviews.apache.org/r/67762/
---
 include/mesos/mesos.proto    |  7 +++++++
 include/mesos/v1/mesos.proto |  7 +++++++
 src/master/master.cpp        | 38 ++++++++++++++++++++++++++++++++++++++
 src/master/master.hpp        |  5 +++++
 src/tests/master_tests.cpp   | 25 +++++++++++++++++++++++++
 5 files changed, 82 insertions(+)

diff --git a/include/mesos/mesos.proto b/include/mesos/mesos.proto
index 4c66188..9dd1cd2 100644
--- a/include/mesos/mesos.proto
+++ b/include/mesos/mesos.proto
@@ -891,6 +891,13 @@ message MasterInfo {
     enum Type {
       UNKNOWN = 0;
 
+      // NOTE: When the master starts to use a new capability that
+      // may prevent compatible downgrade, remember to add the
+      // capability to `Registry::MinimumCapability`. Conversely,
+      // the added minimum capability should be removed if the capability
+      // is deemed to be no longer required for compatible downgrade.
+      // See MESOS-8878 for more details.
+
       // The master can handle slaves whose state
       // changes after reregistering.
       AGENT_UPDATE = 1;
diff --git a/include/mesos/v1/mesos.proto b/include/mesos/v1/mesos.proto
index 038892f..1b5e958 100644
--- a/include/mesos/v1/mesos.proto
+++ b/include/mesos/v1/mesos.proto
@@ -889,6 +889,13 @@ message MasterInfo {
     enum Type {
       UNKNOWN = 0;
 
+      // NOTE: When the master starts to use a new capability that
+      // may prevent compatible downgrade, remember to add the
+      // capability to `Registry::MinimumCapability`. Conversely,
+      // the added minimum capability should be removed if the capability
+      // is deemed to be no longer required for compatible downgrade.
+      // See MESOS-8878 for more details.
+
       // The master can handle slaves whose state
       // changes after reregistering.
       AGENT_UPDATE = 1;
diff --git a/src/master/master.cpp b/src/master/master.cpp
index 77629e4..28a1593 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -391,6 +391,32 @@ Master::Master(
 Master::~Master() {}
 
 
+hashset<string> Master::misingMinimumCapabilities(
+    const MasterInfo& masterInfo, const Registry& registry)
+{
+  // Fast path: with no minimum capabilities listed, nothing can be missing.
+  if (registry.minimum_capabilities_size() == 0) {
+    return hashset<string>();
+  }
+
+  // Names of the minimum capabilities listed in `registry`.
+  hashset<string> required;
+  foreach (const Registry::MinimumCapability& entry,
+           registry.minimum_capabilities()) {
+    required.insert(entry.capability());
+  }
+
+  // Names of the capability types listed in `masterInfo`.
+  hashset<string> supported;
+  foreach (const MasterInfo::Capability& entry, masterInfo.capabilities()) {
+    supported.insert(MasterInfo::Capability::Type_Name(entry.type()));
+  }
+
+  // Set difference: required names that `masterInfo` does not list.
+  return required - supported;
+}
+
+
 // TODO(vinod): Update this interface to return failed futures when
 // capacity is reached.
 struct BoundedRateLimiter
@@ -1720,6 +1746,18 @@ Future<Nothing> Master::recover()
 
 Future<Nothing> Master::_recover(const Registry& registry)
 {
+  // Abort recovery if the registry requires capabilities this master lacks.
+  const hashset<string> missing = misingMinimumCapabilities(info_, registry);
+
+  if (!missing.empty()) {
+    LOG(ERROR) << "Master is missing the following minimum capabilities: "
+               << strings::join<hashset<string>>(", ", missing)
+               << ". See the following documentation for steps to safely "
+               << "recover from this state: "
+               << "http://mesos.apache.org/documentation/latest/downgrades";
+    EXIT(EXIT_FAILURE);
+  }
+
   foreach (const Registry::Slave& slave, registry.slaves().slaves()) {
     SlaveInfo slaveInfo = slave.info();
 
diff --git a/src/master/master.hpp b/src/master/master.hpp
index 52c508a..c9a3ce2 100644
--- a/src/master/master.hpp
+++ b/src/master/master.hpp
@@ -441,6 +441,11 @@ public:
 
   virtual ~Master();
 
+  // Compares the capabilities in `masterInfo` against the minimum
+  // capabilities in `registry`; returns the names `masterInfo` is missing.
+  static hashset<std::string> misingMinimumCapabilities(
+      const MasterInfo& masterInfo, const Registry& registry);
+
   // Message handlers.
   void submitScheduler(
       const std::string& name);
diff --git a/src/tests/master_tests.cpp b/src/tests/master_tests.cpp
index 69bcbad..3ccf3dc 100644
--- a/src/tests/master_tests.cpp
+++ b/src/tests/master_tests.cpp
@@ -2916,6 +2916,31 @@ TEST_F(MasterTest, RegistryUpdateAfterMasterFailover)
 }
 
 
+TEST_F(MasterTest, RecoverWithMinimumCapability)
+{
+  Try<Owned<cluster::Master>> master = StartMaster(CreateMasterFlags());
+  ASSERT_SOME(master);
+
+  const MasterInfo masterInfo = master.get()->getMasterInfo();
+
+  // Requiring only AGENT_UPDATE is expected to leave nothing missing.
+  Registry registry;
+  registry.add_minimum_capabilities()->set_capability(
+      MasterInfo::Capability::Type_Name(MasterInfo::Capability::AGENT_UPDATE));
+
+  EXPECT_TRUE(
+      Master::misingMinimumCapabilities(masterInfo, registry).empty());
+
+  // A made-up capability name must be the only one reported as missing.
+  registry.add_minimum_capabilities()->set_capability("SUPER_POWER");
+
+  hashset<string> expected = {"SUPER_POWER"};
+  hashset<string> result =
+    Master::misingMinimumCapabilities(masterInfo, registry);
+  EXPECT_EQ(expected, result);
+}
+
+
 // This test ensures that when a slave is recovered from the registry
 // but does not reregister with the master, it is marked unreachable
 // in the registry, the framework is informed that the slave is lost,

Reply via email to