This is an automated email from the ASF dual-hosted git repository. mzhu pushed a commit to branch 1.6.x in repository https://gitbox.apache.org/repos/asf/mesos.git
commit 5d4f2cf4d3f6c64698f730dd6cc18dd1c26df990 Author: Meng Zhu <[email protected]> AuthorDate: Wed Mar 6 11:03:28 2019 -0800 Added minimum capability check during master recovery. Upon recovery, the master will compare its own capabilities against the list of strings read from the registry. If the master is missing any of the capabilities read from the registry, the master will refuse to recover and try to provide remediation steps instead. Modifications to the registry's list will be added in the future, within the logic for other RegistryOperations. For example, if support for Quota limit bursting (MESOS-8068) is added to a future master, setting a quota limit should cause the UpdateQuota RegistryOperation to add the appropriate minimum capability. A RemoveQuota RegistryOperation would remove the minimum capability if there are no other quota limits in use. Also adds a dedicated test. Review: https://reviews.apache.org/r/67762/ --- include/mesos/mesos.proto | 7 +++++++ include/mesos/v1/mesos.proto | 7 +++++++ src/master/master.cpp | 38 ++++++++++++++++++++++++++++++++++++++ src/master/master.hpp | 5 +++++ src/tests/master_tests.cpp | 25 +++++++++++++++++++++++++ 5 files changed, 82 insertions(+) diff --git a/include/mesos/mesos.proto b/include/mesos/mesos.proto index 4c66188..9dd1cd2 100644 --- a/include/mesos/mesos.proto +++ b/include/mesos/mesos.proto @@ -891,6 +891,13 @@ message MasterInfo { enum Type { UNKNOWN = 0; + // NOTE: When the master starts to use a new capability that + // may prevent compatible downgrade, remember to add the + // capability to `Registry::MinimumCapability`. Conversely, + // the added minimum capability should be removed if the capability + // is deemed to be no longer required for compatible downgrade. + // See MESOS-8878 for more details. + // The master can handle slaves whose state // changes after reregistering. 
AGENT_UPDATE = 1; diff --git a/include/mesos/v1/mesos.proto b/include/mesos/v1/mesos.proto index 038892f..1b5e958 100644 --- a/include/mesos/v1/mesos.proto +++ b/include/mesos/v1/mesos.proto @@ -889,6 +889,13 @@ message MasterInfo { enum Type { UNKNOWN = 0; + // NOTE: When the master starts to use a new capability that + // may prevent compatible downgrade, remember to add the + // capability to `Registry::MinimumCapability`. Conversely, + // the added minimum capability should be removed if the capability + // is deemed to be no longer required for compatible downgrade. + // See MESOS-8878 for more details. + // The master can handle slaves whose state // changes after reregistering. AGENT_UPDATE = 1; diff --git a/src/master/master.cpp b/src/master/master.cpp index 77629e4..28a1593 100644 --- a/src/master/master.cpp +++ b/src/master/master.cpp @@ -391,6 +391,32 @@ Master::Master( Master::~Master() {} +hashset<string> Master::misingMinimumCapabilities( + const MasterInfo& masterInfo, const Registry& registry) +{ + if (registry.minimum_capabilities().size() == 0) { + return hashset<string>(); + } + + hashset<string> minimumCapabilities, masterCapabilities; + + foreach ( + const Registry::MinimumCapability& minimumCapability, + registry.minimum_capabilities()) { + minimumCapabilities.insert(minimumCapability.capability()); + } + + foreach ( + const MasterInfo::Capability& masterCapability, + masterInfo.capabilities()) { + masterCapabilities.insert( + MasterInfo::Capability::Type_Name(masterCapability.type())); + } + + return minimumCapabilities - masterCapabilities; +} + + // TODO(vinod): Update this interface to return failed futures when // capacity is reached. 
struct BoundedRateLimiter @@ -1720,6 +1746,18 @@ Future<Nothing> Master::recover() Future<Nothing> Master::_recover(const Registry& registry) { + hashset<string> missingCapabilities = + misingMinimumCapabilities(info_, registry); + + if (!missingCapabilities.empty()) { + LOG(ERROR) << "Master is missing the following minimum capabilities: " + << strings::join<hashset<string>>(", ", missingCapabilities) + << ". See the following documentation for steps to safely " + << "recover from this state: " + << "http://mesos.apache.org/documentation/latest/downgrades"; + EXIT(EXIT_FAILURE); + } + foreach (const Registry::Slave& slave, registry.slaves().slaves()) { SlaveInfo slaveInfo = slave.info(); diff --git a/src/master/master.hpp b/src/master/master.hpp index 52c508a..c9a3ce2 100644 --- a/src/master/master.hpp +++ b/src/master/master.hpp @@ -441,6 +441,11 @@ public: virtual ~Master(); + // Compare this master's capabilities with registry's minimum capability. + // Return the set of capabilities missing from this master. + static hashset<std::string> misingMinimumCapabilities( + const MasterInfo& masterInfo, const Registry& registry); + // Message handlers. 
void submitScheduler( const std::string& name); diff --git a/src/tests/master_tests.cpp b/src/tests/master_tests.cpp index 69bcbad..3ccf3dc 100644 --- a/src/tests/master_tests.cpp +++ b/src/tests/master_tests.cpp @@ -2916,6 +2916,31 @@ TEST_F(MasterTest, RegistryUpdateAfterMasterFailover) } +TEST_F(MasterTest, RecoverWithMinimumCapability) +{ + Try<Owned<cluster::Master>> master = StartMaster(CreateMasterFlags()); + ASSERT_SOME(master); + + Registry registry; + + registry.add_minimum_capabilities()->set_capability( + MasterInfo::Capability::Type_Name(MasterInfo::Capability::AGENT_UPDATE)); + + EXPECT_TRUE( + Master::misingMinimumCapabilities(master.get()->getMasterInfo(), registry) + .empty()); + + registry.add_minimum_capabilities()->set_capability("SUPER_POWER"); + + hashset<string> result = + Master::misingMinimumCapabilities(master->get()->getMasterInfo(), registry); + + hashset<string> expected = {"SUPER_POWER"}; + + EXPECT_EQ(expected, result); +} + + // This test ensures that when a slave is recovered from the registry // but does not reregister with the master, it is marked unreachable // in the registry, the framework is informed that the slave is lost,
