This is an automated email from the ASF dual-hosted git repository. josephwu pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git
commit 584c0be3e1868a251aede732eece2d30344f2f6a Author: Meng Zhu <[email protected]> AuthorDate: Wed Mar 6 11:03:28 2019 -0800 Added minimum capability check during master recovery. Upon recovery, the master will compare its own capabilities against the list of strings read from the registry. If the master is missing any of the capabilities read from the registry, the master will refuse to recover and try to provide remediation steps instead. Modifications to the registry's list will be added in future, within the logic for other RegistryOperations. For example, if support for Quota limit bursting (MESOS-8068) is added to a future master, setting a quota limit should cause the UpdateQuota RegistryOperation to add the appropriate minimum capability. A RemoveQuota RegistryOperation would remove the minimum capability if there are no other quota limits in use. Also adds a dedicated test. Review: https://reviews.apache.org/r/67762/ --- include/mesos/mesos.proto | 7 +++++++ include/mesos/v1/mesos.proto | 7 +++++++ src/master/master.cpp | 38 ++++++++++++++++++++++++++++++++++++++ src/master/master.hpp | 5 +++++ src/tests/master_tests.cpp | 25 +++++++++++++++++++++++++ 5 files changed, 82 insertions(+) diff --git a/include/mesos/mesos.proto b/include/mesos/mesos.proto index d373a1d..0998732 100644 --- a/include/mesos/mesos.proto +++ b/include/mesos/mesos.proto @@ -910,6 +910,13 @@ message MasterInfo { enum Type { UNKNOWN = 0; + // NOTE: When the master starts to use a new capability that + // may prevent compatible downgrade, remember to add the + // capability to `Registry::MinimumCapability`. Conversely, + // the added minimum capability should be removed if the capability + // is deemed to be no longer required for compatible downgrade. + // See MESOS-8878 for more details. + // The master can handle slaves whose state // changes after reregistering. AGENT_UPDATE = 1; diff --git a/include/mesos/v1/mesos.proto b/include/mesos/v1/mesos.proto index e53596f..3656aa7 100644 --- a/include/mesos/v1/mesos.proto +++ b/include/mesos/v1/mesos.proto @@ -908,6 +908,13 @@ message MasterInfo { enum Type { UNKNOWN = 0; + // NOTE: When the master starts to use a new capability that + // may prevent compatible downgrade, remember to add the + // capability to `Registry::MinimumCapability`. Conversely, + // the added minimum capability should be removed if the capability + // is deemed to be no longer required for compatible downgrade. + // See MESOS-8878 for more details. + // The master can handle slaves whose state // changes after reregistering. AGENT_UPDATE = 1; diff --git a/src/master/master.cpp b/src/master/master.cpp index 3bf84b8..b9db4ff 100644 --- a/src/master/master.cpp +++ b/src/master/master.cpp @@ -390,6 +390,32 @@ Master::Master( Master::~Master() {} +hashset<string> Master::misingMinimumCapabilities( + const MasterInfo& masterInfo, const Registry& registry) +{ + if (registry.minimum_capabilities().size() == 0) { + return hashset<string>(); + } + + hashset<string> minimumCapabilities, masterCapabilities; + + foreach ( + const Registry::MinimumCapability& minimumCapability, + registry.minimum_capabilities()) { + minimumCapabilities.insert(minimumCapability.capability()); + } + + foreach ( + const MasterInfo::Capability& masterCapability, + masterInfo.capabilities()) { + masterCapabilities.insert( + MasterInfo::Capability::Type_Name(masterCapability.type())); + } + + return minimumCapabilities - masterCapabilities; +} + + // TODO(vinod): Update this interface to return failed futures when // capacity is reached. struct BoundedRateLimiter @@ -1647,6 +1673,18 @@ Future<Nothing> Master::recover() Future<Nothing> Master::_recover(const Registry& registry) { + hashset<string> missingCapabilities = + misingMinimumCapabilities(info_, registry); + + if (!missingCapabilities.empty()) { + LOG(ERROR) << "Master is missing the following minimum capabilities: " + << strings::join<hashset<string>>(", ", missingCapabilities) + << ". See the following documentation for steps to safely " + << "recover from this state: " + << "http://mesos.apache.org/documentation/latest/downgrades"; + EXIT(EXIT_FAILURE); + } + foreach (const Registry::Slave& slave, registry.slaves().slaves()) { SlaveInfo slaveInfo = slave.info(); diff --git a/src/master/master.hpp b/src/master/master.hpp index aceab34..90e0814 100644 --- a/src/master/master.hpp +++ b/src/master/master.hpp @@ -365,6 +365,11 @@ public: ~Master() override; + // Compare this master's capabilities with registry's minimum capability. + // Return the set of capabilities missing from this master. + static hashset<std::string> misingMinimumCapabilities( + const MasterInfo& masterInfo, const Registry& registry); + // Message handlers. void submitScheduler( const std::string& name); diff --git a/src/tests/master_tests.cpp b/src/tests/master_tests.cpp index 5ae8e1c..5a92683 100644 --- a/src/tests/master_tests.cpp +++ b/src/tests/master_tests.cpp @@ -2945,6 +2945,31 @@ TEST_F(MasterTest, RegistryUpdateAfterMasterFailover) } +TEST_F(MasterTest, RecoverWithMinimumCapability) +{ + Try<Owned<cluster::Master>> master = StartMaster(CreateMasterFlags()); + ASSERT_SOME(master); + + Registry registry; + + registry.add_minimum_capabilities()->set_capability( + MasterInfo::Capability::Type_Name(MasterInfo::Capability::AGENT_UPDATE)); + + EXPECT_TRUE( + Master::misingMinimumCapabilities(master.get()->getMasterInfo(), registry) + .empty()); + + registry.add_minimum_capabilities()->set_capability("SUPER_POWER"); + + hashset<string> result = + Master::misingMinimumCapabilities(master->get()->getMasterInfo(), registry); + + hashset<string> expected = {"SUPER_POWER"}; + + EXPECT_EQ(expected, result); +} + + // This test ensures that when a slave is recovered from the registry // but does not reregister with the master, it is marked unreachable // in the registry, the framework is informed that the slave is lost,
