This is an automated email from the ASF dual-hosted git repository. mzhu pushed a commit to branch 1.7.x in repository https://gitbox.apache.org/repos/asf/mesos.git
commit 18de75e141ed2e879601a808034eede20db131f2 Author: Meng Zhu <[email protected]> AuthorDate: Wed Mar 6 11:03:28 2019 -0800 Added minimum capability check during master recovery. Upon recovery, the master will compare its own capabilities against the list of strings read from the registry. If the master is missing any of the capabilities read from the registry, the master will refuse to recover and try to provide remediation steps instead. Modifications to the registry's list will be added in the future, within the logic for other RegistryOperations. For example, if support for Quota limit bursting (MESOS-8068) is added to a future master, setting a quota limit should cause the UpdateQuota RegistryOperation to add the appropriate minimum capability. A RemoveQuota RegistryOperation would remove the minimum capability if there are no other quota limits in use. Also adds a dedicated test. Review: https://reviews.apache.org/r/67762/ --- include/mesos/mesos.proto | 7 +++++++ include/mesos/v1/mesos.proto | 7 +++++++ src/master/master.cpp | 38 ++++++++++++++++++++++++++++++++++++++ src/master/master.hpp | 5 +++++ src/tests/master_tests.cpp | 25 +++++++++++++++++++++++++ 5 files changed, 82 insertions(+) diff --git a/include/mesos/mesos.proto b/include/mesos/mesos.proto index e984541..97b1e0c 100644 --- a/include/mesos/mesos.proto +++ b/include/mesos/mesos.proto @@ -891,6 +891,13 @@ message MasterInfo { enum Type { UNKNOWN = 0; + // NOTE: When the master starts to use a new capability that + // may prevent compatible downgrade, remember to add the + // capability to `Registry::MinimumCapability`. Conversely, + // the added minimum capability should be removed if the capability + // is deemed to be no longer required for compatible downgrade. + // See MESOS-8878 for more details. + // The master can handle slaves whose state // changes after reregistering. 
AGENT_UPDATE = 1; diff --git a/include/mesos/v1/mesos.proto b/include/mesos/v1/mesos.proto index dac7f51..62930b0 100644 --- a/include/mesos/v1/mesos.proto +++ b/include/mesos/v1/mesos.proto @@ -889,6 +889,13 @@ message MasterInfo { enum Type { UNKNOWN = 0; + // NOTE: When the master starts to use a new capability that + // may prevent compatible downgrade, remember to add the + // capability to `Registry::MinimumCapability`. Conversely, + // the added minimum capability should be removed if the capability + // is deemed to be no longer required for compatible downgrade. + // See MESOS-8878 for more details. + // The master can handle slaves whose state // changes after reregistering. AGENT_UPDATE = 1; diff --git a/src/master/master.cpp b/src/master/master.cpp index 3733d5a..3092608 100644 --- a/src/master/master.cpp +++ b/src/master/master.cpp @@ -391,6 +391,32 @@ Master::Master( Master::~Master() {} +hashset<string> Master::misingMinimumCapabilities( + const MasterInfo& masterInfo, const Registry& registry) +{ + if (registry.minimum_capabilities().size() == 0) { + return hashset<string>(); + } + + hashset<string> minimumCapabilities, masterCapabilities; + + foreach ( + const Registry::MinimumCapability& minimumCapability, + registry.minimum_capabilities()) { + minimumCapabilities.insert(minimumCapability.capability()); + } + + foreach ( + const MasterInfo::Capability& masterCapability, + masterInfo.capabilities()) { + masterCapabilities.insert( + MasterInfo::Capability::Type_Name(masterCapability.type())); + } + + return minimumCapabilities - masterCapabilities; +} + + // TODO(vinod): Update this interface to return failed futures when // capacity is reached. 
struct BoundedRateLimiter @@ -1671,6 +1697,18 @@ Future<Nothing> Master::recover() Future<Nothing> Master::_recover(const Registry& registry) { + hashset<string> missingCapabilities = + misingMinimumCapabilities(info_, registry); + + if (!missingCapabilities.empty()) { + LOG(ERROR) << "Master is missing the following minimum capabilities: " + << strings::join<hashset<string>>(", ", missingCapabilities) + << ". See the following documentation for steps to safely " + << "recover from this state: " + << "http://mesos.apache.org/documentation/latest/downgrades"; + EXIT(EXIT_FAILURE); + } + foreach (const Registry::Slave& slave, registry.slaves().slaves()) { SlaveInfo slaveInfo = slave.info(); diff --git a/src/master/master.hpp b/src/master/master.hpp index b367aad..2bfe255 100644 --- a/src/master/master.hpp +++ b/src/master/master.hpp @@ -454,6 +454,11 @@ public: ~Master() override; + // Compare this master's capabilities with registry's minimum capability. + // Return the set of capabilities missing from this master. + static hashset<std::string> misingMinimumCapabilities( + const MasterInfo& masterInfo, const Registry& registry); + // Message handlers. 
void submitScheduler( const std::string& name); diff --git a/src/tests/master_tests.cpp b/src/tests/master_tests.cpp index 9d5d5a3..0bf466b 100644 --- a/src/tests/master_tests.cpp +++ b/src/tests/master_tests.cpp @@ -2937,6 +2937,31 @@ TEST_F(MasterTest, RegistryUpdateAfterMasterFailover) } +TEST_F(MasterTest, RecoverWithMinimumCapability) +{ + Try<Owned<cluster::Master>> master = StartMaster(CreateMasterFlags()); + ASSERT_SOME(master); + + Registry registry; + + registry.add_minimum_capabilities()->set_capability( + MasterInfo::Capability::Type_Name(MasterInfo::Capability::AGENT_UPDATE)); + + EXPECT_TRUE( + Master::misingMinimumCapabilities(master.get()->getMasterInfo(), registry) + .empty()); + + registry.add_minimum_capabilities()->set_capability("SUPER_POWER"); + + hashset<string> result = + Master::misingMinimumCapabilities(master->get()->getMasterInfo(), registry); + + hashset<string> expected = {"SUPER_POWER"}; + + EXPECT_EQ(expected, result); +} + + // This test ensures that when a slave is recovered from the registry // but does not reregister with the master, it is marked unreachable // in the registry, the framework is informed that the slave is lost,
