This is an automated email from the ASF dual-hosted git repository. mzhu pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git
commit 16f0b0c295960e397e56f6d504b8075cb62e6e4f Author: Meng Zhu <[email protected]> AuthorDate: Fri Jul 5 15:41:01 2019 -0700 Added overcommit and hierarchical inclusion check for `UPDATE_QUOTA`. The overcommit check validates that the total quota guarantees in the cluster are contained by the cluster capacity. The hierarchical inclusion check validates that the sum of children's guarantees is contained by the parent guarantee. Further validation is needed to: - Check that a role's limit is less than its current consumption. - Check that a role's limit is less than its parent's limit. Review: https://reviews.apache.org/r/71020 --- src/master/quota_handler.cpp | 60 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 59 insertions(+), 1 deletion(-) diff --git a/src/master/quota_handler.cpp b/src/master/quota_handler.cpp index 9a43110..9afab28 100644 --- a/src/master/quota_handler.cpp +++ b/src/master/quota_handler.cpp @@ -89,7 +89,8 @@ namespace master { // parent role and its children. // // TODO(mzhu): The above check is only about guarantees. We should extend -// the check to also cover limits. +// the check to also cover limits: a role's limit is less than its +// parent's limit. class QuotaTree { public: @@ -462,6 +463,63 @@ Future<http::Response> Master::QuotaHandler::update( } } + // TODO(mzhu): Validate a role's limit is below its current consumption + // (otherwise a `force` flag is needed). + // + // TODO(mzhu): Pull out these validation in a function that can be shared + // between this and the old handlers. + + // Validate hierarchical quota. + + // TODO(mzhu): Keep an up-to-date `QuotaTree` in memory. 
+ QuotaTree quotaTree{{}}; + + foreachpair (const string& role, const Quota& quota, master->quotas) { + quotaTree.update(role, quota); + } + + foreach (auto&& config, call.update_quota().quota_configs()) { + quotaTree.update(config.role(), Quota{config}); + } + + Option<Error> error = quotaTree.validate(); + if (error.isSome()) { + return BadRequest("Invalid QuotaConfig: " + error->message); + } + + // Overcommitment check. + + // Check for quota overcommit. We include resources from all + // registered agents, even if they are disconnected. + // + // Disconnection tends to be a transient state (e.g. agent + // might be getting restarted as part of an upgrade, there + // might be a transient networking issue, etc), so excluding + // disconnected agents could produce an unstable capacity + // calculation. + // + // TODO(bmahler): In the same vein, include agents that + // are recovered from the registry but not yet registered. + // Because we currently exclude them, the calculated capacity + // is 0 immediately after a failover and slowly works its way + // up to the pre-failover capacity as the agents re-register. + ResourceQuantities clusterCapacity; + foreachvalue (const Slave* agent, master->slaves.registered) { + clusterCapacity += ResourceQuantities::fromScalarResources( + agent->totalResources.nonRevocable().scalars()); + } + + if (!clusterCapacity.contains(quotaTree.totalGuarantees())) { + if (call.update_quota().force()) { + LOG(INFO) << "Using force flag to override quota overcommit check"; + } else { + return BadRequest("Invalid QuotaConfig: total quota guarantees '" + + stringify(quotaTree.totalGuarantees()) + "'" + " exceed cluster capacity '" + stringify(clusterCapacity) + "'" + " (use 'force' flag to bypass this check)"); + } + } + return NotImplemented(); }
