This is an automated email from the ASF dual-hosted git repository. josephwu pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git
commit 50ee946204d67487954b23a1ada7b412f7f328f8 Author: Joseph Wu <[email protected]> AuthorDate: Tue Jul 9 11:57:49 2019 -0700 Added best-effort validation for DRAIN_AGENT master call. This adds two cases which the master cannot deterministically enforce when using the DRAIN_AGENT call. The draining feature is currently incompatible with maintenance schedules and older versions of the Mesos agent. However, in both cases, an agent can reregister with incompatible info after starting to drain. Also, unreachable agents do not have enough info to enforce either case. These are unexpected cases and are not expected to cause any problems if circumvented. However, the agent draining feature is also not expected to work correctly if circumvented. Review: https://reviews.apache.org/r/71041 --- src/master/http.cpp | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/src/master/http.cpp b/src/master/http.cpp index 2b3faa7..ee0f2d5 100644 --- a/src/master/http.cpp +++ b/src/master/http.cpp @@ -3948,6 +3948,47 @@ Future<Response> Master::Http::drainAgent( CHECK(call.has_drain_agent()); SlaveID slaveId = call.drain_agent().slave_id(); + Slave* slave = master->slaves.registered.get(slaveId); + + if (slave != nullptr) { + // Check that the targeted agent is not part of a maintenance schedule. + // NOTE: This is a best-effort check, because it is possible to drain + // an agent, and then change the agent's hostname/IP into a MachineID + // in a maintenance schedule. Also, the MachineID of unreachable agents + // is unknown until they reregister. + // + // TODO(josephw): Reconsider this check once the maintenance and agent + // draining features are integrated. + // + // TODO(josephw): Check this condition against unreachable agents + // once MESOS-9884 is resolved. + if (!master->maintenance.schedules.empty()) { + foreach ( + const mesos::maintenance::Window& window, + master->maintenance.schedules.front().windows()) { + foreach (const MachineID& machineId, window.machine_ids()) { + if (machineId == slave->machineId) { + return BadRequest( + "Agent " + stringify(slaveId) + " is part of a maintenance" + " schedule under Machine " + stringify(machineId)); + } + } + } + } + + // Check that the targeted agent is capable of `AGENT_DRAINING`. + // NOTE: This is a best-effort check, because it is possible to drain + // an agent, and then downgrade the agent to a version that does not + // support draining. Also, the capabilities of unreachable agents + // are unknown until they reregister. + // + // TODO(josephw): Check this condition against unreachable agents + // once MESOS-9884 is resolved. + if (!slave->capabilities.agentDraining) { + return BadRequest( + "Agent " + stringify(slaveId) + " is not capable of draining"); + } + } Option<DurationInfo> maxGracePeriod; if (call.drain_agent().has_max_grace_period()) {
