Maintenance Primitives: Added `MachineID` to Slave struct in Master. Review: https://reviews.apache.org/r/37170
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/9e7ee6b2 Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/9e7ee6b2 Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/9e7ee6b2 Branch: refs/heads/master Commit: 9e7ee6b26f8afe419c7758327fc9ce9f580e0b54 Parents: 57385ec Author: Joris Van Remoortere <[email protected]> Authored: Sun Aug 30 13:56:56 2015 -0400 Committer: Joris Van Remoortere <[email protected]> Committed: Mon Sep 14 13:58:37 2015 -0400 ---------------------------------------------------------------------- src/Makefile.am | 1 + src/master/http.cpp | 32 ++++++++++++++------------- src/master/machine.hpp | 49 +++++++++++++++++++++++++++++++++++++++++ src/master/maintenance.cpp | 6 ++--- src/master/maintenance.hpp | 3 ++- src/master/master.cpp | 21 +++++++++++++++++- src/master/master.hpp | 12 +++++++--- 7 files changed, 101 insertions(+), 23 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/mesos/blob/9e7ee6b2/src/Makefile.am ---------------------------------------------------------------------- diff --git a/src/Makefile.am b/src/Makefile.am index 8963cea..bb77c2d 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -732,6 +732,7 @@ libmesos_no_3rdparty_la_SOURCES += \ master/constants.hpp \ master/detector.hpp \ master/flags.hpp \ + master/machine.hpp \ master/maintenance.hpp \ master/master.hpp \ master/metrics.hpp \ http://git-wip-us.apache.org/repos/asf/mesos/blob/9e7ee6b2/src/master/http.cpp ---------------------------------------------------------------------- diff --git a/src/master/http.cpp b/src/master/http.cpp index 73e8857..a814930 100644 --- a/src/master/http.cpp +++ b/src/master/http.cpp @@ -1464,7 +1464,7 @@ Future<Response> Master::Http::maintenanceSchedule(const Request& request) const mesos::maintenance::Schedule schedule = protoSchedule.get(); Try<Nothing> isValid = 
maintenance::validation::schedule( schedule, - master->machineInfos); + master->machines); if (isValid.isError()) { return BadRequest(isValid.error()); @@ -1495,17 +1495,18 @@ Future<Response> Master::Http::maintenanceSchedule(const Request& request) const } // NOTE: Copies are needed because this loop modifies the container. - foreachkey (const MachineID& id, utils::copy(master->machineInfos)) { + foreachkey (const MachineID& id, utils::copy(master->machines)) { // Update the entry for each updated machine. if (updated.contains(id)) { - master->machineInfos[id] - .mutable_unavailability()->CopyFrom(updated[id]); + master->machines[id] + .info.mutable_unavailability()->CopyFrom(updated[id]); continue; } - // Delete the entry for each removed machine. - master->machineInfos.erase(id); + // Remove the unavailability for each removed machine. + master->machines[id].info.clear_unavailability(); + master->machines[id].info.set_mode(MachineInfo::UP); } // Save each new machine, with the unavailability @@ -1517,7 +1518,7 @@ Future<Response> Master::Http::maintenanceSchedule(const Request& request) const info.set_mode(MachineInfo::DRAINING); info.mutable_unavailability()->CopyFrom(window.unavailability()); - master->machineInfos[id] = info; + master->machines[id].info.CopyFrom(info); } } @@ -1571,13 +1572,13 @@ Future<Response> Master::Http::machineDown(const Request& request) const // Check that all machines are part of a maintenance schedule. // TODO(josephw): Allow a transition from `UP` to `DOWN`. 
foreach (const MachineID& id, ids.values()) { - if (!master->machineInfos.contains(id)) { + if (!master->machines.contains(id)) { return BadRequest( "Machine '" + id.DebugString() + "' is not part of a maintenance schedule"); } - if (master->machineInfos[id].mode() != MachineInfo::DRAINING) { + if (master->machines[id].info.mode() != MachineInfo::DRAINING) { return BadRequest( "Machine '" + id.DebugString() + "' is not in DRAINING mode and cannot be brought down"); @@ -1593,7 +1594,7 @@ Future<Response> Master::Http::machineDown(const Request& request) const // Update the master's local state with the downed machines. foreach (const MachineID& id, ids.values()) { - master->machineInfos[id].set_mode(MachineInfo::DOWN); + master->machines[id].info.set_mode(MachineInfo::DOWN); } return OK(); @@ -1641,13 +1642,13 @@ Future<Response> Master::Http::machineUp(const Request& request) const // Check that all machines are part of a maintenance schedule. foreach (const MachineID& id, ids.values()) { - if (!master->machineInfos.contains(id)) { + if (!master->machines.contains(id)) { return BadRequest( "Machine '" + id.DebugString() + "' is not part of a maintenance schedule"); } - if (master->machineInfos[id].mode() != MachineInfo::DOWN) { + if (master->machines[id].info.mode() != MachineInfo::DOWN) { return BadRequest( "Machine '" + id.DebugString() + "' is not in DOWN mode and cannot be brought up"); @@ -1664,7 +1665,8 @@ Future<Response> Master::Http::machineUp(const Request& request) const // Update the master's local state with the reactivated machines. hashset<MachineID> updated; foreach (const MachineID& id, ids.values()) { - master->machineInfos.erase(id); + master->machines[id].info.set_mode(MachineInfo::UP); + master->machines[id].info.clear_unavailability(); updated.insert(id); } @@ -1718,8 +1720,8 @@ Future<Response> Master::Http::maintenanceStatus(const Request& request) const // Unwrap the master's machine information into two arrays of machines. 
mesos::maintenance::ClusterStatus status; - foreachkey (const MachineID& id, master->machineInfos) { - switch (master->machineInfos[id].mode()) { + foreachkey (const MachineID& id, master->machines) { + switch (master->machines[id].info.mode()) { case MachineInfo::DRAINING: { status.add_draining_machines()->CopyFrom(id); break; http://git-wip-us.apache.org/repos/asf/mesos/blob/9e7ee6b2/src/master/machine.hpp ---------------------------------------------------------------------- diff --git a/src/master/machine.hpp b/src/master/machine.hpp new file mode 100644 index 0000000..c0d4afc --- /dev/null +++ b/src/master/machine.hpp @@ -0,0 +1,49 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __MESOS_MASTER_MACHINE_HPP__ +#define __MESOS_MASTER_MACHINE_HPP__ + +#include <mesos/mesos.hpp> + +#include <stout/hashset.hpp> + +namespace mesos { +namespace internal { +namespace master { + +// A C++ Wrapper object for MachineInfo that also stores some extra information. +struct Machine +{ + // A default constructor to allow use of the `[]` operator. + Machine() {} + + Machine(const MachineInfo& _info) : info(_info) {} + + // The state of the machine represented as a protobuf. 
+ MachineInfo info; + + // The set of slaves currently registered from this machine. + hashset<SlaveID> slaves; +}; + +} // namespace master { +} // namespace internal { +} // namespace mesos { + +#endif // __MESOS_MASTER_MACHINE_HPP__ http://git-wip-us.apache.org/repos/asf/mesos/blob/9e7ee6b2/src/master/maintenance.cpp ---------------------------------------------------------------------- diff --git a/src/master/maintenance.cpp b/src/master/maintenance.cpp index 277dd82..87308a6 100644 --- a/src/master/maintenance.cpp +++ b/src/master/maintenance.cpp @@ -201,7 +201,7 @@ namespace validation { Try<Nothing> schedule( const maintenance::Schedule& schedule, - const hashmap<MachineID, MachineInfo>& infos) + const hashmap<MachineID, Machine>& machines) { hashset<MachineID> updated; foreach (const maintenance::Window& window, schedule.windows()) { @@ -238,8 +238,8 @@ Try<Nothing> schedule( } // Ensure that no `DOWN` machine is removed from the schedule. - foreachpair (const MachineID& id, const MachineInfo& info, infos) { - if (info.mode() == MachineInfo::DOWN && !updated.contains(id)) { + foreachpair (const MachineID& id, const Machine& machine, machines) { + if (machine.info.mode() == MachineInfo::DOWN && !updated.contains(id)) { return Error( "Machine '" + id.DebugString() + "' is deactivated and cannot be removed from the schedule"); http://git-wip-us.apache.org/repos/asf/mesos/blob/9e7ee6b2/src/master/maintenance.hpp ---------------------------------------------------------------------- diff --git a/src/master/maintenance.hpp b/src/master/maintenance.hpp index bebaeb2..8d134aa 100644 --- a/src/master/maintenance.hpp +++ b/src/master/maintenance.hpp @@ -27,6 +27,7 @@ #include <stout/nothing.hpp> #include <stout/try.hpp> +#include "master/machine.hpp" #include "master/registrar.hpp" #include "master/registry.hpp" @@ -124,7 +125,7 @@ namespace validation { */ Try<Nothing> schedule( const mesos::maintenance::Schedule& schedule, - const hashmap<MachineID, MachineInfo>& 
infos); + const hashmap<MachineID, Machine>& machines); // Checks that the `duration` of the unavailability is non-negative. http://git-wip-us.apache.org/repos/asf/mesos/blob/9e7ee6b2/src/master/master.cpp ---------------------------------------------------------------------- diff --git a/src/master/master.cpp b/src/master/master.cpp index c90311f..31fc83d 100644 --- a/src/master/master.cpp +++ b/src/master/master.cpp @@ -1397,7 +1397,7 @@ Future<Nothing> Master::_recover(const Registry& registry) // Save the machine info for each machine. foreach (const Registry::Machine& machine, registry.machines().machines()) { - machineInfos[machine.info().id()] = machine.info(); + machines[machine.info().id()] = Machine(machine.info()); } // Recovery is now complete! @@ -3744,9 +3744,14 @@ void Master::_registerSlave( stringify(slaveInfo.id())); send(pid, message); } else { + MachineID machineId; + machineId.set_hostname(slaveInfo.hostname()); + machineId.set_ip(stringify(pid.address.ip)); + Slave* slave = new Slave( slaveInfo, pid, + machineId, version.empty() ? Option<string>::none() : version, Clock::now(), checkpointedResources); @@ -3942,9 +3947,14 @@ void Master::_reregisterSlave( send(pid, message); } else { // Re-admission succeeded. + MachineID machineId; + machineId.set_hostname(slaveInfo.hostname()); + machineId.set_ip(stringify(pid.address.ip)); + Slave* slave = new Slave( slaveInfo, pid, + machineId, version.empty() ? Option<string>::none() : version, Clock::now(), checkpointedResources, @@ -5355,6 +5365,10 @@ void Master::addSlave( link(slave->pid); + // Map the slave to the machine it is running on. + CHECK(!machines[slave->machineId].slaves.contains(slave->id)); + machines[slave->machineId].slaves.insert(slave->id); + // Set up an observer for the slave. 
slave->observer = new SlaveObserver( slave->pid, @@ -5499,6 +5513,11 @@ void Master::removeSlave( slaves.removed.put(slave->id, Nothing()); authenticated.erase(slave->pid); + // Remove the slave from the `machines` mapping. + CHECK(machines.contains(slave->machineId)); + CHECK(machines[slave->machineId].slaves.contains(slave->id)); + machines[slave->machineId].slaves.erase(slave->id); + // Kill the slave observer. terminate(slave->observer); wait(slave->observer); http://git-wip-us.apache.org/repos/asf/mesos/blob/9e7ee6b2/src/master/master.hpp ---------------------------------------------------------------------- diff --git a/src/master/master.hpp b/src/master/master.hpp index 12cc1ad..d7d27bd 100644 --- a/src/master/master.hpp +++ b/src/master/master.hpp @@ -70,6 +70,7 @@ #include "master/contender.hpp" #include "master/detector.hpp" #include "master/flags.hpp" +#include "master/machine.hpp" #include "master/metrics.hpp" #include "master/registrar.hpp" #include "master/validation.hpp" @@ -109,6 +110,7 @@ struct Slave { Slave(const SlaveInfo& _info, const process::UPID& _pid, + const MachineID& _machineId, const Option<std::string> _version, const process::Time& _registeredTime, const Resources& _checkpointedResources, @@ -118,6 +120,7 @@ struct Slave std::vector<Task>()) : id(_info.id()), info(_info), + machineId(_machineId), pid(_pid), version(_version), registeredTime(_registeredTime), @@ -280,6 +283,8 @@ struct Slave const SlaveID id; const SlaveInfo info; + const MachineID machineId; + process::UPID pid; // The Mesos version of the slave. If set, the slave is >= 0.21.0. @@ -954,9 +959,10 @@ private: MasterInfo info_; - // Holds some info which affects how a machine behaves. - // See the `MachineInfo` protobuf for more information. - hashmap<MachineID, MachineInfo> machineInfos; + // Holds some info which affects how a machine behaves, as well as state that + // represents the master's view of this machine.
See the `MachineInfo` protobuf + // and `Machine` struct for more information. + hashmap<MachineID, Machine> machines; struct Maintenance {
