Maintenance Primitives: Added `MachineID` to Slave struct in Master.

Review: https://reviews.apache.org/r/37170


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/9e7ee6b2
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/9e7ee6b2
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/9e7ee6b2

Branch: refs/heads/master
Commit: 9e7ee6b26f8afe419c7758327fc9ce9f580e0b54
Parents: 57385ec
Author: Joris Van Remoortere <[email protected]>
Authored: Sun Aug 30 13:56:56 2015 -0400
Committer: Joris Van Remoortere <[email protected]>
Committed: Mon Sep 14 13:58:37 2015 -0400

----------------------------------------------------------------------
 src/Makefile.am            |  1 +
 src/master/http.cpp        | 32 ++++++++++++++-------------
 src/master/machine.hpp     | 49 +++++++++++++++++++++++++++++++++++++++++
 src/master/maintenance.cpp |  6 ++---
 src/master/maintenance.hpp |  3 ++-
 src/master/master.cpp      | 21 +++++++++++++++++-
 src/master/master.hpp      | 12 +++++++---
 7 files changed, 101 insertions(+), 23 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/9e7ee6b2/src/Makefile.am
----------------------------------------------------------------------
diff --git a/src/Makefile.am b/src/Makefile.am
index 8963cea..bb77c2d 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -732,6 +732,7 @@ libmesos_no_3rdparty_la_SOURCES +=                                  \
        master/constants.hpp                                            \
        master/detector.hpp                                             \
        master/flags.hpp                                                \
+       master/machine.hpp                                              \
        master/maintenance.hpp                                          \
        master/master.hpp                                               \
        master/metrics.hpp                                              \

http://git-wip-us.apache.org/repos/asf/mesos/blob/9e7ee6b2/src/master/http.cpp
----------------------------------------------------------------------
diff --git a/src/master/http.cpp b/src/master/http.cpp
index 73e8857..a814930 100644
--- a/src/master/http.cpp
+++ b/src/master/http.cpp
@@ -1464,7 +1464,7 @@ Future<Response> Master::Http::maintenanceSchedule(const Request& request) const
   mesos::maintenance::Schedule schedule = protoSchedule.get();
   Try<Nothing> isValid = maintenance::validation::schedule(
       schedule,
-      master->machineInfos);
+      master->machines);
 
   if (isValid.isError()) {
     return BadRequest(isValid.error());
@@ -1495,17 +1495,18 @@ Future<Response> Master::Http::maintenanceSchedule(const Request& request) const
       }
 
       // NOTE: Copies are needed because this loop modifies the container.
-      foreachkey (const MachineID& id, utils::copy(master->machineInfos)) {
+      foreachkey (const MachineID& id, utils::copy(master->machines)) {
         // Update the entry for each updated machine.
         if (updated.contains(id)) {
-          master->machineInfos[id]
-            .mutable_unavailability()->CopyFrom(updated[id]);
+          master->machines[id]
+            .info.mutable_unavailability()->CopyFrom(updated[id]);
 
           continue;
         }
 
-        // Delete the entry for each removed machine.
-        master->machineInfos.erase(id);
+        // Remove the unavailability for each removed machine.
+        master->machines[id].info.clear_unavailability();
+        master->machines[id].info.set_mode(MachineInfo::UP);
       }
 
       // Save each new machine, with the unavailability
@@ -1517,7 +1518,7 @@ Future<Response> Master::Http::maintenanceSchedule(const Request& request) const
           info.set_mode(MachineInfo::DRAINING);
           info.mutable_unavailability()->CopyFrom(window.unavailability());
 
-          master->machineInfos[id] = info;
+          master->machines[id].info.CopyFrom(info);
         }
       }
 
@@ -1571,13 +1572,13 @@ Future<Response> Master::Http::machineDown(const Request& request) const
   // Check that all machines are part of a maintenance schedule.
   // TODO(josephw): Allow a transition from `UP` to `DOWN`.
   foreach (const MachineID& id, ids.values()) {
-    if (!master->machineInfos.contains(id)) {
+    if (!master->machines.contains(id)) {
       return BadRequest(
           "Machine '" + id.DebugString() +
             "' is not part of a maintenance schedule");
     }
 
-    if (master->machineInfos[id].mode() != MachineInfo::DRAINING) {
+    if (master->machines[id].info.mode() != MachineInfo::DRAINING) {
       return BadRequest(
           "Machine '" + id.DebugString() +
             "' is not in DRAINING mode and cannot be brought down");
@@ -1593,7 +1594,7 @@ Future<Response> Master::Http::machineDown(const Request& request) const
 
       // Update the master's local state with the downed machines.
       foreach (const MachineID& id, ids.values()) {
-        master->machineInfos[id].set_mode(MachineInfo::DOWN);
+        master->machines[id].info.set_mode(MachineInfo::DOWN);
       }
 
       return OK();
@@ -1641,13 +1642,13 @@ Future<Response> Master::Http::machineUp(const Request& request) const
 
   // Check that all machines are part of a maintenance schedule.
   foreach (const MachineID& id, ids.values()) {
-    if (!master->machineInfos.contains(id)) {
+    if (!master->machines.contains(id)) {
       return BadRequest(
           "Machine '" + id.DebugString() +
             "' is not part of a maintenance schedule");
     }
 
-    if (master->machineInfos[id].mode() != MachineInfo::DOWN) {
+    if (master->machines[id].info.mode() != MachineInfo::DOWN) {
       return BadRequest(
           "Machine '" + id.DebugString() +
             "' is not in DOWN mode and cannot be brought up");
@@ -1664,7 +1665,8 @@ Future<Response> Master::Http::machineUp(const Request& request) const
       // Update the master's local state with the reactivated machines.
       hashset<MachineID> updated;
       foreach (const MachineID& id, ids.values()) {
-        master->machineInfos.erase(id);
+        master->machines[id].info.set_mode(MachineInfo::UP);
+        master->machines[id].info.clear_unavailability();
         updated.insert(id);
       }
 
@@ -1718,8 +1720,8 @@ Future<Response> Master::Http::maintenanceStatus(const Request& request) const
 
   // Unwrap the master's machine information into two arrays of machines.
   mesos::maintenance::ClusterStatus status;
-  foreachkey (const MachineID& id, master->machineInfos) {
-    switch (master->machineInfos[id].mode()) {
+  foreachkey (const MachineID& id, master->machines) {
+    switch (master->machines[id].info.mode()) {
       case MachineInfo::DRAINING: {
         status.add_draining_machines()->CopyFrom(id);
         break;

http://git-wip-us.apache.org/repos/asf/mesos/blob/9e7ee6b2/src/master/machine.hpp
----------------------------------------------------------------------
diff --git a/src/master/machine.hpp b/src/master/machine.hpp
new file mode 100644
index 0000000..c0d4afc
--- /dev/null
+++ b/src/master/machine.hpp
@@ -0,0 +1,49 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MESOS_MASTER_MACHINE_HPP__
+#define __MESOS_MASTER_MACHINE_HPP__
+
+#include <mesos/mesos.hpp>
+
+#include <stout/hashset.hpp>
+
+namespace mesos {
+namespace internal {
+namespace master {
+
+// A C++ Wrapper object for MachineInfo that also stores some extra information.
+struct Machine
+{
+  // A default constructor to allow use of the `[]` operator.
+  Machine() {}
+
+  Machine(const MachineInfo& _info) : info(_info) {}
+
+  // The state of the machine represented as a protobuf.
+  MachineInfo info;
+
+  // The set of slaves currently registered from this machine.
+  hashset<SlaveID> slaves;
+};
+
+} // namespace master {
+} // namespace internal {
+} // namespace mesos {
+
+#endif // __MESOS_MASTER_MACHINE_HPP__

http://git-wip-us.apache.org/repos/asf/mesos/blob/9e7ee6b2/src/master/maintenance.cpp
----------------------------------------------------------------------
diff --git a/src/master/maintenance.cpp b/src/master/maintenance.cpp
index 277dd82..87308a6 100644
--- a/src/master/maintenance.cpp
+++ b/src/master/maintenance.cpp
@@ -201,7 +201,7 @@ namespace validation {
 
 Try<Nothing> schedule(
     const maintenance::Schedule& schedule,
-    const hashmap<MachineID, MachineInfo>& infos)
+    const hashmap<MachineID, Machine>& machines)
 {
   hashset<MachineID> updated;
   foreach (const maintenance::Window& window, schedule.windows()) {
@@ -238,8 +238,8 @@ Try<Nothing> schedule(
   }
 
   // Ensure that no `DOWN` machine is removed from the schedule.
-  foreachpair (const MachineID& id, const MachineInfo& info, infos) {
-    if (info.mode() == MachineInfo::DOWN && !updated.contains(id)) {
+  foreachpair (const MachineID& id, const Machine& machine, machines) {
+    if (machine.info.mode() == MachineInfo::DOWN && !updated.contains(id)) {
       return Error(
           "Machine '" + id.DebugString() +
             "' is deactivated and cannot be removed from the schedule");

http://git-wip-us.apache.org/repos/asf/mesos/blob/9e7ee6b2/src/master/maintenance.hpp
----------------------------------------------------------------------
diff --git a/src/master/maintenance.hpp b/src/master/maintenance.hpp
index bebaeb2..8d134aa 100644
--- a/src/master/maintenance.hpp
+++ b/src/master/maintenance.hpp
@@ -27,6 +27,7 @@
 #include <stout/nothing.hpp>
 #include <stout/try.hpp>
 
+#include "master/machine.hpp"
 #include "master/registrar.hpp"
 #include "master/registry.hpp"
 
@@ -124,7 +125,7 @@ namespace validation {
  */
 Try<Nothing> schedule(
     const mesos::maintenance::Schedule& schedule,
-    const hashmap<MachineID, MachineInfo>& infos);
+    const hashmap<MachineID, Machine>& machines);
 
 
 // Checks that the `duration` of the unavailability is non-negative.

http://git-wip-us.apache.org/repos/asf/mesos/blob/9e7ee6b2/src/master/master.cpp
----------------------------------------------------------------------
diff --git a/src/master/master.cpp b/src/master/master.cpp
index c90311f..31fc83d 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -1397,7 +1397,7 @@ Future<Nothing> Master::_recover(const Registry& registry)
 
   // Save the machine info for each machine.
   foreach (const Registry::Machine& machine, registry.machines().machines()) {
-    machineInfos[machine.info().id()] = machine.info();
+    machines[machine.info().id()] = Machine(machine.info());
   }
 
   // Recovery is now complete!
@@ -3744,9 +3744,14 @@ void Master::_registerSlave(
         stringify(slaveInfo.id()));
     send(pid, message);
   } else {
+    MachineID machineId;
+    machineId.set_hostname(slaveInfo.hostname());
+    machineId.set_ip(stringify(pid.address.ip));
+
     Slave* slave = new Slave(
         slaveInfo,
         pid,
+        machineId,
         version.empty() ? Option<string>::none() : version,
         Clock::now(),
         checkpointedResources);
@@ -3942,9 +3947,14 @@ void Master::_reregisterSlave(
     send(pid, message);
   } else {
     // Re-admission succeeded.
+    MachineID machineId;
+    machineId.set_hostname(slaveInfo.hostname());
+    machineId.set_ip(stringify(pid.address.ip));
+
     Slave* slave = new Slave(
         slaveInfo,
         pid,
+        machineId,
         version.empty() ? Option<string>::none() : version,
         Clock::now(),
         checkpointedResources,
@@ -5355,6 +5365,10 @@ void Master::addSlave(
 
   link(slave->pid);
 
+  // Map the slave to the machine it is running on.
+  CHECK(!machines[slave->machineId].slaves.contains(slave->id));
+  machines[slave->machineId].slaves.insert(slave->id);
+
   // Set up an observer for the slave.
   slave->observer = new SlaveObserver(
       slave->pid,
@@ -5499,6 +5513,11 @@ void Master::removeSlave(
   slaves.removed.put(slave->id, Nothing());
   authenticated.erase(slave->pid);
 
+  // Remove the slave from the `machines` mapping.
+  CHECK(machines.contains(slave->machineId));
+  CHECK(machines[slave->machineId].slaves.contains(slave->id));
+  machines[slave->machineId].slaves.erase(slave->id);
+
   // Kill the slave observer.
   terminate(slave->observer);
   wait(slave->observer);

http://git-wip-us.apache.org/repos/asf/mesos/blob/9e7ee6b2/src/master/master.hpp
----------------------------------------------------------------------
diff --git a/src/master/master.hpp b/src/master/master.hpp
index 12cc1ad..d7d27bd 100644
--- a/src/master/master.hpp
+++ b/src/master/master.hpp
@@ -70,6 +70,7 @@
 #include "master/contender.hpp"
 #include "master/detector.hpp"
 #include "master/flags.hpp"
+#include "master/machine.hpp"
 #include "master/metrics.hpp"
 #include "master/registrar.hpp"
 #include "master/validation.hpp"
@@ -109,6 +110,7 @@ struct Slave
 {
   Slave(const SlaveInfo& _info,
         const process::UPID& _pid,
+        const MachineID& _machineId,
         const Option<std::string> _version,
         const process::Time& _registeredTime,
         const Resources& _checkpointedResources,
@@ -118,6 +120,7 @@ struct Slave
           std::vector<Task>())
     : id(_info.id()),
       info(_info),
+      machineId(_machineId),
       pid(_pid),
       version(_version),
       registeredTime(_registeredTime),
@@ -280,6 +283,8 @@ struct Slave
   const SlaveID id;
   const SlaveInfo info;
 
+  const MachineID machineId;
+
   process::UPID pid;
 
   // The Mesos version of the slave. If set, the slave is >= 0.21.0.
@@ -954,9 +959,10 @@ private:
 
   MasterInfo info_;
 
-  // Holds some info which affects how a machine behaves.
-  // See the `MachineInfo` protobuf for more information.
-  hashmap<MachineID, MachineInfo> machineInfos;
+  // Holds some info which affects how a machine behaves, as well as state that
+  // represent the master's view of this machine. See the `MachineInfo` protobuf
+  // and `Machine` struct for more information.
+  hashmap<MachineID, Machine> machines;
 
   struct Maintenance
   {

Reply via email to