Maintenance Primitives: Added machine UP endpoint.

Endpoint: /machine/up
  Transitions agents back into UP mode.

Registry operation = maintenance::StopMaintenance
  Sets the list of machines back to UP mode.  Removes those machines
  from the maintenance schedule.

Review: https://reviews.apache.org/r/37362


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/bf4ca549
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/bf4ca549
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/bf4ca549

Branch: refs/heads/master
Commit: bf4ca5497fbed88ec54fd25f962c5eb1cee3b48c
Parents: de231ed
Author: Joseph Wu <[email protected]>
Authored: Sun Aug 30 13:56:33 2015 -0400
Committer: Joris Van Remoortere <[email protected]>
Committed: Mon Aug 31 13:15:16 2015 -0400

----------------------------------------------------------------------
 src/master/http.cpp                    | 103 +++++++++++++++++++++++
 src/master/maintenance.cpp             |  55 +++++++++++++
 src/master/maintenance.hpp             |  23 ++++++
 src/master/master.cpp                  |   6 ++
 src/master/master.hpp                  |   5 ++
 src/tests/master_maintenance_tests.cpp | 121 +++++++++++++++++++++++++++-
 src/tests/registrar_tests.cpp          |  79 ++++++++++++++++++
 7 files changed, 391 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/bf4ca549/src/master/http.cpp
----------------------------------------------------------------------
diff --git a/src/master/http.cpp b/src/master/http.cpp
index 11e786d..6fad959 100644
--- a/src/master/http.cpp
+++ b/src/master/http.cpp
@@ -93,6 +93,7 @@ using process::http::UnsupportedMediaType;
 
 using process::metrics::internal::MetricsProcess;
 
+using std::list;
 using std::map;
 using std::string;
 using std::vector;
@@ -1548,6 +1549,108 @@ Future<Response> Master::Http::machineDown(const Request& request) const
 }
 
 
+// /master/machine/up endpoint help.
+const string Master::Http::MACHINE_UP_HELP = HELP(
+    TLDR(
+        "Brings a set of machines back up."),
+    USAGE(
+        "/master/machine/up"),
+    DESCRIPTION(
+        "POST: Validates the request body as JSON and transitions",
+        "  the list of machines into UP mode.  This also removes",
+        "  the list of machines from the maintenance schedule."));
+
+
+// /master/machine/up endpoint handler.
+Future<Response> Master::Http::machineUp(const Request& request) const
+{
+  if (request.method != "POST") {
+    return BadRequest("Expecting POST, got '" + request.method + "'");
+  }
+
+  // Parse the POST body as JSON.
+  Try<JSON::Object> jsonIds = JSON::parse<JSON::Object>(request.body);
+  if (jsonIds.isError()) {
+    return BadRequest(jsonIds.error());
+  }
+
+  // Convert the machines to a protobuf.
+  Try<MachineIDs> protoIds =
+    ::protobuf::parse<MachineIDs>(jsonIds.get());
+
+  if (protoIds.isError()) {
+    return BadRequest(protoIds.error());
+  }
+
+  // Validate every machine in the list.
+  MachineIDs ids = protoIds.get();
+  Try<Nothing> isValid = maintenance::validation::machines(ids);
+  if (isValid.isError()) {
+    return BadRequest(isValid.error());
+  }
+
+  // Check that all machines are part of a maintenance schedule.
+  foreach (const MachineID& id, ids.values()) {
+    if (!master->machineInfos.contains(id)) {
+      return BadRequest(
+          "Machine '" + id.DebugString() +
+            "' is not part of a maintenance schedule");
+    }
+
+    if (master->machineInfos[id].mode() != MachineInfo::DOWN) {
+      return BadRequest(
+          "Machine '" + id.DebugString() +
+            "' is not in DOWN mode and cannot be brought up");
+    }
+  }
+
+  return master->registrar->apply(Owned<Operation>(
+      new maintenance::StopMaintenance(ids)))
+    .then(defer(master->self(), [=](bool result) -> Future<Response> {
+      // See the top comment in "master/maintenance.hpp" for why this check
+      // is here, and is appropriate.
+      CHECK(result);
+
+      // Update the master's local state with the reactivated machines.
+      hashset<MachineID> updated;
+      foreach (const MachineID& id, ids.values()) {
+        master->machineInfos.erase(id);
+        updated.insert(id);
+      }
+
+      // Delete the machines from the schedule.
+      for (list<mesos::maintenance::Schedule>::iterator schedule =
+          master->maintenance.schedules.begin();
+          schedule != master->maintenance.schedules.end();) {
+        for (int j = schedule->windows().size() - 1; j >= 0; j--) {
+          mesos::maintenance::Window* window = schedule->mutable_windows(j);
+
+          // Delete individual machines.
+          for (int k = window->machine_ids().size() - 1; k >= 0; k--) {
+            if (updated.contains(window->machine_ids(k))) {
+              window->mutable_machine_ids()->DeleteSubrange(k, 1);
+            }
+          }
+
+          // If the resulting window is empty, delete it.
+          if (window->machine_ids().size() == 0) {
+            schedule->mutable_windows()->DeleteSubrange(j, 1);
+          }
+        }
+
+        // If the resulting schedule is empty, delete it.
+        if (schedule->windows().size() == 0) {
+          schedule = master->maintenance.schedules.erase(schedule);
+        } else {
+          ++schedule;
+        }
+      }
+
+      return OK();
+    }));
+}
+
+
 Result<Credential> Master::Http::authenticate(const Request& request) const
 {
   // By default, assume everyone is authenticated if no credentials

http://git-wip-us.apache.org/repos/asf/mesos/blob/bf4ca549/src/master/maintenance.cpp
----------------------------------------------------------------------
diff --git a/src/master/maintenance.cpp b/src/master/maintenance.cpp
index 859cef9..277dd82 100644
--- a/src/master/maintenance.cpp
+++ b/src/master/maintenance.cpp
@@ -142,6 +142,61 @@ Try<bool> StartMaintenance::perform(
 }
 
 
+StopMaintenance::StopMaintenance(
+    const MachineIDs& _ids)
+{
+  foreach (const MachineID& id, _ids.values()) {
+    ids.insert(id);
+  }
+}
+
+
+Try<bool> StopMaintenance::perform(
+    Registry* registry,
+    hashset<SlaveID>* slaveIDs,
+    bool strict)
+{
+  // Delete the machine info entry of all targeted machines.
+  // i.e. Transition them into `UP` mode.
+  bool changed = false;
+  for (int i = registry->machines().machines().size() - 1; i >= 0; i--) {
+    if (ids.contains(registry->machines().machines(i).info().id())) {
+      registry->mutable_machines()->mutable_machines()->DeleteSubrange(i, 1);
+
+      changed = true; // Mutation.
+    }
+  }
+
+  // Delete the machines from the schedule.
+  for (int i = registry->schedules().size() - 1; i >= 0; i--) {
+    maintenance::Schedule* schedule = registry->mutable_schedules(i);
+
+    for (int j = schedule->windows().size() - 1; j >= 0; j--) {
+      maintenance::Window* window = schedule->mutable_windows(j);
+
+      // Delete individual machines.
+      for (int k = window->machine_ids().size() - 1; k >= 0; k--) {
+        if (ids.contains(window->machine_ids(k))) {
+          window->mutable_machine_ids()->DeleteSubrange(k, 1);
+        }
+      }
+
+      // If the resulting window is empty, delete it.
+      if (window->machine_ids().size() == 0) {
+        schedule->mutable_windows()->DeleteSubrange(j, 1);
+      }
+    }
+
+    // If the resulting schedule is empty, delete it.
+    if (schedule->windows().size() == 0) {
+      registry->mutable_schedules()->DeleteSubrange(i, 1);
+    }
+  }
+
+  return changed;
+}
+
+
 namespace validation {
 
 Try<Nothing> schedule(

http://git-wip-us.apache.org/repos/asf/mesos/blob/bf4ca549/src/master/maintenance.hpp
----------------------------------------------------------------------
diff --git a/src/master/maintenance.hpp b/src/master/maintenance.hpp
index 8e6cb9c..bebaeb2 100644
--- a/src/master/maintenance.hpp
+++ b/src/master/maintenance.hpp
@@ -89,6 +89,29 @@ private:
 };
 
 
+/**
+ * Transitions a group of machines from `DOWN` mode into `UP` mode.
+ * All machines must be in `DOWN` mode and must be part of a maintenance
+ * schedule prior to executing this operation. The machines will be
+ * removed from the maintenance schedule.
+ */
+class StopMaintenance : public Operation
+{
+public:
+  explicit StopMaintenance(
+      const MachineIDs& _ids);
+
+protected:
+  Try<bool> perform(
+      Registry* registry,
+      hashset<SlaveID>* slaveIDs,
+      bool strict);
+
+private:
+  hashset<MachineID> ids;
+};
+
+
 namespace validation {
 
 /**

http://git-wip-us.apache.org/repos/asf/mesos/blob/bf4ca549/src/master/master.cpp
----------------------------------------------------------------------
diff --git a/src/master/master.cpp b/src/master/master.cpp
index 06e283d..cd1b386 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -827,6 +827,12 @@ void Master::initialize()
           Http::log(request);
           return http.machineDown(request);
         });
+  route("/machine/up",
+        Http::MACHINE_UP_HELP,
+        [http](const process::http::Request& request) {
+          Http::log(request);
+          return http.machineUp(request);
+        });
 
   // Provide HTTP assets from a "webui" directory. This is either
   // specified via flags (which is necessary for running out of the

http://git-wip-us.apache.org/repos/asf/mesos/blob/bf4ca549/src/master/master.hpp
----------------------------------------------------------------------
diff --git a/src/master/master.hpp b/src/master/master.hpp
index c7e96db..68be718 100644
--- a/src/master/master.hpp
+++ b/src/master/master.hpp
@@ -840,6 +840,10 @@ private:
     process::Future<process::http::Response> machineDown(
         const process::http::Request& request) const;
 
+    // /master/machine/up
+    process::Future<process::http::Response> machineUp(
+        const process::http::Request& request) const;
+
     const static std::string SCHEDULER_HELP;
     const static std::string HEALTH_HELP;
     const static std::string OBSERVE_HELP;
@@ -852,6 +856,7 @@ private:
     const static std::string TASKS_HELP;
     const static std::string MAINTENANCE_SCHEDULE_HELP;
     const static std::string MACHINE_DOWN_HELP;
+    const static std::string MACHINE_UP_HELP;
 
   private:
     // Helper for doing authentication, returns the credential used if

http://git-wip-us.apache.org/repos/asf/mesos/blob/bf4ca549/src/tests/master_maintenance_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/master_maintenance_tests.cpp b/src/tests/master_maintenance_tests.cpp
index 67301cc..c3acd7a 100644
--- a/src/tests/master_maintenance_tests.cpp
+++ b/src/tests/master_maintenance_tests.cpp
@@ -18,6 +18,8 @@
 
 #include <string>
 
+#include <mesos/maintenance/maintenance.hpp>
+
 #include <process/clock.hpp>
 #include <process/future.hpp>
 #include <process/http.hpp>
@@ -28,6 +30,7 @@
 #include <stout/json.hpp>
 #include <stout/net.hpp>
 #include <stout/option.hpp>
+#include <stout/protobuf.hpp>
 #include <stout/strings.hpp>
 #include <stout/stringify.hpp>
 #include <stout/try.hpp>
@@ -325,8 +328,124 @@ TEST_F(MasterMaintenanceTest, BringDownMachines)
       "machine/down",
       headers,
       stringify(JSON::Protobuf(machines)));
-  
+
+  AWAIT_EXPECT_RESPONSE_STATUS_EQ(OK().status, response);
+}
+
+
+// Posts valid and invalid machines to the maintenance stop endpoint.
+TEST_F(MasterMaintenanceTest, BringUpMachines)
+{
+  // Set up a master.
+  Try<PID<Master>> master = StartMaster();
+  ASSERT_SOME(master);
+
+  // Try to bring up an unscheduled machine.
+  MachineIDs machines = createMachineList({machine1, machine2});
+  Future<Response> response = process::http::post(
+      master.get(),
+      "machine/up",
+      headers,
+      stringify(JSON::Protobuf(machines)));
+
+  AWAIT_EXPECT_RESPONSE_STATUS_EQ(BadRequest().status, response);
+
+  // Post a valid schedule with three machines.
+  maintenance::Schedule schedule = createSchedule({
+      createWindow({machine1, machine2}, unavailability),
+      createWindow({machine3}, unavailability)});
+
+  response = process::http::post(
+      master.get(),
+      "maintenance/schedule",
+      headers,
+      stringify(JSON::Protobuf(schedule)));
+
+  AWAIT_EXPECT_RESPONSE_STATUS_EQ(OK().status, response);
+
+  // Try to bring up a non-down machine.
+  machines = createMachineList({machine1, machine2});
+  response = process::http::post(
+      master.get(),
+      "machine/up",
+      headers,
+      stringify(JSON::Protobuf(machines)));
+
+  AWAIT_EXPECT_RESPONSE_STATUS_EQ(BadRequest().status, response);
+
+  // Down machine3.
+  machines = createMachineList({machine3});
+  response = process::http::post(
+      master.get(),
+      "machine/down",
+      headers,
+      stringify(JSON::Protobuf(machines)));
+
+  AWAIT_EXPECT_RESPONSE_STATUS_EQ(OK().status, response);
+
+  // Up machine3.
+  response = process::http::post(
+      master.get(),
+      "machine/up",
+      headers,
+      stringify(JSON::Protobuf(machines)));
+
+  AWAIT_EXPECT_RESPONSE_STATUS_EQ(OK().status, response);
+
+  // Get the maintenance schedule.
+  response = process::http::get(
+      master.get(),
+      "maintenance/schedule");
+
+  AWAIT_EXPECT_RESPONSE_STATUS_EQ(OK().status, response);
+
+  // Check that only one maintenance window remains.
+  Try<JSON::Object> masterSchedule_ =
+    JSON::parse<JSON::Object>(response.get().body);
+
+  ASSERT_SOME(masterSchedule_);
+  Try<mesos::maintenance::Schedule> masterSchedule =
+    ::protobuf::parse<mesos::maintenance::Schedule>(masterSchedule_.get());
+
+  ASSERT_SOME(masterSchedule);
+  ASSERT_EQ(1, masterSchedule.get().windows().size());
+  ASSERT_EQ(2, masterSchedule.get().windows(0).machine_ids().size());
+
+  // Down the other machines.
+  machines = createMachineList({machine1, machine2});
+  response = process::http::post(
+      master.get(),
+      "machine/down",
+      headers,
+      stringify(JSON::Protobuf(machines)));
+
+  AWAIT_EXPECT_RESPONSE_STATUS_EQ(OK().status, response);
+
+  // Up the other machines.
+  response = process::http::post(
+      master.get(),
+      "machine/up",
+      headers,
+      stringify(JSON::Protobuf(machines)));
+
   AWAIT_EXPECT_RESPONSE_STATUS_EQ(OK().status, response);
+
+  // Get the maintenance schedule again.
+  response = process::http::get(
+      master.get(),
+      "maintenance/schedule");
+
+  AWAIT_EXPECT_RESPONSE_STATUS_EQ(OK().status, response);
+
+  // Check that the schedule is empty.
+  masterSchedule_ = JSON::parse<JSON::Object>(response.get().body);
+
+  ASSERT_SOME(masterSchedule_);
+  masterSchedule =
+    ::protobuf::parse<mesos::maintenance::Schedule>(masterSchedule_.get());
+
+  ASSERT_SOME(masterSchedule);
+  ASSERT_EQ(0, masterSchedule.get().windows().size());
 }
 
 } // namespace tests {

http://git-wip-us.apache.org/repos/asf/mesos/blob/bf4ca549/src/tests/registrar_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/registrar_tests.cpp b/src/tests/registrar_tests.cpp
index 733a2cd..aa49c86 100644
--- a/src/tests/registrar_tests.cpp
+++ b/src/tests/registrar_tests.cpp
@@ -585,6 +585,85 @@ TEST_P(RegistrarTest, StartMaintenance)
 }
 
 
+// Creates a schedule and properly starts and stops maintenance.
+TEST_P(RegistrarTest, StopMaintenance)
+{
+  // Machine definitions used in this test.
+  MachineID machine1;
+  machine1.set_ip("0.0.0.1");
+
+  MachineID machine2;
+  machine2.set_hostname("2");
+
+  MachineID machine3;
+  machine3.set_hostname("3");
+  machine3.set_ip("0.0.0.3");
+
+  Unavailability unavailability = createUnavailability(Clock::now());
+
+  {
+    // Prepare the registrar.
+    Registrar registrar(flags, state);
+    AWAIT_READY(registrar.recover(master));
+
+    // Schedule three machines for maintenance.
+    maintenance::Schedule schedule = createSchedule({
+        createWindow({machine1, machine2}, unavailability),
+        createWindow({machine3}, unavailability)});
+
+    AWAIT_READY(registrar.apply(
+        Owned<Operation>(new UpdateSchedule(schedule))));
+
+    // Transition machine three into `DOWN` mode.
+    MachineIDs machines = createMachineList({machine3});
+    AWAIT_READY(registrar.apply(
+        Owned<Operation>(new StartMaintenance(machines))));
+
+    // Transition machine three into `UP` mode.
+    AWAIT_READY(registrar.apply(
+        Owned<Operation>(new StopMaintenance(machines))));
+  }
+
+  {
+    // Check that machine three and the window were removed.
+    Registrar registrar(flags, state);
+    Future<Registry> registry = registrar.recover(master);
+    AWAIT_READY(registry);
+
+    EXPECT_EQ(1, registry.get().schedules().size());
+    EXPECT_EQ(1, registry.get().schedules(0).windows().size());
+    EXPECT_EQ(2, registry.get().schedules(0).windows(0).machine_ids().size());
+    EXPECT_EQ(2, registry.get().machines().machines().size());
+    EXPECT_EQ(
+        MachineInfo::DRAINING,
+        registry.get().machines().machines(0).info().mode());
+
+    EXPECT_EQ(
+        MachineInfo::DRAINING,
+        registry.get().machines().machines(1).info().mode());
+
+    // Transition machine one and two into `DOWN` mode.
+    MachineIDs machines = createMachineList({machine1, machine2});
+    AWAIT_READY(registrar.apply(
+        Owned<Operation>(new StartMaintenance(machines))));
+
+    // Transition all machines into `UP` mode.
+    AWAIT_READY(registrar.apply(
+        Owned<Operation>(new StopMaintenance(machines))));
+  }
+
+  {
+    // Check that the schedule is now empty.
+    Registrar registrar(flags, state);
+    Future<Registry> registry = registrar.recover(master);
+    AWAIT_READY(registry);
+
+    EXPECT_EQ(0, registry.get().schedules().size());
+    EXPECT_EQ(0, registry.get().machines().machines().size());
+  }
+}
+
+
 TEST_P(RegistrarTest, Bootstrap)
 {
   // Run 1 readmits a slave that is not present.

Reply via email to