Maintenance Primitives: Added machine DOWN endpoint.

Endpoint: /machine/down
  Transitions agents into the DOWN mode.

Registry operation = maintenance::StartMaintenance
  Sets the list of machines as DOWN.

Review: https://reviews.apache.org/r/37358


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/de231ed5
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/de231ed5
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/de231ed5

Branch: refs/heads/master
Commit: de231ed590809d85c9e47071f5ce899035a15dc4
Parents: 3b0fe5c
Author: Joseph Wu <[email protected]>
Authored: Sun Aug 30 13:56:21 2015 -0400
Committer: Joris Van Remoortere <[email protected]>
Committed: Mon Aug 31 13:09:54 2015 -0400

----------------------------------------------------------------------
 src/master/http.cpp                    | 73 ++++++++++++++++++++++
 src/master/maintenance.cpp             | 30 +++++++++
 src/master/maintenance.hpp             | 24 ++++++++
 src/master/master.cpp                  |  6 ++
 src/master/master.hpp                  |  5 ++
 src/tests/master_maintenance_tests.cpp | 95 +++++++++++++++++++++++++++++
 src/tests/registrar_tests.cpp          | 87 +++++++++++++++++++++++++-
 7 files changed, 319 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/de231ed5/src/master/http.cpp
----------------------------------------------------------------------
diff --git a/src/master/http.cpp b/src/master/http.cpp
index 44178d8..11e786d 100644
--- a/src/master/http.cpp
+++ b/src/master/http.cpp
@@ -1475,6 +1475,79 @@ Future<Response> Master::Http::maintenanceSchedule(const Request& request) const
 }
 
 
+// Help text for the /master/machine/down endpoint.
+const string Master::Http::MACHINE_DOWN_HELP = HELP(
+    TLDR(
+        "Brings a set of machines down."),
+    USAGE(
+        "/master/machine/down"),
+    DESCRIPTION(
+        "POST: Validates the request body as JSON and transitions",
+        "  the list of machines into DOWN mode.  Currently, only",
+        "  machines in DRAINING mode are allowed to be brought down."));
+
+
+// Handler for /master/machine/down: moves DRAINING machines into DOWN mode.
+Future<Response> Master::Http::machineDown(const Request& request) const
+{
+  if (request.method != "POST") {
+    return BadRequest("Expecting POST, got '" + request.method + "'");
+  }
+
+  // Parse the POST body as a JSON object; a parse error is a BadRequest.
+  Try<JSON::Object> jsonIds = JSON::parse<JSON::Object>(request.body);
+  if (jsonIds.isError()) {
+    return BadRequest(jsonIds.error());
+  }
+
+  // Convert the JSON object into a `MachineIDs` protobuf.
+  Try<MachineIDs> protoIds =
+    ::protobuf::parse<MachineIDs>(jsonIds.get());
+
+  if (protoIds.isError()) {
+    return BadRequest(protoIds.error());
+  }
+
+  // Validate the list of machines as a whole before touching any state.
+  MachineIDs ids = protoIds.get();
+  Try<Nothing> isValid = maintenance::validation::machines(ids);
+  if (isValid.isError()) {
+    return BadRequest(isValid.error());
+  }
+
+  // Check that every machine is in a maintenance schedule and is DRAINING.
+  // TODO(josephw): Allow a transition from `UP` to `DOWN`.
+  foreach (const MachineID& id, ids.values()) {
+    if (!master->machineInfos.contains(id)) {
+      return BadRequest(
+          "Machine '" + id.DebugString() +
+            "' is not part of a maintenance schedule");
+    }
+
+    if (master->machineInfos[id].mode() != MachineInfo::DRAINING) {
+      return BadRequest(
+          "Machine '" + id.DebugString() +
+            "' is not in DRAINING mode and cannot be brought down");
+    }
+  }
+
+  return master->registrar->apply(Owned<Operation>(
+      new maintenance::StartMaintenance(ids)))
+    .then(defer(master->self(), [=](bool result) -> Future<Response> {
+      // See the top comment in "master/maintenance.hpp" for why this check
+      // is here, and is appropriate.
+      CHECK(result);
+
+      // Registry updated; mirror the DOWN mode in the master's local state.
+      foreach (const MachineID& id, ids.values()) {
+        master->machineInfos[id].set_mode(MachineInfo::DOWN);
+      }
+
+      return OK();
+    }));
+}
+
+
 Result<Credential> Master::Http::authenticate(const Request& request) const
 {
   // By default, assume everyone is authenticated if no credentials

http://git-wip-us.apache.org/repos/asf/mesos/blob/de231ed5/src/master/maintenance.cpp
----------------------------------------------------------------------
diff --git a/src/master/maintenance.cpp b/src/master/maintenance.cpp
index 798c026..859cef9 100644
--- a/src/master/maintenance.cpp
+++ b/src/master/maintenance.cpp
@@ -112,6 +112,36 @@ Try<bool> UpdateSchedule::perform(
 }
 
 
+StartMaintenance::StartMaintenance(
+    const MachineIDs& _ids)
+{
+  foreach (const MachineID& id, _ids.values()) { // Copy IDs into `ids`.
+    ids.insert(id);
+  }
+}
+
+
+Try<bool> StartMaintenance::perform(
+    Registry* registry,
+    hashset<SlaveID>* slaveIDs, // Unused by this operation.
+    bool strict) // Unused by this operation.
+{
+  // Mark every registry machine whose ID is in `ids` as `DOWN`.
+  bool changed = false;
+  for (int i = 0; i < registry->machines().machines().size(); i++) {
+    if (ids.contains(registry->machines().machines(i).info().id())) {
+      // Set `DOWN` unconditionally; the prior mode is not inspected here.
+      registry->mutable_machines()->mutable_machines(i)
+        ->mutable_info()->set_mode(MachineInfo::DOWN);
+
+      changed = true; // Returned to signal that the registry was mutated.
+    }
+  }
+
+  return changed;
+}
+
+
 namespace validation {
 
 Try<Nothing> schedule(

http://git-wip-us.apache.org/repos/asf/mesos/blob/de231ed5/src/master/maintenance.hpp
----------------------------------------------------------------------
diff --git a/src/master/maintenance.hpp b/src/master/maintenance.hpp
index 42b5f9e..8e6cb9c 100644
--- a/src/master/maintenance.hpp
+++ b/src/master/maintenance.hpp
@@ -65,6 +65,30 @@ private:
 };
 
 
+/**
+ * Registry operation that sets the mode of every listed machine found in
+ * the registry to `DOWN`.  The operation does not check the prior mode;
+ * callers must ensure the machines are scheduled and `DRAINING`.
+ *
+ * TODO(josephw): Allow a transition from `UP` to `DOWN`.
+ */
+class StartMaintenance : public Operation
+{
+public:
+  explicit StartMaintenance(
+      const MachineIDs& _ids);
+
+protected:
+  Try<bool> perform(
+      Registry* registry,
+      hashset<SlaveID>* slaveIDs,
+      bool strict);
+
+private:
+  hashset<MachineID> ids; // Machines to mark as `DOWN`.
+};
+
+
 namespace validation {
 
 /**

http://git-wip-us.apache.org/repos/asf/mesos/blob/de231ed5/src/master/master.cpp
----------------------------------------------------------------------
diff --git a/src/master/master.cpp b/src/master/master.cpp
index ea556f9..06e283d 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -821,6 +821,12 @@ void Master::initialize()
           Http::log(request);
           return http.maintenanceSchedule(request);
         });
+  route("/machine/down",
+        Http::MACHINE_DOWN_HELP,
+        [http](const process::http::Request& request) {
+          Http::log(request);
+          return http.machineDown(request);
+        });
 
   // Provide HTTP assets from a "webui" directory. This is either
   // specified via flags (which is necessary for running out of the

http://git-wip-us.apache.org/repos/asf/mesos/blob/de231ed5/src/master/master.hpp
----------------------------------------------------------------------
diff --git a/src/master/master.hpp b/src/master/master.hpp
index 175e623..c7e96db 100644
--- a/src/master/master.hpp
+++ b/src/master/master.hpp
@@ -836,6 +836,10 @@ private:
     process::Future<process::http::Response> maintenanceSchedule(
         const process::http::Request& request) const;
 
+    // /master/machine/down
+    process::Future<process::http::Response> machineDown(
+        const process::http::Request& request) const;
+
     const static std::string SCHEDULER_HELP;
     const static std::string HEALTH_HELP;
     const static std::string OBSERVE_HELP;
@@ -847,6 +851,7 @@ private:
     const static std::string STATESUMMARY_HELP;
     const static std::string TASKS_HELP;
     const static std::string MAINTENANCE_SCHEDULE_HELP;
+    const static std::string MACHINE_DOWN_HELP;
 
   private:
     // Helper for doing authentication, returns the credential used if

http://git-wip-us.apache.org/repos/asf/mesos/blob/de231ed5/src/tests/master_maintenance_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/master_maintenance_tests.cpp b/src/tests/master_maintenance_tests.cpp
index 1258ecc..67301cc 100644
--- a/src/tests/master_maintenance_tests.cpp
+++ b/src/tests/master_maintenance_tests.cpp
@@ -54,6 +54,7 @@ using process::http::BadRequest;
 using process::http::OK;
 using process::http::Response;
 
+using mesos::internal::protobuf::maintenance::createMachineList;
 using mesos::internal::protobuf::maintenance::createSchedule;
 using mesos::internal::protobuf::maintenance::createUnavailability;
 using mesos::internal::protobuf::maintenance::createWindow;
@@ -234,6 +235,100 @@ TEST_F(MasterMaintenanceTest, UpdateSchedule)
   AWAIT_EXPECT_RESPONSE_STATUS_EQ(OK().status, response);
 }
 
+
+// Posts valid and invalid machine lists to the `machine/down` endpoint.
+TEST_F(MasterMaintenanceTest, BringDownMachines)
+{
+  // Set up a master.
+  Try<PID<Master>> master = StartMaster();
+  ASSERT_SOME(master);
+
+  // Extra machine used in this test.
+  // None of its fields are set, so it is expected to be rejected.
+  MachineID badMachine;
+
+  // Try to bring down two machines before any schedule has been posted.
+  MachineIDs machines = createMachineList({machine1, machine2});
+  Future<Response> response = process::http::post(
+      master.get(),
+      "machine/down",
+      headers,
+      stringify(JSON::Protobuf(machines)));
+
+  AWAIT_EXPECT_RESPONSE_STATUS_EQ(BadRequest().status, response);
+
+  // Try an empty list of machines.
+  machines = createMachineList({});
+  response = process::http::post(
+      master.get(),
+      "machine/down",
+      headers,
+      stringify(JSON::Protobuf(machines)));
+
+  AWAIT_EXPECT_RESPONSE_STATUS_EQ(BadRequest().status, response);
+
+  // Try a list that contains only the unfilled machine.
+  machines = createMachineList({badMachine});
+  response = process::http::post(
+      master.get(),
+      "machine/down",
+      headers,
+      stringify(JSON::Protobuf(machines)));
+
+  AWAIT_EXPECT_RESPONSE_STATUS_EQ(BadRequest().status, response);
+
+  // Post a valid schedule with two machines.
+  maintenance::Schedule schedule = createSchedule(
+      {createWindow({machine1, machine2}, unavailability)});
+
+  response = process::http::post(
+      master.get(),
+      "maintenance/schedule",
+      headers,
+      stringify(JSON::Protobuf(schedule)));
+
+  AWAIT_EXPECT_RESPONSE_STATUS_EQ(OK().status, response);
+
+  // Down machine1, which is now part of a schedule.
+  machines = createMachineList({machine1});
+  response = process::http::post(
+      master.get(),
+      "machine/down",
+      headers,
+      stringify(JSON::Protobuf(machines)));
+
+  AWAIT_EXPECT_RESPONSE_STATUS_EQ(OK().status, response);
+
+  // Fail to down machine1 again; it is already DOWN, not DRAINING.
+  response = process::http::post(
+      master.get(),
+      "machine/down",
+      headers,
+      stringify(JSON::Protobuf(machines)));
+
+  AWAIT_EXPECT_RESPONSE_STATUS_EQ(BadRequest().status, response);
+
+  // Fail to down machine1 and machine2; machine1 is already DOWN.
+  machines = createMachineList({machine1, machine2});
+  response = process::http::post(
+      master.get(),
+      "machine/down",
+      headers,
+      stringify(JSON::Protobuf(machines)));
+
+  AWAIT_EXPECT_RESPONSE_STATUS_EQ(BadRequest().status, response);
+
+  // Down machine2 by itself; the rejected request left it in DRAINING.
+  machines = createMachineList({machine2});
+  response = process::http::post(
+      master.get(),
+      "machine/down",
+      headers,
+      stringify(JSON::Protobuf(machines)));
+  
+  AWAIT_EXPECT_RESPONSE_STATUS_EQ(OK().status, response);
+}
+
 } // namespace tests {
 } // namespace internal {
 } // namespace mesos {

http://git-wip-us.apache.org/repos/asf/mesos/blob/de231ed5/src/tests/registrar_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/registrar_tests.cpp b/src/tests/registrar_tests.cpp
index 567934c..733a2cd 100644
--- a/src/tests/registrar_tests.cpp
+++ b/src/tests/registrar_tests.cpp
@@ -72,6 +72,7 @@ using std::vector;
 
 using process::Clock;
 
+using mesos::internal::protobuf::maintenance::createMachineList;
 using mesos::internal::protobuf::maintenance::createSchedule;
 using mesos::internal::protobuf::maintenance::createUnavailability;
 using mesos::internal::protobuf::maintenance::createWindow;
@@ -492,7 +493,7 @@ TEST_P(RegistrarTest, UpdateMaintenanceSchedule)
     Registrar registrar(flags, state);
     Future<Registry> registry = registrar.recover(master);
     AWAIT_READY(registry);
-    
+
     EXPECT_EQ(1, registry.get().schedules().size());
     EXPECT_EQ(0, registry.get().schedules(0).windows().size());
     EXPECT_EQ(0, registry.get().machines().machines().size());
@@ -500,6 +501,90 @@ TEST_P(RegistrarTest, UpdateMaintenanceSchedule)
 }
 
 
+// Schedules machines, applies `StartMaintenance`, and checks the registry.
+TEST_P(RegistrarTest, StartMaintenance)
+{
+  // Machine definitions used in this test.
+  MachineID machine1;
+  machine1.set_ip("0.0.0.1");
+
+  MachineID machine2;
+  machine2.set_hostname("2");
+
+  MachineID machine3;
+  machine3.set_hostname("3");
+  machine3.set_ip("0.0.0.3");
+
+  Unavailability unavailability = createUnavailability(Clock::now());
+
+  {
+    // Prepare the registrar.
+    Registrar registrar(flags, state);
+    AWAIT_READY(registrar.recover(master));
+
+    // Schedule two machines for maintenance.
+    maintenance::Schedule schedule = createSchedule(
+        {createWindow({machine1, machine2}, unavailability)});
+
+    AWAIT_READY(registrar.apply(
+        Owned<Operation>(new UpdateSchedule(schedule))));
+
+    // Transition only machine two into `DOWN` mode.
+    MachineIDs machines = createMachineList({machine2});
+    AWAIT_READY(registrar.apply(
+        Owned<Operation>(new StartMaintenance(machines))));
+  }
+
+  {
+    // Recover the registry and check that only machine two is down.
+    Registrar registrar(flags, state);
+    Future<Registry> registry = registrar.recover(master);
+    AWAIT_READY(registry);
+
+    EXPECT_EQ(2, registry.get().machines().machines().size());
+    EXPECT_EQ(
+        MachineInfo::DRAINING,
+        registry.get().machines().machines(0).info().mode());
+
+    EXPECT_EQ(
+        MachineInfo::DOWN,
+        registry.get().machines().machines(1).info().mode());
+
+    // Update the schedule so that it covers three machines.
+    maintenance::Schedule schedule = createSchedule(
+        {createWindow({machine1, machine2, machine3}, unavailability)});
+
+    AWAIT_READY(registrar.apply(
+        Owned<Operation>(new UpdateSchedule(schedule))));
+
+    // Bring down the two machines that are still `DRAINING`.
+    MachineIDs machines = createMachineList({machine1, machine3});
+    AWAIT_READY(registrar.apply(
+        Owned<Operation>(new StartMaintenance(machines))));
+  }
+
+  {
+    // Recover once more and check that all three machines are down.
+    Registrar registrar(flags, state);
+    Future<Registry> registry = registrar.recover(master);
+    AWAIT_READY(registry);
+
+    EXPECT_EQ(3, registry.get().machines().machines().size());
+    EXPECT_EQ(
+        MachineInfo::DOWN,
+        registry.get().machines().machines(0).info().mode());
+
+    EXPECT_EQ(
+        MachineInfo::DOWN,
+        registry.get().machines().machines(1).info().mode());
+
+    EXPECT_EQ(
+        MachineInfo::DOWN,
+        registry.get().machines().machines(2).info().mode());
+  }
+}
+
+
 TEST_P(RegistrarTest, Bootstrap)
 {
   // Run 1 readmits a slave that is not present.

Reply via email to