Repository: mesos
Updated Branches:
  refs/heads/master eb075f9b7 -> da37eb247


Separate Metrics struct from Master class.

Review: https://reviews.apache.org/r/27744/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/da37eb24
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/da37eb24
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/da37eb24

Branch: refs/heads/master
Commit: da37eb24735471585cbcdcb89b568892095c3379
Parents: eb075f9
Author: Dominic Hamon <[email protected]>
Authored: Thu Oct 16 11:47:43 2014 -0700
Committer: Dominic Hamon <[email protected]>
Committed: Fri Nov 14 14:23:53 2014 -0800

----------------------------------------------------------------------
 src/Makefile.am             |   7 +-
 src/master/master.cpp       | 313 +-----------------------------------
 src/master/master.hpp       | 134 +---------------
 src/master/metrics.cpp      | 339 +++++++++++++++++++++++++++++++++++++++
 src/master/metrics.hpp      | 169 +++++++++++++++++++
 src/tests/metrics_tests.cpp | 147 +++++++++++++++++
 6 files changed, 664 insertions(+), 445 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/da37eb24/src/Makefile.am
----------------------------------------------------------------------
diff --git a/src/Makefile.am b/src/Makefile.am
index d5cad0a..0fe7dd0 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -274,6 +274,7 @@ libmesos_no_3rdparty_la_SOURCES =                                   \
        master/drf_sorter.cpp                                           \
        master/http.cpp                                                 \
        master/master.cpp                                               \
+       master/metrics.cpp                                      \
        master/registry.hpp                                             \
        master/registry.proto                                           \
        master/registrar.cpp                                            \
@@ -427,6 +428,7 @@ libmesos_no_3rdparty_la_SOURCES +=                                  \
        master/flags.hpp                                                \
        master/hierarchical_allocator_process.hpp                       \
        master/master.hpp                                               \
+       master/metrics.hpp                                      \
        master/repairer.hpp                                             \
        master/registrar.hpp                                            \
        master/sorter.hpp                                               \
@@ -1200,6 +1202,7 @@ mesos_tests_SOURCES =                             \
   tests/attributes_tests.cpp                   \
   tests/authentication_tests.cpp               \
   tests/authorization_tests.cpp                        \
+  tests/common/http_tests.cpp                                                  \
   tests/composing_containerizer_tests.cpp       \
   tests/containerizer.cpp                      \
   tests/containerizer_tests.cpp                        \
@@ -1226,6 +1229,7 @@ mesos_tests_SOURCES =                             \
   tests/master_slave_reconciliation_tests.cpp  \
   tests/master_tests.cpp                       \
   tests/mesos.cpp                              \
+  tests/metrics_tests.cpp                              \
   tests/module.cpp                             \
   tests/module_tests.cpp                       \
   tests/monitor_tests.cpp                      \
@@ -1248,8 +1252,7 @@ mesos_tests_SOURCES =                             \
   tests/status_update_manager_tests.cpp                \
   tests/utils.cpp                              \
   tests/values_tests.cpp                       \
-  tests/zookeeper_url_tests.cpp                        \
-  tests/common/http_tests.cpp
+  tests/zookeeper_url_tests.cpp
 
 mesos_tests_CPPFLAGS = $(MESOS_CPPFLAGS)
 mesos_tests_CPPFLAGS += -DSOURCE_DIR=\"$(abs_top_srcdir)\"

http://git-wip-us.apache.org/repos/asf/mesos/blob/da37eb24/src/master/master.cpp
----------------------------------------------------------------------
diff --git a/src/master/master.cpp b/src/master/master.cpp
index 0f89d1f..4b5d582 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -4625,7 +4625,7 @@ void Master::updateTask(Task* task, const StatusUpdate& update)
   CHECK_NOTNULL(task);
 
   // Get the unacknowledged status.
-  const TaskStatus& status  = update.status();
+  const TaskStatus& status = update.status();
 
   // Out-of-order updates should not occur, however in case they
   // do (e.g., due to bugs), prevent them here to ensure that the
@@ -5052,317 +5052,6 @@ double Master::_tasks_running()
 }
 
 
-// TODO(dhamon): Consider moving to master/metrics.cpp|hpp.
-// Message counters are named with "messages_" prefix so they can
-// be grouped together alphabetically in the output.
-// TODO(alexandra.sava): Add metrics for registered and removed slaves.
-Master::Metrics::Metrics(const Master& master)
-  : uptime_secs(
-        "master/uptime_secs",
-        defer(master, &Master::_uptime_secs)),
-    elected(
-        "master/elected",
-        defer(master, &Master::_elected)),
-    slaves_connected(
-        "master/slaves_connected",
-        defer(master, &Master::_slaves_connected)),
-    slaves_disconnected(
-        "master/slaves_disconnected",
-        defer(master, &Master::_slaves_disconnected)),
-    slaves_active(
-        "master/slaves_active",
-        defer(master, &Master::_slaves_active)),
-    slaves_inactive(
-        "master/slaves_inactive",
-        defer(master, &Master::_slaves_inactive)),
-    frameworks_connected(
-        "master/frameworks_connected",
-        defer(master, &Master::_frameworks_connected)),
-    frameworks_disconnected(
-        "master/frameworks_disconnected",
-        defer(master, &Master::_frameworks_disconnected)),
-    frameworks_active(
-        "master/frameworks_active",
-        defer(master, &Master::_frameworks_active)),
-    frameworks_inactive(
-        "master/frameworks_inactive",
-        defer(master, &Master::_frameworks_inactive)),
-    outstanding_offers(
-        "master/outstanding_offers",
-        defer(master, &Master::_outstanding_offers)),
-    tasks_staging(
-        "master/tasks_staging",
-        defer(master, &Master::_tasks_staging)),
-    tasks_starting(
-        "master/tasks_starting",
-        defer(master, &Master::_tasks_starting)),
-    tasks_running(
-        "master/tasks_running",
-        defer(master, &Master::_tasks_running)),
-    tasks_finished(
-        "master/tasks_finished"),
-    tasks_failed(
-        "master/tasks_failed"),
-    tasks_killed(
-        "master/tasks_killed"),
-    tasks_lost(
-        "master/tasks_lost"),
-    tasks_error(
-        "master/tasks_error"),
-    dropped_messages(
-        "master/dropped_messages"),
-    messages_register_framework(
-        "master/messages_register_framework"),
-    messages_reregister_framework(
-        "master/messages_reregister_framework"),
-    messages_unregister_framework(
-        "master/messages_unregister_framework"),
-    messages_deactivate_framework(
-        "master/messages_deactivate_framework"),
-    messages_kill_task(
-        "master/messages_kill_task"),
-    messages_status_update_acknowledgement(
-        "master/messages_status_update_acknowledgement"),
-    messages_resource_request(
-        "master/messages_resource_request"),
-    messages_launch_tasks(
-        "master/messages_launch_tasks"),
-    messages_decline_offers(
-        "master/messages_decline_offers"),
-    messages_revive_offers(
-        "master/messages_revive_offers"),
-    messages_reconcile_tasks(
-        "master/messages_reconcile_tasks"),
-    messages_framework_to_executor(
-        "master/messages_framework_to_executor"),
-    messages_register_slave(
-        "master/messages_register_slave"),
-    messages_reregister_slave(
-        "master/messages_reregister_slave"),
-    messages_unregister_slave(
-        "master/messages_unregister_slave"),
-    messages_status_update(
-        "master/messages_status_update"),
-    messages_exited_executor(
-        "master/messages_exited_executor"),
-    messages_authenticate(
-        "master/messages_authenticate"),
-    valid_framework_to_executor_messages(
-        "master/valid_framework_to_executor_messages"),
-    invalid_framework_to_executor_messages(
-        "master/invalid_framework_to_executor_messages"),
-    valid_status_updates(
-        "master/valid_status_updates"),
-    invalid_status_updates(
-        "master/invalid_status_updates"),
-    valid_status_update_acknowledgements(
-        "master/valid_status_update_acknowledgements"),
-    invalid_status_update_acknowledgements(
-        "master/invalid_status_update_acknowledgements"),
-    recovery_slave_removals(
-        "master/recovery_slave_removals"),
-    event_queue_messages(
-        "master/event_queue_messages",
-        defer(master, &Master::_event_queue_messages)),
-    event_queue_dispatches(
-        "master/event_queue_dispatches",
-        defer(master, &Master::_event_queue_dispatches)),
-    event_queue_http_requests(
-        "master/event_queue_http_requests",
-        defer(master, &Master::_event_queue_http_requests)),
-    slave_registrations(
-        "master/slave_registrations"),
-    slave_reregistrations(
-        "master/slave_reregistrations"),
-    slave_removals(
-        "master/slave_removals")
-{
-  // TODO(dhamon): Check return values of 'add'.
-  process::metrics::add(uptime_secs);
-  process::metrics::add(elected);
-
-  process::metrics::add(slaves_connected);
-  process::metrics::add(slaves_disconnected);
-  process::metrics::add(slaves_active);
-  process::metrics::add(slaves_inactive);
-
-  process::metrics::add(frameworks_connected);
-  process::metrics::add(frameworks_disconnected);
-  process::metrics::add(frameworks_active);
-  process::metrics::add(frameworks_inactive);
-
-  process::metrics::add(outstanding_offers);
-
-  process::metrics::add(tasks_staging);
-  process::metrics::add(tasks_starting);
-  process::metrics::add(tasks_running);
-  process::metrics::add(tasks_finished);
-  process::metrics::add(tasks_failed);
-  process::metrics::add(tasks_killed);
-  process::metrics::add(tasks_lost);
-  process::metrics::add(tasks_error);
-
-  process::metrics::add(dropped_messages);
-
-  // Messages from schedulers.
-  process::metrics::add(messages_register_framework);
-  process::metrics::add(messages_reregister_framework);
-  process::metrics::add(messages_unregister_framework);
-  process::metrics::add(messages_deactivate_framework);
-  process::metrics::add(messages_kill_task);
-  process::metrics::add(messages_status_update_acknowledgement);
-  process::metrics::add(messages_resource_request);
-  process::metrics::add(messages_launch_tasks);
-  process::metrics::add(messages_decline_offers);
-  process::metrics::add(messages_revive_offers);
-  process::metrics::add(messages_reconcile_tasks);
-  process::metrics::add(messages_framework_to_executor);
-
-  // Messages from slaves.
-  process::metrics::add(messages_register_slave);
-  process::metrics::add(messages_reregister_slave);
-  process::metrics::add(messages_unregister_slave);
-  process::metrics::add(messages_status_update);
-  process::metrics::add(messages_exited_executor);
-
-  // Messages from both schedulers and slaves.
-  process::metrics::add(messages_authenticate);
-
-  process::metrics::add(valid_framework_to_executor_messages);
-  process::metrics::add(invalid_framework_to_executor_messages);
-
-  process::metrics::add(valid_status_updates);
-  process::metrics::add(invalid_status_updates);
-
-  process::metrics::add(valid_status_update_acknowledgements);
-  process::metrics::add(invalid_status_update_acknowledgements);
-
-  process::metrics::add(recovery_slave_removals);
-
-  process::metrics::add(event_queue_messages);
-  process::metrics::add(event_queue_dispatches);
-  process::metrics::add(event_queue_http_requests);
-
-  process::metrics::add(slave_registrations);
-  process::metrics::add(slave_reregistrations);
-  process::metrics::add(slave_removals);
-
-  // Create resource gauges.
-  // TODO(dhamon): Set these up dynamically when adding a slave based
-  // on the resources the slave exposes.
-  const string resources[] = {"cpus", "mem", "disk"};
-
-  foreach (const string& resource, resources) {
-    process::metrics::Gauge totalGauge(
-        "master/" + resource + "_total",
-        defer(master, &Master::_resources_total, resource));
-    resources_total.push_back(totalGauge);
-    process::metrics::add(totalGauge);
-
-    process::metrics::Gauge usedGauge(
-        "master/" + resource + "_used",
-        defer(master, &Master::_resources_used, resource));
-    resources_used.push_back(usedGauge);
-    process::metrics::add(usedGauge);
-
-    process::metrics::Gauge percentGauge(
-        "master/" + resource + "_percent",
-        defer(master, &Master::_resources_percent, resource));
-    resources_percent.push_back(percentGauge);
-    process::metrics::add(percentGauge);
-  }
-}
-
-
-Master::Metrics::~Metrics()
-{
-  // TODO(dhamon): Check return values of 'remove'.
-  process::metrics::remove(uptime_secs);
-  process::metrics::remove(elected);
-
-  process::metrics::remove(slaves_connected);
-  process::metrics::remove(slaves_disconnected);
-  process::metrics::remove(slaves_active);
-  process::metrics::remove(slaves_inactive);
-
-  process::metrics::remove(frameworks_connected);
-  process::metrics::remove(frameworks_disconnected);
-  process::metrics::remove(frameworks_active);
-  process::metrics::remove(frameworks_inactive);
-
-  process::metrics::remove(outstanding_offers);
-
-  process::metrics::remove(tasks_staging);
-  process::metrics::remove(tasks_starting);
-  process::metrics::remove(tasks_running);
-  process::metrics::remove(tasks_finished);
-  process::metrics::remove(tasks_failed);
-  process::metrics::remove(tasks_killed);
-  process::metrics::remove(tasks_lost);
-  process::metrics::remove(tasks_error);
-
-  process::metrics::remove(dropped_messages);
-
-  // Messages from schedulers.
-  process::metrics::remove(messages_register_framework);
-  process::metrics::remove(messages_reregister_framework);
-  process::metrics::remove(messages_unregister_framework);
-  process::metrics::remove(messages_deactivate_framework);
-  process::metrics::remove(messages_kill_task);
-  process::metrics::remove(messages_status_update_acknowledgement);
-  process::metrics::remove(messages_resource_request);
-  process::metrics::remove(messages_launch_tasks);
-  process::metrics::remove(messages_decline_offers);
-  process::metrics::remove(messages_revive_offers);
-  process::metrics::remove(messages_reconcile_tasks);
-  process::metrics::remove(messages_framework_to_executor);
-
-  // Messages from slaves.
-  process::metrics::remove(messages_register_slave);
-  process::metrics::remove(messages_reregister_slave);
-  process::metrics::remove(messages_unregister_slave);
-  process::metrics::remove(messages_status_update);
-  process::metrics::remove(messages_exited_executor);
-
-  // Messages from both schedulers and slaves.
-  process::metrics::remove(messages_authenticate);
-
-  process::metrics::remove(valid_framework_to_executor_messages);
-  process::metrics::remove(invalid_framework_to_executor_messages);
-
-  process::metrics::remove(valid_status_updates);
-  process::metrics::remove(invalid_status_updates);
-
-  process::metrics::remove(valid_status_update_acknowledgements);
-  process::metrics::remove(invalid_status_update_acknowledgements);
-
-  process::metrics::remove(recovery_slave_removals);
-
-  process::metrics::remove(event_queue_messages);
-  process::metrics::remove(event_queue_dispatches);
-  process::metrics::remove(event_queue_http_requests);
-
-  process::metrics::remove(slave_registrations);
-  process::metrics::remove(slave_reregistrations);
-  process::metrics::remove(slave_removals);
-
-  foreach (const process::metrics::Gauge& gauge, resources_total) {
-    process::metrics::remove(gauge);
-  }
-  resources_total.clear();
-
-  foreach (const process::metrics::Gauge& gauge, resources_used) {
-    process::metrics::remove(gauge);
-  }
-  resources_used.clear();
-
-  foreach (const process::metrics::Gauge& gauge, resources_percent) {
-    process::metrics::remove(gauge);
-  }
-  resources_percent.clear();
-}
-
-
 double Master::_resources_total(const std::string& name)
 {
   double total = 0.0;

http://git-wip-us.apache.org/repos/asf/mesos/blob/da37eb24/src/master/master.hpp
----------------------------------------------------------------------
diff --git a/src/master/master.hpp b/src/master/master.hpp
index 47f3bc9..ece36c3 100644
--- a/src/master/master.hpp
+++ b/src/master/master.hpp
@@ -35,10 +35,6 @@
 #include <process/protobuf.hpp>
 #include <process/timer.hpp>
 
-#include <process/metrics/counter.hpp>
-#include <process/metrics/gauge.hpp>
-#include <process/metrics/metrics.hpp>
-
 #include <stout/cache.hpp>
 #include <stout/foreach.hpp>
 #include <stout/hashmap.hpp>
@@ -56,6 +52,7 @@
 #include "master/contender.hpp"
 #include "master/detector.hpp"
 #include "master/flags.hpp"
+#include "master/metrics.hpp"
 #include "master/registrar.hpp"
 
 #include "messages/messages.hpp"
@@ -475,6 +472,7 @@ private:
   Master& operator = (const Master&); // No assigning.
 
   friend struct OfferVisitor;
+  friend struct Metrics;
 
   const Flags flags;
 
@@ -607,133 +605,7 @@ private:
     uint64_t invalidFrameworkMessages;
   } stats;
 
-  struct Metrics
-  {
-    Metrics(const Master& master);
-
-    ~Metrics();
-
-    process::metrics::Gauge uptime_secs;
-    process::metrics::Gauge elected;
-
-    process::metrics::Gauge slaves_connected;
-    process::metrics::Gauge slaves_disconnected;
-    process::metrics::Gauge slaves_active;
-    process::metrics::Gauge slaves_inactive;
-
-    process::metrics::Gauge frameworks_connected;
-    process::metrics::Gauge frameworks_disconnected;
-    process::metrics::Gauge frameworks_active;
-    process::metrics::Gauge frameworks_inactive;
-
-    process::metrics::Gauge outstanding_offers;
-
-    // Task state metrics.
-    process::metrics::Gauge tasks_staging;
-    process::metrics::Gauge tasks_starting;
-    process::metrics::Gauge tasks_running;
-    process::metrics::Counter tasks_finished;
-    process::metrics::Counter tasks_failed;
-    process::metrics::Counter tasks_killed;
-    process::metrics::Counter tasks_lost;
-    process::metrics::Counter tasks_error;
-
-    // Message counters.
-    process::metrics::Counter dropped_messages;
-
-    // Metrics specific to frameworks of a common principal.
-    // These metrics have names prefixed by "frameworks/<principal>/".
-    struct Frameworks
-    {
-      // Counters for messages from all frameworks of this principal.
-      // Note: We only count messages from active scheduler
-      // *instances* while they are *registered*. i.e., messages
-      // prior to the completion of (re)registration
-      // (AuthenticateMessage and (Re)RegisterFrameworkMessage) and
-      // messages from an inactive scheduler instance (after the
-      // framework has failed over) are not counted.
-
-      // Framework messages received (before processing).
-      process::metrics::Counter messages_received;
-
-      // Framework messages processed.
-      // NOTE: This doesn't include dropped messages. Processing of
-      // a message may be throttled by a RateLimiter if one is
-      // configured for this principal. Also due to Master's
-      // asynchronous nature, this doesn't necessarily mean the work
-      // requested by this message has finished.
-      process::metrics::Counter messages_processed;
-
-      explicit Frameworks(const std::string& principal)
-        : messages_received("frameworks/" + principal + "/messages_received"),
-          messages_processed("frameworks/" + principal + "/messages_processed")
-      {
-        process::metrics::add(messages_received);
-        process::metrics::add(messages_processed);
-      }
-
-      ~Frameworks()
-      {
-        process::metrics::remove(messages_received);
-        process::metrics::remove(messages_processed);
-      }
-    };
-
-    // Per-framework-principal metrics keyed by the framework
-    // principal.
-    hashmap<std::string, process::Owned<Frameworks>> frameworks;
-
-    // Messages from schedulers.
-    process::metrics::Counter messages_register_framework;
-    process::metrics::Counter messages_reregister_framework;
-    process::metrics::Counter messages_unregister_framework;
-    process::metrics::Counter messages_deactivate_framework;
-    process::metrics::Counter messages_kill_task;
-    process::metrics::Counter messages_status_update_acknowledgement;
-    process::metrics::Counter messages_resource_request;
-    process::metrics::Counter messages_launch_tasks;
-    process::metrics::Counter messages_decline_offers;
-    process::metrics::Counter messages_revive_offers;
-    process::metrics::Counter messages_reconcile_tasks;
-    process::metrics::Counter messages_framework_to_executor;
-
-    // Messages from slaves.
-    process::metrics::Counter messages_register_slave;
-    process::metrics::Counter messages_reregister_slave;
-    process::metrics::Counter messages_unregister_slave;
-    process::metrics::Counter messages_status_update;
-    process::metrics::Counter messages_exited_executor;
-
-    // Messages from both schedulers and slaves.
-    process::metrics::Counter messages_authenticate;
-
-    process::metrics::Counter valid_framework_to_executor_messages;
-    process::metrics::Counter invalid_framework_to_executor_messages;
-
-    process::metrics::Counter valid_status_updates;
-    process::metrics::Counter invalid_status_updates;
-
-    process::metrics::Counter valid_status_update_acknowledgements;
-    process::metrics::Counter invalid_status_update_acknowledgements;
-
-    // Recovery counters.
-    process::metrics::Counter recovery_slave_removals;
-
-    // Process metrics.
-    process::metrics::Gauge event_queue_messages;
-    process::metrics::Gauge event_queue_dispatches;
-    process::metrics::Gauge event_queue_http_requests;
-
-    // Successful registry operations.
-    process::metrics::Counter slave_registrations;
-    process::metrics::Counter slave_reregistrations;
-    process::metrics::Counter slave_removals;
-
-    // Resource metrics.
-    std::vector<process::metrics::Gauge> resources_total;
-    std::vector<process::metrics::Gauge> resources_used;
-    std::vector<process::metrics::Gauge> resources_percent;
-  } metrics;
+  Metrics metrics;
 
   // Gauge handlers.
   double _uptime_secs()

http://git-wip-us.apache.org/repos/asf/mesos/blob/da37eb24/src/master/metrics.cpp
----------------------------------------------------------------------
diff --git a/src/master/metrics.cpp b/src/master/metrics.cpp
new file mode 100644
index 0000000..a7ac96d
--- /dev/null
+++ b/src/master/metrics.cpp
@@ -0,0 +1,339 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "master/metrics.hpp"
+
+#include "master/master.hpp"
+
+namespace mesos {
+namespace internal {
+namespace master {
+
+// Message counters are named with "messages_" prefix so they can
+// be grouped together alphabetically in the output.
+// TODO(alexandra.sava): Add metrics for registered and removed slaves.
+Metrics::Metrics(const Master& master)
+  : uptime_secs(
+        "master/uptime_secs",
+        defer(master, &Master::_uptime_secs)),
+    elected(
+        "master/elected",
+        defer(master, &Master::_elected)),
+    slaves_connected(
+        "master/slaves_connected",
+        defer(master, &Master::_slaves_connected)),
+    slaves_disconnected(
+        "master/slaves_disconnected",
+        defer(master, &Master::_slaves_disconnected)),
+    slaves_active(
+        "master/slaves_active",
+        defer(master, &Master::_slaves_active)),
+    slaves_inactive(
+        "master/slaves_inactive",
+        defer(master, &Master::_slaves_inactive)),
+    frameworks_connected(
+        "master/frameworks_connected",
+        defer(master, &Master::_frameworks_connected)),
+    frameworks_disconnected(
+        "master/frameworks_disconnected",
+        defer(master, &Master::_frameworks_disconnected)),
+    frameworks_active(
+        "master/frameworks_active",
+        defer(master, &Master::_frameworks_active)),
+    frameworks_inactive(
+        "master/frameworks_inactive",
+        defer(master, &Master::_frameworks_inactive)),
+    outstanding_offers(
+        "master/outstanding_offers",
+        defer(master, &Master::_outstanding_offers)),
+    tasks_staging(
+        "master/tasks_staging",
+        defer(master, &Master::_tasks_staging)),
+    tasks_starting(
+        "master/tasks_starting",
+        defer(master, &Master::_tasks_starting)),
+    tasks_running(
+        "master/tasks_running",
+        defer(master, &Master::_tasks_running)),
+    tasks_finished(
+        "master/tasks_finished"),
+    tasks_failed(
+        "master/tasks_failed"),
+    tasks_killed(
+        "master/tasks_killed"),
+    tasks_lost(
+        "master/tasks_lost"),
+    tasks_error(
+        "master/tasks_error"),
+    dropped_messages(
+        "master/dropped_messages"),
+    messages_register_framework(
+        "master/messages_register_framework"),
+    messages_reregister_framework(
+        "master/messages_reregister_framework"),
+    messages_unregister_framework(
+        "master/messages_unregister_framework"),
+    messages_deactivate_framework(
+        "master/messages_deactivate_framework"),
+    messages_kill_task(
+        "master/messages_kill_task"),
+    messages_status_update_acknowledgement(
+        "master/messages_status_update_acknowledgement"),
+    messages_resource_request(
+        "master/messages_resource_request"),
+    messages_launch_tasks(
+        "master/messages_launch_tasks"),
+    messages_decline_offers(
+        "master/messages_decline_offers"),
+    messages_revive_offers(
+        "master/messages_revive_offers"),
+    messages_reconcile_tasks(
+        "master/messages_reconcile_tasks"),
+    messages_framework_to_executor(
+        "master/messages_framework_to_executor"),
+    messages_register_slave(
+        "master/messages_register_slave"),
+    messages_reregister_slave(
+        "master/messages_reregister_slave"),
+    messages_unregister_slave(
+        "master/messages_unregister_slave"),
+    messages_status_update(
+        "master/messages_status_update"),
+    messages_exited_executor(
+        "master/messages_exited_executor"),
+    messages_authenticate(
+        "master/messages_authenticate"),
+    valid_framework_to_executor_messages(
+        "master/valid_framework_to_executor_messages"),
+    invalid_framework_to_executor_messages(
+        "master/invalid_framework_to_executor_messages"),
+    valid_status_updates(
+        "master/valid_status_updates"),
+    invalid_status_updates(
+        "master/invalid_status_updates"),
+    valid_status_update_acknowledgements(
+        "master/valid_status_update_acknowledgements"),
+    invalid_status_update_acknowledgements(
+        "master/invalid_status_update_acknowledgements"),
+    recovery_slave_removals(
+        "master/recovery_slave_removals"),
+    event_queue_messages(
+        "master/event_queue_messages",
+        defer(master, &Master::_event_queue_messages)),
+    event_queue_dispatches(
+        "master/event_queue_dispatches",
+        defer(master, &Master::_event_queue_dispatches)),
+    event_queue_http_requests(
+        "master/event_queue_http_requests",
+        defer(master, &Master::_event_queue_http_requests)),
+    slave_registrations(
+        "master/slave_registrations"),
+    slave_reregistrations(
+        "master/slave_reregistrations"),
+    slave_removals(
+        "master/slave_removals")
+{
+  // TODO(dhamon): Check return values of 'add'.
+  process::metrics::add(uptime_secs);
+  process::metrics::add(elected);
+
+  process::metrics::add(slaves_connected);
+  process::metrics::add(slaves_disconnected);
+  process::metrics::add(slaves_active);
+  process::metrics::add(slaves_inactive);
+
+  process::metrics::add(frameworks_connected);
+  process::metrics::add(frameworks_disconnected);
+  process::metrics::add(frameworks_active);
+  process::metrics::add(frameworks_inactive);
+
+  process::metrics::add(outstanding_offers);
+
+  process::metrics::add(tasks_staging);
+  process::metrics::add(tasks_starting);
+  process::metrics::add(tasks_running);
+  process::metrics::add(tasks_finished);
+  process::metrics::add(tasks_failed);
+  process::metrics::add(tasks_killed);
+  process::metrics::add(tasks_lost);
+  process::metrics::add(tasks_error);
+
+  process::metrics::add(dropped_messages);
+
+  // Messages from schedulers.
+  process::metrics::add(messages_register_framework);
+  process::metrics::add(messages_reregister_framework);
+  process::metrics::add(messages_unregister_framework);
+  process::metrics::add(messages_deactivate_framework);
+  process::metrics::add(messages_kill_task);
+  process::metrics::add(messages_status_update_acknowledgement);
+  process::metrics::add(messages_resource_request);
+  process::metrics::add(messages_launch_tasks);
+  process::metrics::add(messages_decline_offers);
+  process::metrics::add(messages_revive_offers);
+  process::metrics::add(messages_reconcile_tasks);
+  process::metrics::add(messages_framework_to_executor);
+
+  // Messages from slaves.
+  process::metrics::add(messages_register_slave);
+  process::metrics::add(messages_reregister_slave);
+  process::metrics::add(messages_unregister_slave);
+  process::metrics::add(messages_status_update);
+  process::metrics::add(messages_exited_executor);
+
+  // Messages from both schedulers and slaves.
+  process::metrics::add(messages_authenticate);
+
+  process::metrics::add(valid_framework_to_executor_messages);
+  process::metrics::add(invalid_framework_to_executor_messages);
+
+  process::metrics::add(valid_status_updates);
+  process::metrics::add(invalid_status_updates);
+
+  process::metrics::add(valid_status_update_acknowledgements);
+  process::metrics::add(invalid_status_update_acknowledgements);
+
+  process::metrics::add(recovery_slave_removals);
+
+  process::metrics::add(event_queue_messages);
+  process::metrics::add(event_queue_dispatches);
+  process::metrics::add(event_queue_http_requests);
+
+  process::metrics::add(slave_registrations);
+  process::metrics::add(slave_reregistrations);
+  process::metrics::add(slave_removals);
+
+  // Create resource gauges.
+  // TODO(dhamon): Set these up dynamically when adding a slave based on the
+  // resources the slave exposes.
+  const std::string resources[] = {"cpus", "mem", "disk"};
+
+  foreach (const std::string& resource, resources) {
+    process::metrics::Gauge totalGauge(
+        "master/" + resource + "_total",
+        defer(master, &Master::_resources_total, resource));
+    resources_total.push_back(totalGauge);
+    process::metrics::add(totalGauge);
+
+    process::metrics::Gauge usedGauge(
+        "master/" + resource + "_used",
+        defer(master, &Master::_resources_used, resource));
+    resources_used.push_back(usedGauge);
+    process::metrics::add(usedGauge);
+
+    process::metrics::Gauge percentGauge(
+        "master/" + resource + "_percent",
+        defer(master, &Master::_resources_percent, resource));
+    resources_percent.push_back(percentGauge);
+    process::metrics::add(percentGauge);
+  }
+}
+
+
+Metrics::~Metrics()
+{
+  // Take each master-level counter and gauge out of libprocess' metrics
+  // again. Unqualified 'remove' below is 'process::metrics::remove'.
+  using process::metrics::remove;
+
+  // TODO(dhamon): Check return values of 'remove'.
+  remove(uptime_secs);
+  remove(elected);
+
+  // Slave gauges.
+  remove(slaves_connected);
+  remove(slaves_disconnected);
+  remove(slaves_active);
+  remove(slaves_inactive);
+
+  // Framework gauges.
+  remove(frameworks_connected);
+  remove(frameworks_disconnected);
+  remove(frameworks_active);
+  remove(frameworks_inactive);
+
+  remove(outstanding_offers);
+
+  // Task state metrics.
+  remove(tasks_staging);
+  remove(tasks_starting);
+  remove(tasks_running);
+  remove(tasks_finished);
+  remove(tasks_failed);
+  remove(tasks_killed);
+  remove(tasks_lost);
+  remove(tasks_error);
+
+  remove(dropped_messages);
+
+  // Counters for messages sent by schedulers.
+  remove(messages_register_framework);
+  remove(messages_reregister_framework);
+  remove(messages_unregister_framework);
+  remove(messages_deactivate_framework);
+  remove(messages_kill_task);
+  remove(messages_status_update_acknowledgement);
+  remove(messages_resource_request);
+  remove(messages_launch_tasks);
+  remove(messages_decline_offers);
+  remove(messages_revive_offers);
+  remove(messages_reconcile_tasks);
+  remove(messages_framework_to_executor);
+
+  // Counters for messages sent by slaves.
+  remove(messages_register_slave);
+  remove(messages_reregister_slave);
+  remove(messages_unregister_slave);
+  remove(messages_status_update);
+  remove(messages_exited_executor);
+
+  // Counter for messages sent by both schedulers and slaves.
+  remove(messages_authenticate);
+
+  remove(valid_framework_to_executor_messages);
+  remove(invalid_framework_to_executor_messages);
+
+  remove(valid_status_updates);
+  remove(invalid_status_updates);
+
+  remove(valid_status_update_acknowledgements);
+  remove(invalid_status_update_acknowledgements);
+
+  remove(recovery_slave_removals);
+
+  // Event queue gauges.
+  remove(event_queue_messages);
+  remove(event_queue_dispatches);
+  remove(event_queue_http_requests);
+
+  // Registry operation counters.
+  remove(slave_registrations);
+  remove(slave_reregistrations);
+  remove(slave_removals);
+
+  // The resource gauges live in vectors: remove every element, then
+  // empty the vector.
+  foreach (const process::metrics::Gauge& total, resources_total) {
+    remove(total);
+  }
+  resources_total.clear();
+
+  foreach (const process::metrics::Gauge& used, resources_used) {
+    remove(used);
+  }
+  resources_used.clear();
+
+  foreach (const process::metrics::Gauge& percent, resources_percent) {
+    remove(percent);
+  }
+  resources_percent.clear();
+}
+
+
+} // namespace master {
+} // namespace internal {
+} // namespace mesos {

http://git-wip-us.apache.org/repos/asf/mesos/blob/da37eb24/src/master/metrics.hpp
----------------------------------------------------------------------
diff --git a/src/master/metrics.hpp b/src/master/metrics.hpp
new file mode 100644
index 0000000..5e6b6d5
--- /dev/null
+++ b/src/master/metrics.hpp
@@ -0,0 +1,169 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MASTER_METRICS_HPP__
+#define __MASTER_METRICS_HPP__
+
+#include <string>
+#include <vector>
+
+#include <process/owned.hpp>
+
+#include <process/metrics/counter.hpp>
+#include <process/metrics/gauge.hpp>
+#include <process/metrics/metrics.hpp>
+
+#include <stout/hashmap.hpp>
+
+namespace mesos {
+namespace internal {
+namespace master {
+
+class Master;
+
+// Counters and gauges published by the master.
+struct Metrics
+{
+  // 'explicit' so that a Master can never be implicitly converted
+  // into a Metrics object.
+  explicit Metrics(const Master& master);
+
+  // Removes the master-level counters and gauges declared below via
+  // 'process::metrics::remove'.
+  ~Metrics();
+
+  process::metrics::Gauge uptime_secs;
+  process::metrics::Gauge elected;
+
+  // Slave gauges.
+  process::metrics::Gauge slaves_connected;
+  process::metrics::Gauge slaves_disconnected;
+  process::metrics::Gauge slaves_active;
+  process::metrics::Gauge slaves_inactive;
+
+  // Framework gauges.
+  process::metrics::Gauge frameworks_connected;
+  process::metrics::Gauge frameworks_disconnected;
+  process::metrics::Gauge frameworks_active;
+  process::metrics::Gauge frameworks_inactive;
+
+  process::metrics::Gauge outstanding_offers;
+
+  // Task state metrics.
+  process::metrics::Gauge tasks_staging;
+  process::metrics::Gauge tasks_starting;
+  process::metrics::Gauge tasks_running;
+  process::metrics::Counter tasks_finished;
+  process::metrics::Counter tasks_failed;
+  process::metrics::Counter tasks_killed;
+  process::metrics::Counter tasks_lost;
+  process::metrics::Counter tasks_error;
+
+  // Message counters.
+  process::metrics::Counter dropped_messages;
+
+  // Metrics specific to frameworks of a common principal.
+  // These metrics have names prefixed by "frameworks/<principal>/".
+  struct Frameworks
+  {
+    // Counters for messages from all frameworks of this principal.
+    // Note: We only count messages from active scheduler
+    // *instances* while they are *registered*. i.e., messages
+    // prior to the completion of (re)registration
+    // (AuthenticateMessage and (Re)RegisterFrameworkMessage) and
+    // messages from an inactive scheduler instance (after the
+    // framework has failed over) are not counted.
+
+    // Framework messages received (before processing).
+    process::metrics::Counter messages_received;
+
+    // Framework messages processed.
+    // NOTE: This doesn't include dropped messages. Processing of
+    // a message may be throttled by a RateLimiter if one is
+    // configured for this principal. Also due to Master's
+    // asynchronous nature, this doesn't necessarily mean the work
+    // requested by this message has finished.
+    process::metrics::Counter messages_processed;
+
+    // Names both counters after 'principal' and adds them with
+    // 'process::metrics::add'.
+    explicit Frameworks(const std::string& principal)
+      : messages_received("frameworks/" + principal + "/messages_received"),
+        messages_processed("frameworks/" + principal + "/messages_processed")
+    {
+      process::metrics::add(messages_received);
+      process::metrics::add(messages_processed);
+    }
+
+    // Removes both counters again.
+    ~Frameworks()
+    {
+      process::metrics::remove(messages_received);
+      process::metrics::remove(messages_processed);
+    }
+  };
+
+  // Per-framework-principal metrics keyed by the framework
+  // principal.
+  hashmap<std::string, process::Owned<Frameworks>> frameworks;
+
+  // Messages from schedulers.
+  process::metrics::Counter messages_register_framework;
+  process::metrics::Counter messages_reregister_framework;
+  process::metrics::Counter messages_unregister_framework;
+  process::metrics::Counter messages_deactivate_framework;
+  process::metrics::Counter messages_kill_task;
+  process::metrics::Counter messages_status_update_acknowledgement;
+  process::metrics::Counter messages_resource_request;
+  process::metrics::Counter messages_launch_tasks;
+  process::metrics::Counter messages_decline_offers;
+  process::metrics::Counter messages_revive_offers;
+  process::metrics::Counter messages_reconcile_tasks;
+  process::metrics::Counter messages_framework_to_executor;
+
+  // Messages from slaves.
+  process::metrics::Counter messages_register_slave;
+  process::metrics::Counter messages_reregister_slave;
+  process::metrics::Counter messages_unregister_slave;
+  process::metrics::Counter messages_status_update;
+  process::metrics::Counter messages_exited_executor;
+
+  // Messages from both schedulers and slaves.
+  process::metrics::Counter messages_authenticate;
+
+  process::metrics::Counter valid_framework_to_executor_messages;
+  process::metrics::Counter invalid_framework_to_executor_messages;
+
+  process::metrics::Counter valid_status_updates;
+  process::metrics::Counter invalid_status_updates;
+
+  process::metrics::Counter valid_status_update_acknowledgements;
+  process::metrics::Counter invalid_status_update_acknowledgements;
+
+  // Recovery counters.
+  process::metrics::Counter recovery_slave_removals;
+
+  // Process metrics.
+  process::metrics::Gauge event_queue_messages;
+  process::metrics::Gauge event_queue_dispatches;
+  process::metrics::Gauge event_queue_http_requests;
+
+  // Successful registry operations.
+  process::metrics::Counter slave_registrations;
+  process::metrics::Counter slave_reregistrations;
+  process::metrics::Counter slave_removals;
+
+  // Resource metrics. The constructor appends one gauge to each
+  // vector for each of the resources "cpus", "mem" and "disk".
+  std::vector<process::metrics::Gauge> resources_total;
+  std::vector<process::metrics::Gauge> resources_used;
+  std::vector<process::metrics::Gauge> resources_percent;
+};
+
+} // namespace master {
+} // namespace internal {
+} // namespace mesos {
+
+#endif // __MASTER_METRICS_HPP__

http://git-wip-us.apache.org/repos/asf/mesos/blob/da37eb24/src/tests/metrics_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/metrics_tests.cpp b/src/tests/metrics_tests.cpp
new file mode 100644
index 0000000..72571f2
--- /dev/null
+++ b/src/tests/metrics_tests.cpp
@@ -0,0 +1,147 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include <process/future.hpp>
+#include <process/http.hpp>
+#include <process/pid.hpp>
+
+#include <stout/gtest.hpp>
+#include <stout/try.hpp>
+
+#include "master/master.hpp"
+#include "tests/mesos.hpp"
+
+using namespace mesos;
+using namespace mesos::internal;
+
+using mesos::internal::master::Master;
+
+// Fixture used by the metrics tests below; it adds nothing on top of
+// what it inherits from MesosTest.
+class MetricsTest : public mesos::internal::tests::MesosTest
+{
+};
+
+// Starts a master, fetches the 'snapshot' endpoint of the 'metrics'
+// process and verifies that every expected metric key is present in
+// the returned JSON object. NOTE: Only the presence of the keys is
+// checked, not their values.
+TEST_F(MetricsTest, Master)
+{
+  Try<process::PID<Master>> master = StartMaster();
+  ASSERT_SOME(master);
+
+  // Get the snapshot.
+  process::UPID upid("metrics", process::node());
+
+  process::Future<process::http::Response> response =
+      process::http::get(upid, "snapshot");
+  AWAIT_EXPECT_RESPONSE_STATUS_EQ(process::http::OK().status, response);
+
+  EXPECT_SOME_EQ(
+      "application/json",
+      response.get().headers.get("Content-Type"));
+
+  Try<JSON::Object> parse = JSON::parse<JSON::Object>(response.get().body);
+  ASSERT_SOME(parse);
+
+  JSON::Object stats = parse.get();
+
+  EXPECT_EQ(1u, stats.values.count("master/uptime_secs"));
+
+  EXPECT_EQ(1u, stats.values.count("master/elected"));
+
+  EXPECT_EQ(1u, stats.values.count("master/slaves_connected"));
+  EXPECT_EQ(1u, stats.values.count("master/slaves_disconnected"));
+  EXPECT_EQ(1u, stats.values.count("master/slaves_active"));
+  EXPECT_EQ(1u, stats.values.count("master/slaves_inactive"));
+
+  EXPECT_EQ(1u, stats.values.count("master/frameworks_connected"));
+  EXPECT_EQ(1u, stats.values.count("master/frameworks_disconnected"));
+  EXPECT_EQ(1u, stats.values.count("master/frameworks_active"));
+  EXPECT_EQ(1u, stats.values.count("master/frameworks_inactive"));
+
+  EXPECT_EQ(1u, stats.values.count("master/outstanding_offers"));
+
+  EXPECT_EQ(1u, stats.values.count("master/tasks_staging"));
+  EXPECT_EQ(1u, stats.values.count("master/tasks_starting"));
+  EXPECT_EQ(1u, stats.values.count("master/tasks_running"));
+  EXPECT_EQ(1u, stats.values.count("master/tasks_finished"));
+  EXPECT_EQ(1u, stats.values.count("master/tasks_failed"));
+  EXPECT_EQ(1u, stats.values.count("master/tasks_killed"));
+  EXPECT_EQ(1u, stats.values.count("master/tasks_lost"));
+  EXPECT_EQ(1u, stats.values.count("master/tasks_error"));
+
+  EXPECT_EQ(1u, stats.values.count("master/dropped_messages"));
+
+  // Messages from schedulers.
+  EXPECT_EQ(1u, stats.values.count("master/messages_register_framework"));
+  EXPECT_EQ(1u, stats.values.count("master/messages_reregister_framework"));
+  EXPECT_EQ(1u, stats.values.count("master/messages_unregister_framework"));
+  EXPECT_EQ(1u, stats.values.count("master/messages_deactivate_framework"));
+  EXPECT_EQ(1u, stats.values.count("master/messages_kill_task"));
+  EXPECT_EQ(1u, stats.values.count(
+      "master/messages_status_update_acknowledgement"));
+  EXPECT_EQ(1u, stats.values.count("master/messages_resource_request"));
+  EXPECT_EQ(1u, stats.values.count("master/messages_launch_tasks"));
+  EXPECT_EQ(1u, stats.values.count("master/messages_decline_offers"));
+  EXPECT_EQ(1u, stats.values.count("master/messages_revive_offers"));
+  EXPECT_EQ(1u, stats.values.count("master/messages_reconcile_tasks"));
+  EXPECT_EQ(1u, stats.values.count("master/messages_framework_to_executor"));
+
+  // Messages from slaves.
+  EXPECT_EQ(1u, stats.values.count("master/messages_register_slave"));
+  EXPECT_EQ(1u, stats.values.count("master/messages_reregister_slave"));
+  EXPECT_EQ(1u, stats.values.count("master/messages_unregister_slave"));
+  EXPECT_EQ(1u, stats.values.count("master/messages_status_update"));
+  EXPECT_EQ(1u, stats.values.count("master/messages_exited_executor"));
+
+  // Messages from both schedulers and slaves.
+  EXPECT_EQ(1u, stats.values.count("master/messages_authenticate"));
+
+  EXPECT_EQ(1u, stats.values.count(
+      "master/valid_framework_to_executor_messages"));
+  EXPECT_EQ(1u, stats.values.count(
+      "master/invalid_framework_to_executor_messages"));
+
+  EXPECT_EQ(1u, stats.values.count("master/valid_status_updates"));
+  EXPECT_EQ(1u, stats.values.count("master/invalid_status_updates"));
+
+  EXPECT_EQ(1u, stats.values.count(
+      "master/valid_status_update_acknowledgements"));
+  EXPECT_EQ(1u, stats.values.count(
+      "master/invalid_status_update_acknowledgements"));
+
+  EXPECT_EQ(1u, stats.values.count("master/recovery_slave_removals"));
+
+  EXPECT_EQ(1u, stats.values.count("master/event_queue_messages"));
+  EXPECT_EQ(1u, stats.values.count("master/event_queue_dispatches"));
+  EXPECT_EQ(1u, stats.values.count("master/event_queue_http_requests"));
+
+  // Successful registry operations. These counters are added by the
+  // Metrics constructor as well, so they must show up in the snapshot.
+  EXPECT_EQ(1u, stats.values.count("master/slave_registrations"));
+  EXPECT_EQ(1u, stats.values.count("master/slave_reregistrations"));
+  EXPECT_EQ(1u, stats.values.count("master/slave_removals"));
+
+  // Resource gauges: total/used/percent for each of cpus, mem, disk.
+  EXPECT_EQ(1u, stats.values.count("master/cpus_total"));
+  EXPECT_EQ(1u, stats.values.count("master/cpus_used"));
+  EXPECT_EQ(1u, stats.values.count("master/cpus_percent"));
+
+  EXPECT_EQ(1u, stats.values.count("master/mem_total"));
+  EXPECT_EQ(1u, stats.values.count("master/mem_used"));
+  EXPECT_EQ(1u, stats.values.count("master/mem_percent"));
+
+  EXPECT_EQ(1u, stats.values.count("master/disk_total"));
+  EXPECT_EQ(1u, stats.values.count("master/disk_used"));
+  EXPECT_EQ(1u, stats.values.count("master/disk_percent"));
+
+  // Registrar metrics.
+  EXPECT_EQ(1u, stats.values.count("registrar/queued_operations"));
+  EXPECT_EQ(1u, stats.values.count("registrar/registry_size_bytes"));
+
+  EXPECT_EQ(1u, stats.values.count("registrar/state_fetch_ms"));
+  EXPECT_EQ(1u, stats.values.count("registrar/state_store_ms"));
+}

Reply via email to