Repository: mesos Updated Branches: refs/heads/master eb075f9b7 -> da37eb247
Separate Metrics struct from Master class. Review: https://reviews.apache.org/r/27744/ Project: http://git-wip-us.apache.org/repos/asf/mesos/repo Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/da37eb24 Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/da37eb24 Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/da37eb24 Branch: refs/heads/master Commit: da37eb24735471585cbcdcb89b568892095c3379 Parents: eb075f9 Author: Dominic Hamon <[email protected]> Authored: Thu Oct 16 11:47:43 2014 -0700 Committer: Dominic Hamon <[email protected]> Committed: Fri Nov 14 14:23:53 2014 -0800 ---------------------------------------------------------------------- src/Makefile.am | 7 +- src/master/master.cpp | 313 +----------------------------------- src/master/master.hpp | 134 +--------------- src/master/metrics.cpp | 339 +++++++++++++++++++++++++++++++++++++++ src/master/metrics.hpp | 169 +++++++++++++++++++ src/tests/metrics_tests.cpp | 147 +++++++++++++++++ 6 files changed, 664 insertions(+), 445 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/mesos/blob/da37eb24/src/Makefile.am ---------------------------------------------------------------------- diff --git a/src/Makefile.am b/src/Makefile.am index d5cad0a..0fe7dd0 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -274,6 +274,7 @@ libmesos_no_3rdparty_la_SOURCES = \ master/drf_sorter.cpp \ master/http.cpp \ master/master.cpp \ + master/metrics.cpp \ master/registry.hpp \ master/registry.proto \ master/registrar.cpp \ @@ -427,6 +428,7 @@ libmesos_no_3rdparty_la_SOURCES += \ master/flags.hpp \ master/hierarchical_allocator_process.hpp \ master/master.hpp \ + master/metrics.hpp \ master/repairer.hpp \ master/registrar.hpp \ master/sorter.hpp \ @@ -1200,6 +1202,7 @@ mesos_tests_SOURCES = \ tests/attributes_tests.cpp \ tests/authentication_tests.cpp \ tests/authorization_tests.cpp \ + tests/common/http_tests.cpp \ 
tests/composing_containerizer_tests.cpp \ tests/containerizer.cpp \ tests/containerizer_tests.cpp \ @@ -1226,6 +1229,7 @@ mesos_tests_SOURCES = \ tests/master_slave_reconciliation_tests.cpp \ tests/master_tests.cpp \ tests/mesos.cpp \ + tests/metrics_tests.cpp \ tests/module.cpp \ tests/module_tests.cpp \ tests/monitor_tests.cpp \ @@ -1248,8 +1252,7 @@ mesos_tests_SOURCES = \ tests/status_update_manager_tests.cpp \ tests/utils.cpp \ tests/values_tests.cpp \ - tests/zookeeper_url_tests.cpp \ - tests/common/http_tests.cpp + tests/zookeeper_url_tests.cpp mesos_tests_CPPFLAGS = $(MESOS_CPPFLAGS) mesos_tests_CPPFLAGS += -DSOURCE_DIR=\"$(abs_top_srcdir)\" http://git-wip-us.apache.org/repos/asf/mesos/blob/da37eb24/src/master/master.cpp ---------------------------------------------------------------------- diff --git a/src/master/master.cpp b/src/master/master.cpp index 0f89d1f..4b5d582 100644 --- a/src/master/master.cpp +++ b/src/master/master.cpp @@ -4625,7 +4625,7 @@ void Master::updateTask(Task* task, const StatusUpdate& update) CHECK_NOTNULL(task); // Get the unacknowledged status. - const TaskStatus& status = update.status(); + const TaskStatus& status = update.status(); // Out-of-order updates should not occur, however in case they // do (e.g., due to bugs), prevent them here to ensure that the @@ -5052,317 +5052,6 @@ double Master::_tasks_running() } -// TODO(dhamon): Consider moving to master/metrics.cpp|hpp. -// Message counters are named with "messages_" prefix so they can -// be grouped together alphabetically in the output. -// TODO(alexandra.sava): Add metrics for registered and removed slaves. 
-Master::Metrics::Metrics(const Master& master) - : uptime_secs( - "master/uptime_secs", - defer(master, &Master::_uptime_secs)), - elected( - "master/elected", - defer(master, &Master::_elected)), - slaves_connected( - "master/slaves_connected", - defer(master, &Master::_slaves_connected)), - slaves_disconnected( - "master/slaves_disconnected", - defer(master, &Master::_slaves_disconnected)), - slaves_active( - "master/slaves_active", - defer(master, &Master::_slaves_active)), - slaves_inactive( - "master/slaves_inactive", - defer(master, &Master::_slaves_inactive)), - frameworks_connected( - "master/frameworks_connected", - defer(master, &Master::_frameworks_connected)), - frameworks_disconnected( - "master/frameworks_disconnected", - defer(master, &Master::_frameworks_disconnected)), - frameworks_active( - "master/frameworks_active", - defer(master, &Master::_frameworks_active)), - frameworks_inactive( - "master/frameworks_inactive", - defer(master, &Master::_frameworks_inactive)), - outstanding_offers( - "master/outstanding_offers", - defer(master, &Master::_outstanding_offers)), - tasks_staging( - "master/tasks_staging", - defer(master, &Master::_tasks_staging)), - tasks_starting( - "master/tasks_starting", - defer(master, &Master::_tasks_starting)), - tasks_running( - "master/tasks_running", - defer(master, &Master::_tasks_running)), - tasks_finished( - "master/tasks_finished"), - tasks_failed( - "master/tasks_failed"), - tasks_killed( - "master/tasks_killed"), - tasks_lost( - "master/tasks_lost"), - tasks_error( - "master/tasks_error"), - dropped_messages( - "master/dropped_messages"), - messages_register_framework( - "master/messages_register_framework"), - messages_reregister_framework( - "master/messages_reregister_framework"), - messages_unregister_framework( - "master/messages_unregister_framework"), - messages_deactivate_framework( - "master/messages_deactivate_framework"), - messages_kill_task( - "master/messages_kill_task"), - 
messages_status_update_acknowledgement( - "master/messages_status_update_acknowledgement"), - messages_resource_request( - "master/messages_resource_request"), - messages_launch_tasks( - "master/messages_launch_tasks"), - messages_decline_offers( - "master/messages_decline_offers"), - messages_revive_offers( - "master/messages_revive_offers"), - messages_reconcile_tasks( - "master/messages_reconcile_tasks"), - messages_framework_to_executor( - "master/messages_framework_to_executor"), - messages_register_slave( - "master/messages_register_slave"), - messages_reregister_slave( - "master/messages_reregister_slave"), - messages_unregister_slave( - "master/messages_unregister_slave"), - messages_status_update( - "master/messages_status_update"), - messages_exited_executor( - "master/messages_exited_executor"), - messages_authenticate( - "master/messages_authenticate"), - valid_framework_to_executor_messages( - "master/valid_framework_to_executor_messages"), - invalid_framework_to_executor_messages( - "master/invalid_framework_to_executor_messages"), - valid_status_updates( - "master/valid_status_updates"), - invalid_status_updates( - "master/invalid_status_updates"), - valid_status_update_acknowledgements( - "master/valid_status_update_acknowledgements"), - invalid_status_update_acknowledgements( - "master/invalid_status_update_acknowledgements"), - recovery_slave_removals( - "master/recovery_slave_removals"), - event_queue_messages( - "master/event_queue_messages", - defer(master, &Master::_event_queue_messages)), - event_queue_dispatches( - "master/event_queue_dispatches", - defer(master, &Master::_event_queue_dispatches)), - event_queue_http_requests( - "master/event_queue_http_requests", - defer(master, &Master::_event_queue_http_requests)), - slave_registrations( - "master/slave_registrations"), - slave_reregistrations( - "master/slave_reregistrations"), - slave_removals( - "master/slave_removals") -{ - // TODO(dhamon): Check return values of 'add'. 
- process::metrics::add(uptime_secs); - process::metrics::add(elected); - - process::metrics::add(slaves_connected); - process::metrics::add(slaves_disconnected); - process::metrics::add(slaves_active); - process::metrics::add(slaves_inactive); - - process::metrics::add(frameworks_connected); - process::metrics::add(frameworks_disconnected); - process::metrics::add(frameworks_active); - process::metrics::add(frameworks_inactive); - - process::metrics::add(outstanding_offers); - - process::metrics::add(tasks_staging); - process::metrics::add(tasks_starting); - process::metrics::add(tasks_running); - process::metrics::add(tasks_finished); - process::metrics::add(tasks_failed); - process::metrics::add(tasks_killed); - process::metrics::add(tasks_lost); - process::metrics::add(tasks_error); - - process::metrics::add(dropped_messages); - - // Messages from schedulers. - process::metrics::add(messages_register_framework); - process::metrics::add(messages_reregister_framework); - process::metrics::add(messages_unregister_framework); - process::metrics::add(messages_deactivate_framework); - process::metrics::add(messages_kill_task); - process::metrics::add(messages_status_update_acknowledgement); - process::metrics::add(messages_resource_request); - process::metrics::add(messages_launch_tasks); - process::metrics::add(messages_decline_offers); - process::metrics::add(messages_revive_offers); - process::metrics::add(messages_reconcile_tasks); - process::metrics::add(messages_framework_to_executor); - - // Messages from slaves. - process::metrics::add(messages_register_slave); - process::metrics::add(messages_reregister_slave); - process::metrics::add(messages_unregister_slave); - process::metrics::add(messages_status_update); - process::metrics::add(messages_exited_executor); - - // Messages from both schedulers and slaves. 
- process::metrics::add(messages_authenticate); - - process::metrics::add(valid_framework_to_executor_messages); - process::metrics::add(invalid_framework_to_executor_messages); - - process::metrics::add(valid_status_updates); - process::metrics::add(invalid_status_updates); - - process::metrics::add(valid_status_update_acknowledgements); - process::metrics::add(invalid_status_update_acknowledgements); - - process::metrics::add(recovery_slave_removals); - - process::metrics::add(event_queue_messages); - process::metrics::add(event_queue_dispatches); - process::metrics::add(event_queue_http_requests); - - process::metrics::add(slave_registrations); - process::metrics::add(slave_reregistrations); - process::metrics::add(slave_removals); - - // Create resource gauges. - // TODO(dhamon): Set these up dynamically when adding a slave based - // on the resources the slave exposes. - const string resources[] = {"cpus", "mem", "disk"}; - - foreach (const string& resource, resources) { - process::metrics::Gauge totalGauge( - "master/" + resource + "_total", - defer(master, &Master::_resources_total, resource)); - resources_total.push_back(totalGauge); - process::metrics::add(totalGauge); - - process::metrics::Gauge usedGauge( - "master/" + resource + "_used", - defer(master, &Master::_resources_used, resource)); - resources_used.push_back(usedGauge); - process::metrics::add(usedGauge); - - process::metrics::Gauge percentGauge( - "master/" + resource + "_percent", - defer(master, &Master::_resources_percent, resource)); - resources_percent.push_back(percentGauge); - process::metrics::add(percentGauge); - } -} - - -Master::Metrics::~Metrics() -{ - // TODO(dhamon): Check return values of 'remove'. 
- process::metrics::remove(uptime_secs); - process::metrics::remove(elected); - - process::metrics::remove(slaves_connected); - process::metrics::remove(slaves_disconnected); - process::metrics::remove(slaves_active); - process::metrics::remove(slaves_inactive); - - process::metrics::remove(frameworks_connected); - process::metrics::remove(frameworks_disconnected); - process::metrics::remove(frameworks_active); - process::metrics::remove(frameworks_inactive); - - process::metrics::remove(outstanding_offers); - - process::metrics::remove(tasks_staging); - process::metrics::remove(tasks_starting); - process::metrics::remove(tasks_running); - process::metrics::remove(tasks_finished); - process::metrics::remove(tasks_failed); - process::metrics::remove(tasks_killed); - process::metrics::remove(tasks_lost); - process::metrics::remove(tasks_error); - - process::metrics::remove(dropped_messages); - - // Messages from schedulers. - process::metrics::remove(messages_register_framework); - process::metrics::remove(messages_reregister_framework); - process::metrics::remove(messages_unregister_framework); - process::metrics::remove(messages_deactivate_framework); - process::metrics::remove(messages_kill_task); - process::metrics::remove(messages_status_update_acknowledgement); - process::metrics::remove(messages_resource_request); - process::metrics::remove(messages_launch_tasks); - process::metrics::remove(messages_decline_offers); - process::metrics::remove(messages_revive_offers); - process::metrics::remove(messages_reconcile_tasks); - process::metrics::remove(messages_framework_to_executor); - - // Messages from slaves. - process::metrics::remove(messages_register_slave); - process::metrics::remove(messages_reregister_slave); - process::metrics::remove(messages_unregister_slave); - process::metrics::remove(messages_status_update); - process::metrics::remove(messages_exited_executor); - - // Messages from both schedulers and slaves. 
- process::metrics::remove(messages_authenticate); - - process::metrics::remove(valid_framework_to_executor_messages); - process::metrics::remove(invalid_framework_to_executor_messages); - - process::metrics::remove(valid_status_updates); - process::metrics::remove(invalid_status_updates); - - process::metrics::remove(valid_status_update_acknowledgements); - process::metrics::remove(invalid_status_update_acknowledgements); - - process::metrics::remove(recovery_slave_removals); - - process::metrics::remove(event_queue_messages); - process::metrics::remove(event_queue_dispatches); - process::metrics::remove(event_queue_http_requests); - - process::metrics::remove(slave_registrations); - process::metrics::remove(slave_reregistrations); - process::metrics::remove(slave_removals); - - foreach (const process::metrics::Gauge& gauge, resources_total) { - process::metrics::remove(gauge); - } - resources_total.clear(); - - foreach (const process::metrics::Gauge& gauge, resources_used) { - process::metrics::remove(gauge); - } - resources_used.clear(); - - foreach (const process::metrics::Gauge& gauge, resources_percent) { - process::metrics::remove(gauge); - } - resources_percent.clear(); -} - - double Master::_resources_total(const std::string& name) { double total = 0.0; http://git-wip-us.apache.org/repos/asf/mesos/blob/da37eb24/src/master/master.hpp ---------------------------------------------------------------------- diff --git a/src/master/master.hpp b/src/master/master.hpp index 47f3bc9..ece36c3 100644 --- a/src/master/master.hpp +++ b/src/master/master.hpp @@ -35,10 +35,6 @@ #include <process/protobuf.hpp> #include <process/timer.hpp> -#include <process/metrics/counter.hpp> -#include <process/metrics/gauge.hpp> -#include <process/metrics/metrics.hpp> - #include <stout/cache.hpp> #include <stout/foreach.hpp> #include <stout/hashmap.hpp> @@ -56,6 +52,7 @@ #include "master/contender.hpp" #include "master/detector.hpp" #include "master/flags.hpp" +#include 
"master/metrics.hpp" #include "master/registrar.hpp" #include "messages/messages.hpp" @@ -475,6 +472,7 @@ private: Master& operator = (const Master&); // No assigning. friend struct OfferVisitor; + friend struct Metrics; const Flags flags; @@ -607,133 +605,7 @@ private: uint64_t invalidFrameworkMessages; } stats; - struct Metrics - { - Metrics(const Master& master); - - ~Metrics(); - - process::metrics::Gauge uptime_secs; - process::metrics::Gauge elected; - - process::metrics::Gauge slaves_connected; - process::metrics::Gauge slaves_disconnected; - process::metrics::Gauge slaves_active; - process::metrics::Gauge slaves_inactive; - - process::metrics::Gauge frameworks_connected; - process::metrics::Gauge frameworks_disconnected; - process::metrics::Gauge frameworks_active; - process::metrics::Gauge frameworks_inactive; - - process::metrics::Gauge outstanding_offers; - - // Task state metrics. - process::metrics::Gauge tasks_staging; - process::metrics::Gauge tasks_starting; - process::metrics::Gauge tasks_running; - process::metrics::Counter tasks_finished; - process::metrics::Counter tasks_failed; - process::metrics::Counter tasks_killed; - process::metrics::Counter tasks_lost; - process::metrics::Counter tasks_error; - - // Message counters. - process::metrics::Counter dropped_messages; - - // Metrics specific to frameworks of a common principal. - // These metrics have names prefixed by "frameworks/<principal>/". - struct Frameworks - { - // Counters for messages from all frameworks of this principal. - // Note: We only count messages from active scheduler - // *instances* while they are *registered*. i.e., messages - // prior to the completion of (re)registration - // (AuthenticateMessage and (Re)RegisterFrameworkMessage) and - // messages from an inactive scheduler instance (after the - // framework has failed over) are not counted. - - // Framework messages received (before processing). 
- process::metrics::Counter messages_received; - - // Framework messages processed. - // NOTE: This doesn't include dropped messages. Processing of - // a message may be throttled by a RateLimiter if one is - // configured for this principal. Also due to Master's - // asynchronous nature, this doesn't necessarily mean the work - // requested by this message has finished. - process::metrics::Counter messages_processed; - - explicit Frameworks(const std::string& principal) - : messages_received("frameworks/" + principal + "/messages_received"), - messages_processed("frameworks/" + principal + "/messages_processed") - { - process::metrics::add(messages_received); - process::metrics::add(messages_processed); - } - - ~Frameworks() - { - process::metrics::remove(messages_received); - process::metrics::remove(messages_processed); - } - }; - - // Per-framework-principal metrics keyed by the framework - // principal. - hashmap<std::string, process::Owned<Frameworks>> frameworks; - - // Messages from schedulers. - process::metrics::Counter messages_register_framework; - process::metrics::Counter messages_reregister_framework; - process::metrics::Counter messages_unregister_framework; - process::metrics::Counter messages_deactivate_framework; - process::metrics::Counter messages_kill_task; - process::metrics::Counter messages_status_update_acknowledgement; - process::metrics::Counter messages_resource_request; - process::metrics::Counter messages_launch_tasks; - process::metrics::Counter messages_decline_offers; - process::metrics::Counter messages_revive_offers; - process::metrics::Counter messages_reconcile_tasks; - process::metrics::Counter messages_framework_to_executor; - - // Messages from slaves. 
- process::metrics::Counter messages_register_slave; - process::metrics::Counter messages_reregister_slave; - process::metrics::Counter messages_unregister_slave; - process::metrics::Counter messages_status_update; - process::metrics::Counter messages_exited_executor; - - // Messages from both schedulers and slaves. - process::metrics::Counter messages_authenticate; - - process::metrics::Counter valid_framework_to_executor_messages; - process::metrics::Counter invalid_framework_to_executor_messages; - - process::metrics::Counter valid_status_updates; - process::metrics::Counter invalid_status_updates; - - process::metrics::Counter valid_status_update_acknowledgements; - process::metrics::Counter invalid_status_update_acknowledgements; - - // Recovery counters. - process::metrics::Counter recovery_slave_removals; - - // Process metrics. - process::metrics::Gauge event_queue_messages; - process::metrics::Gauge event_queue_dispatches; - process::metrics::Gauge event_queue_http_requests; - - // Successful registry operations. - process::metrics::Counter slave_registrations; - process::metrics::Counter slave_reregistrations; - process::metrics::Counter slave_removals; - - // Resource metrics. - std::vector<process::metrics::Gauge> resources_total; - std::vector<process::metrics::Gauge> resources_used; - std::vector<process::metrics::Gauge> resources_percent; - } metrics; + Metrics metrics; // Gauge handlers. double _uptime_secs() http://git-wip-us.apache.org/repos/asf/mesos/blob/da37eb24/src/master/metrics.cpp ---------------------------------------------------------------------- diff --git a/src/master/metrics.cpp b/src/master/metrics.cpp new file mode 100644 index 0000000..a7ac96d --- /dev/null +++ b/src/master/metrics.cpp @@ -0,0 +1,339 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "master/metrics.hpp" + +#include "master/master.hpp" + +namespace mesos { +namespace internal { +namespace master { + +// Message counters are named with "messages_" prefix so they can +// be grouped together alphabetically in the output. +// TODO(alexandra.sava): Add metrics for registered and removed slaves. +Metrics::Metrics(const Master& master) + : uptime_secs( + "master/uptime_secs", + defer(master, &Master::_uptime_secs)), + elected( + "master/elected", + defer(master, &Master::_elected)), + slaves_connected( + "master/slaves_connected", + defer(master, &Master::_slaves_connected)), + slaves_disconnected( + "master/slaves_disconnected", + defer(master, &Master::_slaves_disconnected)), + slaves_active( + "master/slaves_active", + defer(master, &Master::_slaves_active)), + slaves_inactive( + "master/slaves_inactive", + defer(master, &Master::_slaves_inactive)), + frameworks_connected( + "master/frameworks_connected", + defer(master, &Master::_frameworks_connected)), + frameworks_disconnected( + "master/frameworks_disconnected", + defer(master, &Master::_frameworks_disconnected)), + frameworks_active( + "master/frameworks_active", + defer(master, &Master::_frameworks_active)), + frameworks_inactive( + "master/frameworks_inactive", + defer(master, &Master::_frameworks_inactive)), + outstanding_offers( + "master/outstanding_offers", + defer(master, 
&Master::_outstanding_offers)), + tasks_staging( + "master/tasks_staging", + defer(master, &Master::_tasks_staging)), + tasks_starting( + "master/tasks_starting", + defer(master, &Master::_tasks_starting)), + tasks_running( + "master/tasks_running", + defer(master, &Master::_tasks_running)), + tasks_finished( + "master/tasks_finished"), + tasks_failed( + "master/tasks_failed"), + tasks_killed( + "master/tasks_killed"), + tasks_lost( + "master/tasks_lost"), + tasks_error( + "master/tasks_error"), + dropped_messages( + "master/dropped_messages"), + messages_register_framework( + "master/messages_register_framework"), + messages_reregister_framework( + "master/messages_reregister_framework"), + messages_unregister_framework( + "master/messages_unregister_framework"), + messages_deactivate_framework( + "master/messages_deactivate_framework"), + messages_kill_task( + "master/messages_kill_task"), + messages_status_update_acknowledgement( + "master/messages_status_update_acknowledgement"), + messages_resource_request( + "master/messages_resource_request"), + messages_launch_tasks( + "master/messages_launch_tasks"), + messages_decline_offers( + "master/messages_decline_offers"), + messages_revive_offers( + "master/messages_revive_offers"), + messages_reconcile_tasks( + "master/messages_reconcile_tasks"), + messages_framework_to_executor( + "master/messages_framework_to_executor"), + messages_register_slave( + "master/messages_register_slave"), + messages_reregister_slave( + "master/messages_reregister_slave"), + messages_unregister_slave( + "master/messages_unregister_slave"), + messages_status_update( + "master/messages_status_update"), + messages_exited_executor( + "master/messages_exited_executor"), + messages_authenticate( + "master/messages_authenticate"), + valid_framework_to_executor_messages( + "master/valid_framework_to_executor_messages"), + invalid_framework_to_executor_messages( + "master/invalid_framework_to_executor_messages"), + valid_status_updates( + 
"master/valid_status_updates"), + invalid_status_updates( + "master/invalid_status_updates"), + valid_status_update_acknowledgements( + "master/valid_status_update_acknowledgements"), + invalid_status_update_acknowledgements( + "master/invalid_status_update_acknowledgements"), + recovery_slave_removals( + "master/recovery_slave_removals"), + event_queue_messages( + "master/event_queue_messages", + defer(master, &Master::_event_queue_messages)), + event_queue_dispatches( + "master/event_queue_dispatches", + defer(master, &Master::_event_queue_dispatches)), + event_queue_http_requests( + "master/event_queue_http_requests", + defer(master, &Master::_event_queue_http_requests)), + slave_registrations( + "master/slave_registrations"), + slave_reregistrations( + "master/slave_reregistrations"), + slave_removals( + "master/slave_removals") +{ + // TODO(dhamon): Check return values of 'add'. + process::metrics::add(uptime_secs); + process::metrics::add(elected); + + process::metrics::add(slaves_connected); + process::metrics::add(slaves_disconnected); + process::metrics::add(slaves_active); + process::metrics::add(slaves_inactive); + + process::metrics::add(frameworks_connected); + process::metrics::add(frameworks_disconnected); + process::metrics::add(frameworks_active); + process::metrics::add(frameworks_inactive); + + process::metrics::add(outstanding_offers); + + process::metrics::add(tasks_staging); + process::metrics::add(tasks_starting); + process::metrics::add(tasks_running); + process::metrics::add(tasks_finished); + process::metrics::add(tasks_failed); + process::metrics::add(tasks_killed); + process::metrics::add(tasks_lost); + process::metrics::add(tasks_error); + + process::metrics::add(dropped_messages); + + // Messages from schedulers. 
+ process::metrics::add(messages_register_framework); + process::metrics::add(messages_reregister_framework); + process::metrics::add(messages_unregister_framework); + process::metrics::add(messages_deactivate_framework); + process::metrics::add(messages_kill_task); + process::metrics::add(messages_status_update_acknowledgement); + process::metrics::add(messages_resource_request); + process::metrics::add(messages_launch_tasks); + process::metrics::add(messages_decline_offers); + process::metrics::add(messages_revive_offers); + process::metrics::add(messages_reconcile_tasks); + process::metrics::add(messages_framework_to_executor); + + // Messages from slaves. + process::metrics::add(messages_register_slave); + process::metrics::add(messages_reregister_slave); + process::metrics::add(messages_unregister_slave); + process::metrics::add(messages_status_update); + process::metrics::add(messages_exited_executor); + + // Messages from both schedulers and slaves. + process::metrics::add(messages_authenticate); + + process::metrics::add(valid_framework_to_executor_messages); + process::metrics::add(invalid_framework_to_executor_messages); + + process::metrics::add(valid_status_updates); + process::metrics::add(invalid_status_updates); + + process::metrics::add(valid_status_update_acknowledgements); + process::metrics::add(invalid_status_update_acknowledgements); + + process::metrics::add(recovery_slave_removals); + + process::metrics::add(event_queue_messages); + process::metrics::add(event_queue_dispatches); + process::metrics::add(event_queue_http_requests); + + process::metrics::add(slave_registrations); + process::metrics::add(slave_reregistrations); + process::metrics::add(slave_removals); + + // Create resource gauges. + // TODO(dhamon): Set these up dynamically when adding a slave based on the + // resources the slave exposes. 
+ const std::string resources[] = {"cpus", "mem", "disk"}; + + foreach (const std::string& resource, resources) { + process::metrics::Gauge totalGauge( + "master/" + resource + "_total", + defer(master, &Master::_resources_total, resource)); + resources_total.push_back(totalGauge); + process::metrics::add(totalGauge); + + process::metrics::Gauge usedGauge( + "master/" + resource + "_used", + defer(master, &Master::_resources_used, resource)); + resources_used.push_back(usedGauge); + process::metrics::add(usedGauge); + + process::metrics::Gauge percentGauge( + "master/" + resource + "_percent", + defer(master, &Master::_resources_percent, resource)); + resources_percent.push_back(percentGauge); + process::metrics::add(percentGauge); + } +} + + +Metrics::~Metrics() +{ + // TODO(dhamon): Check return values of 'remove'. + process::metrics::remove(uptime_secs); + process::metrics::remove(elected); + + process::metrics::remove(slaves_connected); + process::metrics::remove(slaves_disconnected); + process::metrics::remove(slaves_active); + process::metrics::remove(slaves_inactive); + + process::metrics::remove(frameworks_connected); + process::metrics::remove(frameworks_disconnected); + process::metrics::remove(frameworks_active); + process::metrics::remove(frameworks_inactive); + + process::metrics::remove(outstanding_offers); + + process::metrics::remove(tasks_staging); + process::metrics::remove(tasks_starting); + process::metrics::remove(tasks_running); + process::metrics::remove(tasks_finished); + process::metrics::remove(tasks_failed); + process::metrics::remove(tasks_killed); + process::metrics::remove(tasks_lost); + process::metrics::remove(tasks_error); + + process::metrics::remove(dropped_messages); + + // Messages from schedulers. 
+ process::metrics::remove(messages_register_framework); + process::metrics::remove(messages_reregister_framework); + process::metrics::remove(messages_unregister_framework); + process::metrics::remove(messages_deactivate_framework); + process::metrics::remove(messages_kill_task); + process::metrics::remove(messages_status_update_acknowledgement); + process::metrics::remove(messages_resource_request); + process::metrics::remove(messages_launch_tasks); + process::metrics::remove(messages_decline_offers); + process::metrics::remove(messages_revive_offers); + process::metrics::remove(messages_reconcile_tasks); + process::metrics::remove(messages_framework_to_executor); + + // Messages from slaves. + process::metrics::remove(messages_register_slave); + process::metrics::remove(messages_reregister_slave); + process::metrics::remove(messages_unregister_slave); + process::metrics::remove(messages_status_update); + process::metrics::remove(messages_exited_executor); + + // Messages from both schedulers and slaves. 
+ process::metrics::remove(messages_authenticate); + + process::metrics::remove(valid_framework_to_executor_messages); + process::metrics::remove(invalid_framework_to_executor_messages); + + process::metrics::remove(valid_status_updates); + process::metrics::remove(invalid_status_updates); + + process::metrics::remove(valid_status_update_acknowledgements); + process::metrics::remove(invalid_status_update_acknowledgements); + + process::metrics::remove(recovery_slave_removals); + + process::metrics::remove(event_queue_messages); + process::metrics::remove(event_queue_dispatches); + process::metrics::remove(event_queue_http_requests); + + process::metrics::remove(slave_registrations); + process::metrics::remove(slave_reregistrations); + process::metrics::remove(slave_removals); + + foreach (const process::metrics::Gauge& gauge, resources_total) { + process::metrics::remove(gauge); + } + resources_total.clear(); + + foreach (const process::metrics::Gauge& gauge, resources_used) { + process::metrics::remove(gauge); + } + resources_used.clear(); + + foreach (const process::metrics::Gauge& gauge, resources_percent) { + process::metrics::remove(gauge); + } + resources_percent.clear(); +} + + +} // namespace master { +} // namespace internal { +} // namespace mesos { http://git-wip-us.apache.org/repos/asf/mesos/blob/da37eb24/src/master/metrics.hpp ---------------------------------------------------------------------- diff --git a/src/master/metrics.hpp b/src/master/metrics.hpp new file mode 100644 index 0000000..5e6b6d5 --- /dev/null +++ b/src/master/metrics.hpp @@ -0,0 +1,169 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MASTER_METRICS_HPP__
+#define __MASTER_METRICS_HPP__
+
+#include <string>
+#include <vector>
+
+#include <process/owned.hpp>
+
+#include <process/metrics/counter.hpp>
+#include <process/metrics/gauge.hpp>
+#include <process/metrics/metrics.hpp>
+
+#include <stout/hashmap.hpp>
+
+namespace mesos {
+namespace internal {
+namespace master {
+
+// Forward declaration: this header only needs a reference to Master.
+class Master;
+
+// The master's gauges and counters, grouped in one place.
+//
+// The nested Frameworks struct adds its counters through
+// process::metrics::add() on construction and removes them through
+// process::metrics::remove() on destruction.
+// NOTE(review): the out-of-line constructor and destructor of
+// Metrics presumably do the same for the members declared below;
+// confirm in metrics.cpp.
+struct Metrics
+{
+  // 'explicit' (consistent with Frameworks below) so that a Master
+  // is never implicitly converted into a Metrics object.
+  explicit Metrics(const Master& master);
+
+  ~Metrics();
+
+  process::metrics::Gauge uptime_secs;
+  process::metrics::Gauge elected;
+
+  process::metrics::Gauge slaves_connected;
+  process::metrics::Gauge slaves_disconnected;
+  process::metrics::Gauge slaves_active;
+  process::metrics::Gauge slaves_inactive;
+
+  process::metrics::Gauge frameworks_connected;
+  process::metrics::Gauge frameworks_disconnected;
+  process::metrics::Gauge frameworks_active;
+  process::metrics::Gauge frameworks_inactive;
+
+  process::metrics::Gauge outstanding_offers;
+
+  // Task state metrics.
+  process::metrics::Gauge tasks_staging;
+  process::metrics::Gauge tasks_starting;
+  process::metrics::Gauge tasks_running;
+  process::metrics::Counter tasks_finished;
+  process::metrics::Counter tasks_failed;
+  process::metrics::Counter tasks_killed;
+  process::metrics::Counter tasks_lost;
+  process::metrics::Counter tasks_error;
+
+  // Message counters.
+  process::metrics::Counter dropped_messages;
+
+  // Metrics specific to frameworks of a common principal.
+  // These metrics have names prefixed by "frameworks/<principal>/".
+  //
+  // NOTE(review): the destructor removes the counters, so the
+  // destructor of a copy would call process::metrics::remove() for
+  // the same counters again. Instances are only held through
+  // process::Owned in the 'frameworks' map below.
+  struct Frameworks
+  {
+    // Counters for messages from all frameworks of this principal.
+    // Note: We only count messages from active scheduler
+    // *instances* while they are *registered*. i.e., messages
+    // prior to the completion of (re)registration
+    // (AuthenticateMessage and (Re)RegisterFrameworkMessage) and
+    // messages from an inactive scheduler instance (after the
+    // framework has failed over) are not counted.
+
+    // Framework messages received (before processing).
+    process::metrics::Counter messages_received;
+
+    // Framework messages processed.
+    // NOTE: This doesn't include dropped messages. Processing of
+    // a message may be throttled by a RateLimiter if one is
+    // configured for this principal. Also due to Master's
+    // asynchronous nature, this doesn't necessarily mean the work
+    // requested by this message has finished.
+    process::metrics::Counter messages_processed;
+
+    // Names both counters after 'principal' and adds them.
+    explicit Frameworks(const std::string& principal)
+      : messages_received("frameworks/" + principal + "/messages_received"),
+        messages_processed("frameworks/" + principal + "/messages_processed")
+    {
+      process::metrics::add(messages_received);
+      process::metrics::add(messages_processed);
+    }
+
+    // Removes the counters added by the constructor.
+    ~Frameworks()
+    {
+      process::metrics::remove(messages_received);
+      process::metrics::remove(messages_processed);
+    }
+  };
+
+  // Per-framework-principal metrics keyed by the framework
+  // principal.
+  hashmap<std::string, process::Owned<Frameworks>> frameworks;
+
+  // Messages from schedulers.
+  process::metrics::Counter messages_register_framework;
+  process::metrics::Counter messages_reregister_framework;
+  process::metrics::Counter messages_unregister_framework;
+  process::metrics::Counter messages_deactivate_framework;
+  process::metrics::Counter messages_kill_task;
+  process::metrics::Counter messages_status_update_acknowledgement;
+  process::metrics::Counter messages_resource_request;
+  process::metrics::Counter messages_launch_tasks;
+  process::metrics::Counter messages_decline_offers;
+  process::metrics::Counter messages_revive_offers;
+  process::metrics::Counter messages_reconcile_tasks;
+  process::metrics::Counter messages_framework_to_executor;
+
+  // Messages from slaves.
+  process::metrics::Counter messages_register_slave;
+  process::metrics::Counter messages_reregister_slave;
+  process::metrics::Counter messages_unregister_slave;
+  process::metrics::Counter messages_status_update;
+  process::metrics::Counter messages_exited_executor;
+
+  // Messages from both schedulers and slaves.
+  process::metrics::Counter messages_authenticate;
+
+  process::metrics::Counter valid_framework_to_executor_messages;
+  process::metrics::Counter invalid_framework_to_executor_messages;
+
+  process::metrics::Counter valid_status_updates;
+  process::metrics::Counter invalid_status_updates;
+
+  process::metrics::Counter valid_status_update_acknowledgements;
+  process::metrics::Counter invalid_status_update_acknowledgements;
+
+  // Recovery counters.
+  process::metrics::Counter recovery_slave_removals;
+
+  // Process metrics.
+  process::metrics::Gauge event_queue_messages;
+  process::metrics::Gauge event_queue_dispatches;
+  process::metrics::Gauge event_queue_http_requests;
+
+  // Successful registry operations.
+  process::metrics::Counter slave_registrations;
+  process::metrics::Counter slave_reregistrations;
+  process::metrics::Counter slave_removals;
+
+  // Resource metrics.
+  std::vector<process::metrics::Gauge> resources_total;
+  std::vector<process::metrics::Gauge> resources_used;
+  std::vector<process::metrics::Gauge> resources_percent;
+};
+
+} // namespace master {
+} // namespace internal {
+} // namespace mesos {
+
+#endif // __MASTER_METRICS_HPP__
http://git-wip-us.apache.org/repos/asf/mesos/blob/da37eb24/src/tests/metrics_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/metrics_tests.cpp b/src/tests/metrics_tests.cpp
new file mode 100644
index 0000000..72571f2
--- /dev/null
+++ b/src/tests/metrics_tests.cpp
@@ -0,0 +1,147 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include <process/future.hpp>
+#include <process/gtest.hpp>
+#include <process/http.hpp>
+#include <process/pid.hpp>
+
+#include <stout/gtest.hpp>
+#include <stout/json.hpp>
+#include <stout/try.hpp>
+
+#include "master/master.hpp"
+#include "tests/mesos.hpp"
+
+using namespace mesos;
+using namespace mesos::internal;
+
+using mesos::internal::master::Master;
+
+class MetricsTest : public mesos::internal::tests::MesosTest {};
+
+// Starts a master, fetches the "snapshot" endpoint of the "metrics"
+// process and checks that every expected metric key is present
+// exactly once in the returned JSON object.
+TEST_F(MetricsTest, Master)
+{
+  Try<process::PID<Master>> master = StartMaster();
+  ASSERT_SOME(master);
+
+  // Get the snapshot.
+  process::UPID upid("metrics", process::node());
+
+  process::Future<process::http::Response> response =
+    process::http::get(upid, "snapshot");
+  AWAIT_EXPECT_RESPONSE_STATUS_EQ(process::http::OK().status, response);
+
+  EXPECT_SOME_EQ(
+      "application/json",
+      response.get().headers.get("Content-Type"));
+
+  Try<JSON::Object> parse = JSON::parse<JSON::Object>(response.get().body);
+  ASSERT_SOME(parse);
+
+  JSON::Object stats = parse.get();
+
+  EXPECT_EQ(1u, stats.values.count("master/uptime_secs"));
+
+  EXPECT_EQ(1u, stats.values.count("master/elected"));
+
+  EXPECT_EQ(1u, stats.values.count("master/slaves_connected"));
+  EXPECT_EQ(1u, stats.values.count("master/slaves_disconnected"));
+  EXPECT_EQ(1u, stats.values.count("master/slaves_active"));
+  EXPECT_EQ(1u, stats.values.count("master/slaves_inactive"));
+
+  EXPECT_EQ(1u, stats.values.count("master/frameworks_connected"));
+  EXPECT_EQ(1u, stats.values.count("master/frameworks_disconnected"));
+  EXPECT_EQ(1u, stats.values.count("master/frameworks_active"));
+  EXPECT_EQ(1u, stats.values.count("master/frameworks_inactive"));
+
+  EXPECT_EQ(1u, stats.values.count("master/outstanding_offers"));
+
+  EXPECT_EQ(1u, stats.values.count("master/tasks_staging"));
+  EXPECT_EQ(1u, stats.values.count("master/tasks_starting"));
+  EXPECT_EQ(1u, stats.values.count("master/tasks_running"));
+  EXPECT_EQ(1u, stats.values.count("master/tasks_finished"));
+  EXPECT_EQ(1u, stats.values.count("master/tasks_failed"));
+  EXPECT_EQ(1u, stats.values.count("master/tasks_killed"));
+  EXPECT_EQ(1u, stats.values.count("master/tasks_lost"));
+  EXPECT_EQ(1u, stats.values.count("master/tasks_error"));
+
+  EXPECT_EQ(1u, stats.values.count("master/dropped_messages"));
+
+  // Messages from schedulers.
+  EXPECT_EQ(1u, stats.values.count("master/messages_register_framework"));
+  EXPECT_EQ(1u, stats.values.count("master/messages_reregister_framework"));
+  EXPECT_EQ(1u, stats.values.count("master/messages_unregister_framework"));
+  EXPECT_EQ(1u, stats.values.count("master/messages_deactivate_framework"));
+  EXPECT_EQ(1u, stats.values.count("master/messages_kill_task"));
+  EXPECT_EQ(1u, stats.values.count(
+      "master/messages_status_update_acknowledgement"));
+  EXPECT_EQ(1u, stats.values.count("master/messages_resource_request"));
+  EXPECT_EQ(1u, stats.values.count("master/messages_launch_tasks"));
+  EXPECT_EQ(1u, stats.values.count("master/messages_decline_offers"));
+  EXPECT_EQ(1u, stats.values.count("master/messages_revive_offers"));
+  EXPECT_EQ(1u, stats.values.count("master/messages_reconcile_tasks"));
+  EXPECT_EQ(1u, stats.values.count("master/messages_framework_to_executor"));
+
+  // Messages from slaves.
+  EXPECT_EQ(1u, stats.values.count("master/messages_register_slave"));
+  EXPECT_EQ(1u, stats.values.count("master/messages_reregister_slave"));
+  EXPECT_EQ(1u, stats.values.count("master/messages_unregister_slave"));
+  EXPECT_EQ(1u, stats.values.count("master/messages_status_update"));
+  EXPECT_EQ(1u, stats.values.count("master/messages_exited_executor"));
+
+  // Messages from both schedulers and slaves.
+  EXPECT_EQ(1u, stats.values.count("master/messages_authenticate"));
+
+  EXPECT_EQ(1u, stats.values.count(
+      "master/valid_framework_to_executor_messages"));
+  EXPECT_EQ(1u, stats.values.count(
+      "master/invalid_framework_to_executor_messages"));
+
+  EXPECT_EQ(1u, stats.values.count("master/valid_status_updates"));
+  EXPECT_EQ(1u, stats.values.count("master/invalid_status_updates"));
+
+  EXPECT_EQ(1u, stats.values.count(
+      "master/valid_status_update_acknowledgements"));
+  EXPECT_EQ(1u, stats.values.count(
+      "master/invalid_status_update_acknowledgements"));
+
+  EXPECT_EQ(1u, stats.values.count("master/recovery_slave_removals"));
+
+  EXPECT_EQ(1u, stats.values.count("master/event_queue_messages"));
+  EXPECT_EQ(1u, stats.values.count("master/event_queue_dispatches"));
+  EXPECT_EQ(1u, stats.values.count("master/event_queue_http_requests"));
+
+  // Successful registry operations.
+  // NOTE(review): these key names are assumed to follow the
+  // "master/<member>" pattern of the counters above; confirm
+  // against metrics.cpp.
+  EXPECT_EQ(1u, stats.values.count("master/slave_registrations"));
+  EXPECT_EQ(1u, stats.values.count("master/slave_reregistrations"));
+  EXPECT_EQ(1u, stats.values.count("master/slave_removals"));
+
+  EXPECT_EQ(1u, stats.values.count("master/cpus_total"));
+  EXPECT_EQ(1u, stats.values.count("master/cpus_used"));
+  EXPECT_EQ(1u, stats.values.count("master/cpus_percent"));
+
+  EXPECT_EQ(1u, stats.values.count("master/mem_total"));
+  EXPECT_EQ(1u, stats.values.count("master/mem_used"));
+  EXPECT_EQ(1u, stats.values.count("master/mem_percent"));
+
+  EXPECT_EQ(1u, stats.values.count("master/disk_total"));
+  EXPECT_EQ(1u, stats.values.count("master/disk_used"));
+  EXPECT_EQ(1u, stats.values.count("master/disk_percent"));
+
+  EXPECT_EQ(1u, stats.values.count("registrar/queued_operations"));
+  EXPECT_EQ(1u, stats.values.count("registrar/registry_size_bytes"));
+
+  EXPECT_EQ(1u, stats.values.count("registrar/state_fetch_ms"));
+  EXPECT_EQ(1u, stats.values.count("registrar/state_store_ms"));
+}
