Repository: mesos Updated Branches: refs/heads/1.4.x 163dd77ef -> 63aca0dee refs/heads/1.5.x 6e2dfd06c -> 74a6f3cdb refs/heads/1.6.x 2f6f3812c -> 70649a220 refs/heads/master 1292d59e7 -> 914728317
Added balloon framework metric for tasks which were running. The framework currently exposes metric counters for various expected and unexpected task termination reasons. Interpreting these counters can be non-trivial since tasks might fail due to benign, but unknown external reasons. This patch adds a counter for the tasks which actually made it to the running stage which can be correlated with the different terminal task counts. Review: https://reviews.apache.org/r/67928 Project: http://git-wip-us.apache.org/repos/asf/mesos/repo Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/74a6f3cd Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/74a6f3cd Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/74a6f3cd Branch: refs/heads/1.5.x Commit: 74a6f3cdb8206b41f96e1ebf5981783ea999c250 Parents: 6e2dfd0 Author: Benjamin Bannier <bbann...@apache.org> Authored: Mon Jul 16 16:39:21 2018 +0200 Committer: Benjamin Bannier <bbann...@apache.org> Committed: Tue Jul 17 18:46:01 2018 +0200 ---------------------------------------------------------------------- src/examples/balloon_framework.cpp | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/mesos/blob/74a6f3cd/src/examples/balloon_framework.cpp ---------------------------------------------------------------------- diff --git a/src/examples/balloon_framework.cpp b/src/examples/balloon_framework.cpp index 410966e..817d713 100644 --- a/src/examples/balloon_framework.cpp +++ b/src/examples/balloon_framework.cpp @@ -288,7 +288,6 @@ public: taskActive = false; if (status.reason() == TaskStatus::REASON_CONTAINER_LIMITATION_MEMORY) { ++metrics.tasks_oomed; - break; } // NOTE: Fetching the executor (e.g. `--executor_uri`) may fail @@ -296,8 +295,8 @@ public: // enough that it makes sense to track this failure metric separately. if (status.reason() == TaskStatus::REASON_CONTAINER_LAUNCH_FAILED) { ++metrics.launch_failures; - break; } + break; case TASK_KILLED: case TASK_LOST: case TASK_ERROR: @@ -305,9 +304,22 @@ public: if (status.reason() != TaskStatus::REASON_INVALID_OFFERS) { ++metrics.abnormal_terminations; - break; } - default: + break; + case TASK_RUNNING: + ++metrics.tasks_running; + break; + // We ignore uninteresting transient task status updates. + case TASK_KILLING: + case TASK_STAGING: + case TASK_STARTING: + break; + // We ignore task status updates related to reconciliation. + case TASK_DROPPED: + case TASK_GONE: + case TASK_GONE_BY_OPERATOR: + case TASK_UNKNOWN: + case TASK_UNREACHABLE: break; } } @@ -343,6 +355,7 @@ private: defer(_scheduler, &BalloonSchedulerProcess::_registered)), tasks_finished(string(FRAMEWORK_METRICS_PREFIX) + "/tasks_finished"), tasks_oomed(string(FRAMEWORK_METRICS_PREFIX) + "/tasks_oomed"), + tasks_running(string(FRAMEWORK_METRICS_PREFIX) + "/tasks_running"), launch_failures(string(FRAMEWORK_METRICS_PREFIX) + "/launch_failures"), abnormal_terminations( string(FRAMEWORK_METRICS_PREFIX) + "/abnormal_terminations") @@ -351,6 +364,7 @@ private: process::metrics::add(registered); process::metrics::add(tasks_finished); process::metrics::add(tasks_oomed); + process::metrics::add(tasks_running); process::metrics::add(launch_failures); process::metrics::add(abnormal_terminations); } @@ -370,6 +384,7 @@ private: process::metrics::Counter tasks_finished; process::metrics::Counter tasks_oomed; + process::metrics::Counter tasks_running; process::metrics::Counter launch_failures; process::metrics::Counter abnormal_terminations; } metrics;