Repository: mesos Updated Branches: refs/heads/master b32f1b6f3 -> a2f734498
Added metric for number of preempted executors. We should keep track of how many containers/executors have been destroyed/killed due to preemption (due to QoS corrections). Review: https://reviews.apache.org/r/36862 Project: http://git-wip-us.apache.org/repos/asf/mesos/repo Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/a2f73449 Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/a2f73449 Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/a2f73449 Branch: refs/heads/master Commit: a2f734498af707f75737da40100f348054370ec1 Parents: b32f1b6 Author: Niklas Nielsen <[email protected]> Authored: Mon Jul 27 16:34:38 2015 -0700 Committer: Niklas Q. Nielsen <[email protected]> Committed: Mon Jul 27 16:34:39 2015 -0700 ---------------------------------------------------------------------- src/slave/metrics.cpp | 4 ++++ src/slave/metrics.hpp | 1 + src/slave/slave.cpp | 2 ++ src/tests/oversubscription_tests.cpp | 10 ++++++++++ src/tests/slave_tests.cpp | 1 + 5 files changed, 18 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/mesos/blob/a2f73449/src/slave/metrics.cpp ---------------------------------------------------------------------- diff --git a/src/slave/metrics.cpp b/src/slave/metrics.cpp index ae3a53e..7ed3e0e 100644 --- a/src/slave/metrics.cpp +++ b/src/slave/metrics.cpp @@ -74,6 +74,8 @@ Metrics::Metrics(const Slave& slave) defer(slave, &Slave::_executors_terminating)), executors_terminated( "slave/executors_terminated"), + executors_preempted( + "slave/executors_preempted"), valid_status_updates( "slave/valid_status_updates"), invalid_status_updates( @@ -108,6 +110,7 @@ Metrics::Metrics(const Slave& slave) process::metrics::add(executors_running); process::metrics::add(executors_terminating); process::metrics::add(executors_terminated); + process::metrics::add(executors_preempted); process::metrics::add(valid_status_updates); process::metrics::add(invalid_status_updates); 
@@ -192,6 +195,7 @@ Metrics::~Metrics() process::metrics::remove(executors_running); process::metrics::remove(executors_terminating); process::metrics::remove(executors_terminated); + process::metrics::remove(executors_preempted); process::metrics::remove(valid_status_updates); process::metrics::remove(invalid_status_updates); http://git-wip-us.apache.org/repos/asf/mesos/blob/a2f73449/src/slave/metrics.hpp ---------------------------------------------------------------------- diff --git a/src/slave/metrics.hpp b/src/slave/metrics.hpp index 43c8662..a39d434 100644 --- a/src/slave/metrics.hpp +++ b/src/slave/metrics.hpp @@ -56,6 +56,7 @@ struct Metrics process::metrics::Gauge executors_running; process::metrics::Gauge executors_terminating; process::metrics::Counter executors_terminated; + process::metrics::Counter executors_preempted; process::metrics::Counter valid_status_updates; process::metrics::Counter invalid_status_updates; http://git-wip-us.apache.org/repos/asf/mesos/blob/a2f73449/src/slave/slave.cpp ---------------------------------------------------------------------- diff --git a/src/slave/slave.cpp b/src/slave/slave.cpp index 16c403b..f91fa92 100644 --- a/src/slave/slave.cpp +++ b/src/slave/slave.cpp @@ -4386,6 +4386,8 @@ void Slave::_qosCorrections(const Future<list<QoSCorrection>>& future) executor->state = Executor::TERMINATING; executor->reason = TaskStatus::REASON_EXECUTOR_PREEMPTED; containerizer->destroy(executor->containerId); + + ++metrics.executors_preempted; break; } case Executor::TERMINATING: http://git-wip-us.apache.org/repos/asf/mesos/blob/a2f73449/src/tests/oversubscription_tests.cpp ---------------------------------------------------------------------- diff --git a/src/tests/oversubscription_tests.cpp b/src/tests/oversubscription_tests.cpp index c7a2dac..26ef5de 100644 --- a/src/tests/oversubscription_tests.cpp +++ b/src/tests/oversubscription_tests.cpp @@ -805,6 +805,12 @@ TEST_F(OversubscriptionTest, QoSCorrectionKill) Try<PID<Slave>> 
slave = StartSlave(&controller, CreateSlaveFlags()); ASSERT_SOME(slave); + // Verify presence and initial value of counter for preempted + // executors. + JSON::Object snapshot = Metrics(); + EXPECT_EQ(1u, snapshot.values.count("slave/executors_preempted")); + EXPECT_EQ(0u, snapshot.values["slave/executors_preempted"]); + MockScheduler sched; MesosSchedulerDriver driver( &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL); @@ -856,6 +862,10 @@ TEST_F(OversubscriptionTest, QoSCorrectionKill) ASSERT_EQ(TASK_LOST, status2.get().state()); ASSERT_EQ(TaskStatus::REASON_EXECUTOR_PREEMPTED, status2.get().reason()); + // Verify that slave incremented counter for preempted executors. + snapshot = Metrics(); + EXPECT_EQ(1u, snapshot.values["slave/executors_preempted"]); + driver.stop(); driver.join(); http://git-wip-us.apache.org/repos/asf/mesos/blob/a2f73449/src/tests/slave_tests.cpp ---------------------------------------------------------------------- diff --git a/src/tests/slave_tests.cpp b/src/tests/slave_tests.cpp index e086817..cb5a01e 100644 --- a/src/tests/slave_tests.cpp +++ b/src/tests/slave_tests.cpp @@ -935,6 +935,7 @@ TEST_F(SlaveTest, MetricsInMetricsEndpoint) EXPECT_EQ(1u, snapshot.values.count("slave/executors_running")); EXPECT_EQ(1u, snapshot.values.count("slave/executors_terminating")); EXPECT_EQ(1u, snapshot.values.count("slave/executors_terminated")); + EXPECT_EQ(1u, snapshot.values.count("slave/executors_preempted")); EXPECT_EQ(1u, snapshot.values.count("slave/valid_status_updates")); EXPECT_EQ(1u, snapshot.values.count("slave/invalid_status_updates"));
