Repository: mesos Updated Branches: refs/heads/master 3febf57dd -> d93906aea
Added "max allowed age" to metrics endpoint. The max allowed executor directory age in the metrics endpoint will be updated every disk usage cycle (measured every --disk_watch_interval). Between slave start and the first disk usage cycle, the max age will be the age computed with zero disk usage as input. This patch also modifies the unit test `SlaveTest.MetricsInMetricsEndpoint` to take the new entry into account. Review: https://reviews.apache.org/r/30074 Project: http://git-wip-us.apache.org/repos/asf/mesos/repo Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/d93906ae Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/d93906ae Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/d93906ae Branch: refs/heads/master Commit: d93906aeaab21540c2ea6d00caf1dea472a49e25 Parents: 3febf57 Author: Alexander Rojas <[email protected]> Authored: Tue Mar 10 11:31:01 2015 -0700 Committer: Niklas Q. Nielsen <[email protected]> Committed: Tue Mar 10 11:41:04 2015 -0700 ---------------------------------------------------------------------- src/slave/metrics.cpp | 9 ++++++++- src/slave/metrics.hpp | 2 ++ src/slave/slave.cpp | 14 +++++++++++--- src/slave/slave.hpp | 6 ++++++ src/tests/slave_tests.cpp | 4 ++++ 5 files changed, 31 insertions(+), 4 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/mesos/blob/d93906ae/src/slave/metrics.cpp ---------------------------------------------------------------------- diff --git a/src/slave/metrics.cpp b/src/slave/metrics.cpp index c32b4d8..7a31ce7 100644 --- a/src/slave/metrics.cpp +++ b/src/slave/metrics.cpp @@ -77,7 +77,10 @@ Metrics::Metrics(const Slave& slave) valid_framework_messages( "slave/valid_framework_messages"), invalid_framework_messages( - "slave/invalid_framework_messages") + "slave/invalid_framework_messages"), + executor_directory_max_allowed_age_secs( + "slave/executor_directory_max_allowed_age_secs", + defer(slave, 
&Slave::_executor_directory_max_allowed_age_secs)) { // TODO(dhamon): Check return values for metric registration. process::metrics::add(uptime_secs); @@ -106,6 +109,8 @@ Metrics::Metrics(const Slave& slave) process::metrics::add(valid_framework_messages); process::metrics::add(invalid_framework_messages); + process::metrics::add(executor_directory_max_allowed_age_secs); + // Create resource gauges. // TODO(dhamon): Set these up dynamically when creating a slave // based on the resources it exposes. @@ -162,6 +167,8 @@ Metrics::~Metrics() process::metrics::remove(valid_framework_messages); process::metrics::remove(invalid_framework_messages); + process::metrics::remove(executor_directory_max_allowed_age_secs); + foreach (const process::metrics::Gauge& gauge, resources_total) { process::metrics::remove(gauge); } http://git-wip-us.apache.org/repos/asf/mesos/blob/d93906ae/src/slave/metrics.hpp ---------------------------------------------------------------------- diff --git a/src/slave/metrics.hpp b/src/slave/metrics.hpp index 0390461..6af7f07 100644 --- a/src/slave/metrics.hpp +++ b/src/slave/metrics.hpp @@ -63,6 +63,8 @@ struct Metrics process::metrics::Counter valid_framework_messages; process::metrics::Counter invalid_framework_messages; + process::metrics::Gauge executor_directory_max_allowed_age_secs; + // Resource metrics. 
std::vector<process::metrics::Gauge> resources_total; std::vector<process::metrics::Gauge> resources_used; http://git-wip-us.apache.org/repos/asf/mesos/blob/d93906ae/src/slave/slave.cpp ---------------------------------------------------------------------- diff --git a/src/slave/slave.cpp b/src/slave/slave.cpp index 364d911..85e0bff 100644 --- a/src/slave/slave.cpp +++ b/src/slave/slave.cpp @@ -130,7 +130,8 @@ Slave::Slave(const slave::Flags& _flags, authenticatee(NULL), authenticating(None()), authenticated(false), - reauthenticate(false) {} + reauthenticate(false), + executorDirectoryMaxAllowedAge(age(0)) {} Slave::~Slave() @@ -3578,15 +3579,16 @@ void Slave::_checkDiskUsage(const Future<double>& usage) LOG(ERROR) << "Failed to get disk usage: " << (usage.isFailed() ? usage.failure() : "future discarded"); } else { + executorDirectoryMaxAllowedAge = age(usage.get()); LOG(INFO) << "Current disk usage " << std::setiosflags(std::ios::fixed) << std::setprecision(2) << 100 * usage.get() << "%." - << " Max allowed age: " << age(usage.get()); + << " Max allowed age: " << executorDirectoryMaxAllowedAge; // We prune all directories whose deletion time is within // the next 'gc_delay - age'. Since a directory is always // scheduled for deletion 'gc_delay' into the future, only directories // that are at least 'age' old are deleted. 
- gc->prune(flags.gc_delay - age(usage.get())); + gc->prune(flags.gc_delay - executorDirectoryMaxAllowedAge); } delay(flags.disk_watch_interval, self(), &Slave::checkDiskUsage); } @@ -3990,6 +3992,12 @@ double Slave::_executors_terminating() } +double Slave::_executor_directory_max_allowed_age_secs() +{ + return executorDirectoryMaxAllowedAge.secs(); +} + + void Slave::sendExecutorTerminatedStatusUpdate( const TaskID& taskId, const Future<containerizer::Termination>& termination, http://git-wip-us.apache.org/repos/asf/mesos/blob/d93906ae/src/slave/slave.hpp ---------------------------------------------------------------------- diff --git a/src/slave/slave.hpp b/src/slave/slave.hpp index d476d85..989832f 100644 --- a/src/slave/slave.hpp +++ b/src/slave/slave.hpp @@ -411,6 +411,8 @@ private: double _executors_running(); double _executors_terminating(); + double _executor_directory_max_allowed_age_secs(); + void sendExecutorTerminatedStatusUpdate( const TaskID& taskId, const Future<containerizer::Termination>& termination, @@ -481,6 +483,10 @@ private: // Indicates if a new authentication attempt should be enforced. bool reauthenticate; + + // Maximum age of executor directories. Will be recomputed + // periodically every flags.disk_watch_interval. 
+ Duration executorDirectoryMaxAllowedAge; }; http://git-wip-us.apache.org/repos/asf/mesos/blob/d93906ae/src/tests/slave_tests.cpp ---------------------------------------------------------------------- diff --git a/src/tests/slave_tests.cpp b/src/tests/slave_tests.cpp index 27b3410..a975305 100644 --- a/src/tests/slave_tests.cpp +++ b/src/tests/slave_tests.cpp @@ -842,6 +842,10 @@ TEST_F(SlaveTest, MetricsInMetricsEndpoint) EXPECT_EQ(1u, snapshot.values.count("slave/valid_framework_messages")); EXPECT_EQ(1u, snapshot.values.count("slave/invalid_framework_messages")); + EXPECT_EQ( + 1u, + snapshot.values.count("slave/executor_directory_max_allowed_age_secs")); + EXPECT_EQ(1u, snapshot.values.count("slave/cpus_total")); EXPECT_EQ(1u, snapshot.values.count("slave/cpus_used")); EXPECT_EQ(1u, snapshot.values.count("slave/cpus_percent"));
