Added metrics for fetcher totals and failures.

This adds two metrics:
  * containerizer/fetcher/task_fetches_total
  * containerizer/fetcher/task_fetches_failed
These metrics are incremented once per task, not once per URI, so fetching multiple artifacts per task will result in at most one increment to each of these metrics. Review: https://reviews.apache.org/r/59464/ Project: http://git-wip-us.apache.org/repos/asf/mesos/repo Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/47dfbcdb Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/47dfbcdb Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/47dfbcdb Branch: refs/heads/master Commit: 47dfbcdbc97c126c4c5c8dd3c1f345165c564fb9 Parents: 6db493c Author: James Peach <[email protected]> Authored: Wed Jun 21 13:20:49 2017 -0700 Committer: Joseph Wu <[email protected]> Committed: Wed Jun 21 13:56:17 2017 -0700 ---------------------------------------------------------------------- src/slave/containerizer/fetcher.cpp | 21 ++++++++++++++++++--- src/slave/containerizer/fetcher.hpp | 9 +++++++++ 2 files changed, 27 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/mesos/blob/47dfbcdb/src/slave/containerizer/fetcher.cpp ---------------------------------------------------------------------- diff --git a/src/slave/containerizer/fetcher.cpp b/src/slave/containerizer/fetcher.cpp index 34c4b5d..741db01 100644 --- a/src/slave/containerizer/fetcher.cpp +++ b/src/slave/containerizer/fetcher.cpp @@ -14,6 +14,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+#include "slave/containerizer/fetcher.hpp" + #include <unordered_map> #include <process/async.hpp> @@ -22,6 +24,8 @@ #include <process/dispatch.hpp> #include <process/owned.hpp> +#include <process/metrics/metrics.hpp> + #include <stout/hashset.hpp> #include <stout/net.hpp> #include <stout/path.hpp> @@ -36,8 +40,6 @@ #include "hdfs/hdfs.hpp" -#include "slave/containerizer/fetcher.hpp" - using std::list; using std::map; using std::shared_ptr; @@ -255,7 +257,13 @@ void Fetcher::kill(const ContainerID& containerId) FetcherProcess::FetcherProcess(const Flags& _flags) : ProcessBase(process::ID::generate("fetcher")), flags(_flags), - cache(_flags.fetcher_cache_size) {} + cache(_flags.fetcher_cache_size), + fetchesTotal ("containerizer/fetcher/task_fetches_total"), + fetchesFailed("containerizer/fetcher/task_fetches_failed") +{ + process::metrics::add(fetchesTotal); + process::metrics::add(fetchesFailed); +} FetcherProcess::~FetcherProcess() @@ -263,6 +271,9 @@ FetcherProcess::~FetcherProcess() foreachkey (const ContainerID& containerId, subprocessPids) { kill(containerId); } + + process::metrics::remove(fetchesTotal); + process::metrics::remove(fetchesFailed); } @@ -327,11 +338,14 @@ Future<Nothing> FetcherProcess::fetch( const string& sandboxDirectory, const Option<string>& user) { + ++fetchesTotal; + VLOG(1) << "Starting to fetch URIs for container: " << containerId << ", directory: " << sandboxDirectory; Try<Nothing> validated = validateUris(commandInfo); if (validated.isError()) { + ++fetchesFailed; return Failure("Could not fetch: " + validated.error()); } @@ -537,6 +551,7 @@ Future<Nothing> FetcherProcess::__fetch( } } + ++fetchesFailed; return future; // Always propagate the failure! }) // Call to `operator` here forces the conversion on MSVC. 
This is implicit http://git-wip-us.apache.org/repos/asf/mesos/blob/47dfbcdb/src/slave/containerizer/fetcher.hpp ---------------------------------------------------------------------- diff --git a/src/slave/containerizer/fetcher.hpp b/src/slave/containerizer/fetcher.hpp index efc714f..1955da0 100644 --- a/src/slave/containerizer/fetcher.hpp +++ b/src/slave/containerizer/fetcher.hpp @@ -30,6 +30,8 @@ #include <process/process.hpp> #include <process/subprocess.hpp> +#include <process/metrics/counter.hpp> + #include <stout/hashmap.hpp> #include "slave/flags.hpp" @@ -321,6 +323,13 @@ private: Cache cache; hashmap<ContainerID, pid_t> subprocessPids; + + // NOTE: These metrics will increment at most once per task. Even if + // a single task asks for multiple artifacts, the total number of + // fetches will only go up by one. And if any of those artifacts + // fail to fetch, the failure count will only increase by one. + process::metrics::Counter fetchesTotal; + process::metrics::Counter fetchesFailed; }; } // namespace slave {
