Added metrics for fetcher totals and failures.

This adds two metrics:
  * containerizer/fetcher/task_fetches_total
  * containerizer/fetcher/task_fetches_failed

These metrics are incremented once per task, not once per URI,
so fetching multiple artifacts per task will result in at most
one increment to each of these metrics.

Review: https://reviews.apache.org/r/59464/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/47dfbcdb
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/47dfbcdb
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/47dfbcdb

Branch: refs/heads/master
Commit: 47dfbcdbc97c126c4c5c8dd3c1f345165c564fb9
Parents: 6db493c
Author: James Peach <[email protected]>
Authored: Wed Jun 21 13:20:49 2017 -0700
Committer: Joseph Wu <[email protected]>
Committed: Wed Jun 21 13:56:17 2017 -0700

----------------------------------------------------------------------
 src/slave/containerizer/fetcher.cpp | 21 ++++++++++++++++++---
 src/slave/containerizer/fetcher.hpp |  9 +++++++++
 2 files changed, 27 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/47dfbcdb/src/slave/containerizer/fetcher.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/fetcher.cpp b/src/slave/containerizer/fetcher.cpp
index 34c4b5d..741db01 100644
--- a/src/slave/containerizer/fetcher.cpp
+++ b/src/slave/containerizer/fetcher.cpp
@@ -14,6 +14,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+#include "slave/containerizer/fetcher.hpp"
+
 #include <unordered_map>
 
 #include <process/async.hpp>
@@ -22,6 +24,8 @@
 #include <process/dispatch.hpp>
 #include <process/owned.hpp>
 
+#include <process/metrics/metrics.hpp>
+
 #include <stout/hashset.hpp>
 #include <stout/net.hpp>
 #include <stout/path.hpp>
@@ -36,8 +40,6 @@
 
 #include "hdfs/hdfs.hpp"
 
-#include "slave/containerizer/fetcher.hpp"
-
 using std::list;
 using std::map;
 using std::shared_ptr;
@@ -255,7 +257,13 @@ void Fetcher::kill(const ContainerID& containerId)
 FetcherProcess::FetcherProcess(const Flags& _flags)
     : ProcessBase(process::ID::generate("fetcher")),
       flags(_flags),
-      cache(_flags.fetcher_cache_size) {}
+      cache(_flags.fetcher_cache_size),
+      fetchesTotal ("containerizer/fetcher/task_fetches_total"),
+      fetchesFailed("containerizer/fetcher/task_fetches_failed")
+{
+  process::metrics::add(fetchesTotal);
+  process::metrics::add(fetchesFailed);
+}
 
 
 FetcherProcess::~FetcherProcess()
@@ -263,6 +271,9 @@ FetcherProcess::~FetcherProcess()
   foreachkey (const ContainerID& containerId, subprocessPids) {
     kill(containerId);
   }
+
+  process::metrics::remove(fetchesTotal);
+  process::metrics::remove(fetchesFailed);
 }
 
 
@@ -327,11 +338,14 @@ Future<Nothing> FetcherProcess::fetch(
     const string& sandboxDirectory,
     const Option<string>& user)
 {
+  ++fetchesTotal;
+
   VLOG(1) << "Starting to fetch URIs for container: " << containerId
           << ", directory: " << sandboxDirectory;
 
   Try<Nothing> validated = validateUris(commandInfo);
   if (validated.isError()) {
+    ++fetchesFailed;
     return Failure("Could not fetch: " + validated.error());
   }
 
@@ -537,6 +551,7 @@ Future<Nothing> FetcherProcess::__fetch(
         }
       }
 
+      ++fetchesFailed;
       return future; // Always propagate the failure!
     })
     // Call to `operator` here forces the conversion on MSVC. This is implicit

http://git-wip-us.apache.org/repos/asf/mesos/blob/47dfbcdb/src/slave/containerizer/fetcher.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/fetcher.hpp b/src/slave/containerizer/fetcher.hpp
index efc714f..1955da0 100644
--- a/src/slave/containerizer/fetcher.hpp
+++ b/src/slave/containerizer/fetcher.hpp
@@ -30,6 +30,8 @@
 #include <process/process.hpp>
 #include <process/subprocess.hpp>
 
+#include <process/metrics/counter.hpp>
+
 #include <stout/hashmap.hpp>
 
 #include "slave/flags.hpp"
@@ -321,6 +323,13 @@ private:
   Cache cache;
 
   hashmap<ContainerID, pid_t> subprocessPids;
+
+  // NOTE: These metrics will increment at most once per task. Even if
+  // a single task asks for multiple artifacts, the total number of
+  // fetches will only go up by one. And if any of those artifacts
+  // fail to fetch, the failure count will only increase by one.
+  process::metrics::Counter fetchesTotal;
+  process::metrics::Counter fetchesFailed;
 };
 
 } // namespace slave {

Reply via email to