Repository: mesos
Updated Branches:
  refs/heads/1.4.x adecf44e1 -> 502863730


Tracked successful task fetches rather than total.

In the fetcher task metrics, track the number of times we
successfully fetch all the task URIs rather than the total
number of times we try to fetch task URIs. The success count
is easier to observe and explain, and is consistent with the
metrics nomenclature for other pending metrics.

Review: https://reviews.apache.org/r/61620/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/50286373
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/50286373
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/50286373

Branch: refs/heads/1.4.x
Commit: 5028637306a5b38a7a7f34535815ac7c1b5462bb
Parents: adecf44
Author: James Peach <jpe...@apache.org>
Authored: Mon Aug 21 12:52:02 2017 -0700
Committer: Jiang Yan Xu <xuj...@apple.com>
Committed: Mon Aug 21 13:31:57 2017 -0700

----------------------------------------------------------------------
 docs/monitoring.md                          |  4 ++--
 src/slave/containerizer/fetcher.cpp         | 13 +++++++------
 src/slave/containerizer/fetcher_process.hpp |  2 +-
 src/tests/fetcher_tests.cpp                 | 24 +++++++++++++-----------
 4 files changed, 23 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/50286373/docs/monitoring.md
----------------------------------------------------------------------
diff --git a/docs/monitoring.md b/docs/monitoring.md
index d1f64d4..e48f825 100644
--- a/docs/monitoring.md
+++ b/docs/monitoring.md
@@ -1533,9 +1533,9 @@ on the agent.
 </tr>
 <tr>
   <td>
-  <code>containerizer/fetcher/task_fetches_total</code>
+  <code>containerizer/fetcher/task_fetches_succeeded</code>
   </td>
-  <td>Total number of times the Mesos fetcher tried to fetch URIs for a 
task.</td>
+  <td>Total number of times the Mesos fetcher successfully fetched all the 
URIs for a task.</td>
   <td>Counter</td>
 </tr>
 <tr>

http://git-wip-us.apache.org/repos/asf/mesos/blob/50286373/src/slave/containerizer/fetcher.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/fetcher.cpp 
b/src/slave/containerizer/fetcher.cpp
index fdeb9de..ba5b097 100644
--- a/src/slave/containerizer/fetcher.cpp
+++ b/src/slave/containerizer/fetcher.cpp
@@ -260,7 +260,7 @@ void Fetcher::kill(const ContainerID& containerId)
 
 
 FetcherProcess::Metrics::Metrics(FetcherProcess *fetcher)
-  : task_fetches_total("containerizer/fetcher/task_fetches_total"),
+  : task_fetches_succeeded("containerizer/fetcher/task_fetches_succeeded"),
     task_fetches_failed("containerizer/fetcher/task_fetches_failed"),
     cache_size_total_bytes(
         "containerizer/fetcher/cache_size_total_bytes",
@@ -275,7 +275,7 @@ FetcherProcess::Metrics::Metrics(FetcherProcess *fetcher)
           return fetcher->cache.usedSpace().bytes();
         })
 {
-  process::metrics::add(task_fetches_total);
+  process::metrics::add(task_fetches_succeeded);
   process::metrics::add(task_fetches_failed);
   process::metrics::add(cache_size_total_bytes);
   process::metrics::add(cache_size_used_bytes);
@@ -284,7 +284,7 @@ FetcherProcess::Metrics::Metrics(FetcherProcess *fetcher)
 
 FetcherProcess::Metrics::~Metrics()
 {
-  process::metrics::remove(task_fetches_total);
+  process::metrics::remove(task_fetches_succeeded);
   process::metrics::remove(task_fetches_failed);
 
   // Wait for the metrics to be removed before we allow the destructor
@@ -374,8 +374,6 @@ Future<Nothing> FetcherProcess::fetch(
     const string& sandboxDirectory,
     const Option<string>& user)
 {
-  ++metrics.task_fetches_total;
-
   VLOG(1) << "Starting to fetch URIs for container: " << containerId
           << ", directory: " << sandboxDirectory;
 
@@ -573,6 +571,8 @@ Future<Nothing> FetcherProcess::__fetch(
 
   return run(containerId, sandboxDirectory, user, info)
     .repair(defer(self(), [=](const Future<Nothing>& future) {
+      ++metrics.task_fetches_failed;
+
       LOG(ERROR) << "Failed to run mesos-fetcher: " << future.failure();
 
       foreachvalue (const Option<shared_ptr<Cache::Entry>>& entry, entries) {
@@ -587,7 +587,6 @@ Future<Nothing> FetcherProcess::__fetch(
         }
       }
 
-      ++metrics.task_fetches_failed;
       return future; // Always propagate the failure!
     })
     // Call to `operator` here forces the conversion on MSVC. This is implicit
@@ -595,6 +594,8 @@ Future<Nothing> FetcherProcess::__fetch(
     .operator std::function<process::Future<Nothing>(
         const process::Future<Nothing> &)>())
     .then(defer(self(), [=]() {
+      ++metrics.task_fetches_succeeded;
+
       foreachvalue (const Option<shared_ptr<Cache::Entry>>& entry, entries) {
         if (entry.isSome()) {
           entry.get()->unreference();

http://git-wip-us.apache.org/repos/asf/mesos/blob/50286373/src/slave/containerizer/fetcher_process.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/fetcher_process.hpp 
b/src/slave/containerizer/fetcher_process.hpp
index 36010e3..034fe34 100644
--- a/src/slave/containerizer/fetcher_process.hpp
+++ b/src/slave/containerizer/fetcher_process.hpp
@@ -255,7 +255,7 @@ private:
     // a single task asks for multiple artifacts, the total number of
     // fetches will only go up by one. And if any of those artifacts
     // fail to fetch, the failure count will only increase by one.
-    process::metrics::Counter task_fetches_total;
+    process::metrics::Counter task_fetches_succeeded;
     process::metrics::Counter task_fetches_failed;
 
     process::metrics::Gauge cache_size_total_bytes;

http://git-wip-us.apache.org/repos/asf/mesos/blob/50286373/src/tests/fetcher_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/fetcher_tests.cpp b/src/tests/fetcher_tests.cpp
index 01da9fe..df9d2d9 100644
--- a/src/tests/fetcher_tests.cpp
+++ b/src/tests/fetcher_tests.cpp
@@ -73,20 +73,22 @@ namespace tests {
 class FetcherTest : public TemporaryDirectoryTest
 {
 public:
-  static void verifyMetrics(unsigned totalCount, unsigned errorCount)
+  static void verifyMetrics(unsigned successCount, unsigned errorCount)
   {
     JSON::Object metrics = Metrics();
 
     // First verify that each metric is present.
     ASSERT_EQ(
-        1u, metrics.values.count("containerizer/fetcher/task_fetches_total"));
+        1u,
+        metrics.values.count("containerizer/fetcher/task_fetches_succeeded"));
     ASSERT_EQ(
-        1u, metrics.values.count("containerizer/fetcher/task_fetches_failed"));
+        1u,
+        metrics.values.count("containerizer/fetcher/task_fetches_failed"));
 
     // Next verify the actual values.
     EXPECT_SOME_EQ(
-      totalCount,
-      metrics.at<JSON::Number>("containerizer/fetcher/task_fetches_total"));
+      successCount,
+      
metrics.at<JSON::Number>("containerizer/fetcher/task_fetches_succeeded"));
     EXPECT_SOME_EQ(
       errorCount,
       metrics.at<JSON::Number>("containerizer/fetcher/task_fetches_failed"));
@@ -229,7 +231,7 @@ TEST_F_TEMP_DISABLED_ON_WINDOWS(FetcherTest, 
AbsoluteCustomSubdirectoryFails)
 
   EXPECT_FALSE(os::exists(localFile));
 
-  verifyMetrics(1, 1);
+  verifyMetrics(0, 1);
 }
 
 
@@ -271,7 +273,7 @@ TEST_F_TEMP_DISABLED_ON_WINDOWS(FetcherTest, InvalidUser)
 
   EXPECT_FALSE(os::exists(localFile));
 
-  verifyMetrics(1, 1);
+  verifyMetrics(0, 1);
 }
 
 
@@ -303,7 +305,7 @@ TEST_F_TEMP_DISABLED_ON_WINDOWS(FetcherTest, 
NonExistingFile)
   // See FetcherProcess::run().
   EXPECT_TRUE(strings::contains(fetch.failure(), "Failed to fetch"));
 
-  verifyMetrics(1, 1);
+  verifyMetrics(0, 1);
 }
 
 
@@ -330,7 +332,7 @@ TEST_F(FetcherTest, MalformedURI)
   // See Fetcher::basename().
   EXPECT_TRUE(strings::contains(fetch.failure(), "Malformed"));
 
-  verifyMetrics(1, 1);
+  verifyMetrics(0, 1);
 }
 
 
@@ -401,7 +403,7 @@ TEST_F_TEMP_DISABLED_ON_WINDOWS(FetcherTest, 
RelativeFilePath)
 
     EXPECT_FALSE(os::exists(localFile));
 
-    verifyMetrics(1, 1);
+    verifyMetrics(0, 1);
   }
 
   {
@@ -907,7 +909,7 @@ TEST_F_TEMP_DISABLED_ON_WINDOWS(FetcherTest, 
UNZIP_ExtractInvalidFile)
 
   ASSERT_SOME_EQ("hello hello\n", os::read(extractedFile));
 
-  verifyMetrics(1, 1);
+  verifyMetrics(0, 1);
 }
 
 

Reply via email to