Added benchmarks for new registry operations.

These benchmarks cover:

  1. The time taken to make all registered agents unreachable and then
     reachable. This is similar to what happens during a severe network
     partition.

  2. The time taken to GC a significant fraction (50%) of the
     unreachable list from the registry.

Review: https://reviews.apache.org/r/51909/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/aae81968
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/aae81968
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/aae81968

Branch: refs/heads/master
Commit: aae819684f61f4cb811e0629731a57bdd6f8f3fe
Parents: 47bd3e4
Author: Neil Conway <neil.con...@gmail.com>
Authored: Mon Sep 19 15:49:04 2016 -0700
Committer: Vinod Kone <vinodk...@gmail.com>
Committed: Mon Sep 19 15:49:04 2016 -0700

----------------------------------------------------------------------
 src/tests/registrar_tests.cpp | 141 ++++++++++++++++++++++++++++++++++++-
 1 file changed, 139 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/aae81968/src/tests/registrar_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/registrar_tests.cpp b/src/tests/registrar_tests.cpp
index 745cded..928bc66 100644
--- a/src/tests/registrar_tests.cpp
+++ b/src/tests/registrar_tests.cpp
@@ -1263,8 +1263,6 @@ TEST_P(Registrar_BENCHMARK_Test, Performance)
   Registrar registrar(flags, state);
   AWAIT_READY(registrar.recover(master));
 
-  vector<SlaveInfo> infos;
-
   Attributes attributes = Attributes::parse("foo:bar;baz:quux");
   Resources resources =
     Resources::parse("cpus(*):1.0;mem(*):512;disk(*):2048").get();
@@ -1272,6 +1270,7 @@ TEST_P(Registrar_BENCHMARK_Test, Performance)
   size_t slaveCount = GetParam();
 
   // Create slaves.
+  vector<SlaveInfo> infos;
   for (size_t i = 0; i < slaveCount; ++i) {
     // Simulate real slave information.
     SlaveInfo info;
@@ -1333,6 +1332,144 @@ TEST_P(Registrar_BENCHMARK_Test, Performance)
   cout << "Removed " << slaveCount << " agents in " << watch.elapsed() << endl;
 }
 
+
+// Test the performance of marking all registered slaves unreachable,
+// then marking them reachable again. This might occur if there is a
+// network partition and then the partition heals.
+TEST_P(Registrar_BENCHMARK_Test, MarkUnreachableThenReachable)
+{
+  Registrar registrar(flags, state);
+  AWAIT_READY(registrar.recover(master));
+
+  Attributes attributes = Attributes::parse("foo:bar;baz:quux");
+  Resources resources =
+    Resources::parse("cpus(*):1.0;mem(*):512;disk(*):2048").get();
+
+  size_t slaveCount = GetParam();
+
+  // Create slaveCount slaves that differ only in the suffix of their ID.
+  vector<SlaveInfo> infos;
+  for (size_t i = 0; i < slaveCount; ++i) {
+    // Simulate real slave information.
+    SlaveInfo info;
+    info.set_hostname("localhost");
+    info.mutable_id()->set_value(
+        string("201310101658-2280333834-5050-48574-") + stringify(i));
+    info.mutable_resources()->MergeFrom(resources);
+    info.mutable_attributes()->MergeFrom(attributes);
+    infos.push_back(info);
+  }
+
+  // Admit slaves; only the last operation's future is awaited.
+  Stopwatch watch;
+  watch.start();
+  Future<bool> result;
+  foreach (const SlaveInfo& info, infos) {
+    result = registrar.apply(Owned<Operation>(new AdmitSlave(info)));
+  }
+  AWAIT_READY_FOR(result, Minutes(5));
+  LOG(INFO) << "Admitted " << slaveCount << " agents in " << watch.elapsed();
+
+  // Shuffle the slaves so that we mark them unreachable in random
+  // order (same as in production).
+  std::random_shuffle(infos.begin(), infos.end());
+
+  // Mark all slaves unreachable, using one shared timestamp.
+  TimeInfo unreachableTime = protobuf::getCurrentTime();
+
+  watch.start();
+  foreach (const SlaveInfo& info, infos) {
+    result = registrar.apply(
+        Owned<Operation>(new MarkSlaveUnreachable(info, unreachableTime)));
+  }
+  AWAIT_READY_FOR(result, Minutes(5));
+  cout << "Marked " << slaveCount << " agents unreachable in "
+       << watch.elapsed() << endl;
+
+  // Shuffle the slaves again so that we mark them reachable in
+  // random order (same as in production).
+  std::random_shuffle(infos.begin(), infos.end());
+
+  // Mark all slaves reachable.
+  watch.start();
+  foreach (const SlaveInfo& info, infos) {
+    result = registrar.apply(
+        Owned<Operation>(new MarkSlaveReachable(info)));
+  }
+  AWAIT_READY_FOR(result, Minutes(5));
+  cout << "Marked " << slaveCount << " agents reachable in "
+       << watch.elapsed() << endl;
+}
+
+
+// Test the performance of garbage collecting a large portion of the
+// unreachable list in a single operation. We use a fixed percentage
+// at the moment (50%).
+TEST_P(Registrar_BENCHMARK_Test, GcManyAgents)
+{
+  Registrar registrar(flags, state);
+  AWAIT_READY(registrar.recover(master));
+
+  Attributes attributes = Attributes::parse("foo:bar;baz:quux");
+  Resources resources =
+    Resources::parse("cpus(*):1.0;mem(*):512;disk(*):2048").get();
+
+  size_t slaveCount = GetParam();
+
+  // Create slaveCount slaves that differ only in the suffix of their ID.
+  vector<SlaveInfo> infos;
+  for (size_t i = 0; i < slaveCount; ++i) {
+    // Simulate real slave information.
+    SlaveInfo info;
+    info.set_hostname("localhost");
+    info.mutable_id()->set_value(
+        string("201310101658-2280333834-5050-48574-") + stringify(i));
+    info.mutable_resources()->MergeFrom(resources);
+    info.mutable_attributes()->MergeFrom(attributes);
+    infos.push_back(info);
+  }
+
+  // Admit slaves; only the last operation's future is awaited.
+  Stopwatch watch;
+  watch.start();
+  Future<bool> result;
+  foreach (const SlaveInfo& info, infos) {
+    result = registrar.apply(Owned<Operation>(new AdmitSlave(info)));
+  }
+  AWAIT_READY_FOR(result, Minutes(5));
+  LOG(INFO) << "Admitted " << slaveCount << " agents in " << watch.elapsed();
+
+  // Shuffle the slaves so that we mark them unreachable in random
+  // order (same as in production).
+  std::random_shuffle(infos.begin(), infos.end());
+
+  // Mark all slaves unreachable, using one shared timestamp.
+  TimeInfo unreachableTime = protobuf::getCurrentTime();
+
+  watch.start();
+  foreach (const SlaveInfo& info, infos) {
+    result = registrar.apply(
+        Owned<Operation>(new MarkSlaveUnreachable(info, unreachableTime)));
+  }
+  AWAIT_READY_FOR(result, Minutes(5));
+  LOG(INFO) << "Marked " << slaveCount << " agents unreachable in "
+            << watch.elapsed();
+
+  // Prepare to GC the first half (rounded up) of the shuffled slaves.
+  hashset<SlaveID> toRemove;
+  for (size_t i = 0; (i * 2) < slaveCount; ++i) {
+    const SlaveInfo& info = infos[i];
+    toRemove.insert(info.id());
+  }
+
+  // Do GC; this single operation is the measured step.
+  watch.start();
+  result = registrar.apply(Owned<Operation>(new PruneUnreachable(toRemove)));
+  AWAIT_READY_FOR(result, Minutes(5));
+  cout << "Garbage collected " << toRemove.size() << " agents in "
+       << watch.elapsed() << endl;
+}
+
 } // namespace tests {
 } // namespace internal {
 } // namespace mesos {

Reply via email to