Added benchmarks for new registry operations.
These benchmarks cover:
1. The time taken to make all registered agents unreachable and then
reachable. This is similar to what happens during a severe network
partition.
2. The time taken to GC a significant fraction (50%) of the
unreachable list from the registry.
Review: https://reviews.apache.org/r/51909/
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/aae81968
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/aae81968
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/aae81968
Branch: refs/heads/master
Commit: aae819684f61f4cb811e0629731a57bdd6f8f3fe
Parents: 47bd3e4
Author: Neil Conway <[email protected]>
Authored: Mon Sep 19 15:49:04 2016 -0700
Committer: Vinod Kone <[email protected]>
Committed: Mon Sep 19 15:49:04 2016 -0700
----------------------------------------------------------------------
src/tests/registrar_tests.cpp | 141 ++++++++++++++++++++++++++++++++++++-
1 file changed, 139 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/aae81968/src/tests/registrar_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/registrar_tests.cpp b/src/tests/registrar_tests.cpp
index 745cded..928bc66 100644
--- a/src/tests/registrar_tests.cpp
+++ b/src/tests/registrar_tests.cpp
@@ -1263,8 +1263,6 @@ TEST_P(Registrar_BENCHMARK_Test, Performance)
Registrar registrar(flags, state);
AWAIT_READY(registrar.recover(master));
- vector<SlaveInfo> infos;
-
Attributes attributes = Attributes::parse("foo:bar;baz:quux");
Resources resources =
Resources::parse("cpus(*):1.0;mem(*):512;disk(*):2048").get();
@@ -1272,6 +1270,7 @@ TEST_P(Registrar_BENCHMARK_Test, Performance)
size_t slaveCount = GetParam();
// Create slaves.
+ vector<SlaveInfo> infos;
for (size_t i = 0; i < slaveCount; ++i) {
// Simulate real slave information.
SlaveInfo info;
@@ -1333,6 +1332,144 @@ TEST_P(Registrar_BENCHMARK_Test, Performance)
cout << "Removed " << slaveCount << " agents in " << watch.elapsed() << endl;
}
+
+// Benchmark marking every registered slave unreachable and then
+// marking each one reachable again, as might happen when a network
+// partition occurs and later heals. The slave count is `GetParam()`.
+TEST_P(Registrar_BENCHMARK_Test, MarkUnreachableThenReachable)
+{
+ Registrar registrar(flags, state);
+ AWAIT_READY(registrar.recover(master));
+
+ Attributes attributes = Attributes::parse("foo:bar;baz:quux");
+ Resources resources =
+ Resources::parse("cpus(*):1.0;mem(*):512;disk(*):2048").get();
+
+ size_t slaveCount = GetParam();
+
+ // Create `slaveCount` slaves that differ only in their ID suffix.
+ vector<SlaveInfo> infos;
+ for (size_t i = 0; i < slaveCount; ++i) {
+ // Simulate real slave information.
+ SlaveInfo info;
+ info.set_hostname("localhost");
+ info.mutable_id()->set_value(
+ string("201310101658-2280333834-5050-48574-") + stringify(i));
+ info.mutable_resources()->MergeFrom(resources);
+ info.mutable_attributes()->MergeFrom(attributes);
+ infos.push_back(info);
+ }
+
+ // Admit slaves. Only the last `apply` future is awaited (up to 5 minutes).
+ Stopwatch watch;
+ watch.start();
+ Future<bool> result;
+ foreach (const SlaveInfo& info, infos) {
+ result = registrar.apply(Owned<Operation>(new AdmitSlave(info)));
+ }
+ AWAIT_READY_FOR(result, Minutes(5));
+ LOG(INFO) << "Admitted " << slaveCount << " agents in " << watch.elapsed();
+
+ // Shuffle so that slaves are marked unreachable in random order (same as
+ // in production). NOTE(review): std::random_shuffle is removed in C++17.
+ std::random_shuffle(infos.begin(), infos.end());
+
+ // Mark all slaves unreachable, using one shared timestamp for all of them.
+ TimeInfo unreachableTime = protobuf::getCurrentTime();
+
+ watch.start(); // Presumably resets the elapsed time; confirm in Stopwatch.
+ foreach (const SlaveInfo& info, infos) {
+ result = registrar.apply(
+ Owned<Operation>(new MarkSlaveUnreachable(info, unreachableTime)));
+ }
+ AWAIT_READY_FOR(result, Minutes(5));
+ cout << "Marked " << slaveCount << " agents unreachable in "
+ << watch.elapsed() << endl;
+
+ // Shuffle the slaves again so that we mark them reachable in
+ // random order (same as in production).
+ std::random_shuffle(infos.begin(), infos.end());
+
+ // Mark all slaves reachable.
+ watch.start();
+ foreach (const SlaveInfo& info, infos) {
+ result = registrar.apply(
+ Owned<Operation>(new MarkSlaveReachable(info)));
+ }
+ AWAIT_READY_FOR(result, Minutes(5));
+ cout << "Marked " << slaveCount << " agents reachable in "
+ << watch.elapsed() << endl;
+}
+
+
+// Benchmark garbage collecting a large portion of the unreachable
+// list in a single `PruneUnreachable` operation. The portion is
+// currently fixed at 50% of the slaves (rounded up).
+TEST_P(Registrar_BENCHMARK_Test, GcManyAgents)
+{
+ Registrar registrar(flags, state);
+ AWAIT_READY(registrar.recover(master));
+
+ Attributes attributes = Attributes::parse("foo:bar;baz:quux");
+ Resources resources =
+ Resources::parse("cpus(*):1.0;mem(*):512;disk(*):2048").get();
+
+ size_t slaveCount = GetParam();
+
+ // Create `slaveCount` slaves that differ only in their ID suffix.
+ vector<SlaveInfo> infos;
+ for (size_t i = 0; i < slaveCount; ++i) {
+ // Simulate real slave information.
+ SlaveInfo info;
+ info.set_hostname("localhost");
+ info.mutable_id()->set_value(
+ string("201310101658-2280333834-5050-48574-") + stringify(i));
+ info.mutable_resources()->MergeFrom(resources);
+ info.mutable_attributes()->MergeFrom(attributes);
+ infos.push_back(info);
+ }
+
+ // Admit slaves. Only the last `apply` future is awaited (up to 5 minutes).
+ Stopwatch watch;
+ watch.start();
+ Future<bool> result;
+ foreach (const SlaveInfo& info, infos) {
+ result = registrar.apply(Owned<Operation>(new AdmitSlave(info)));
+ }
+ AWAIT_READY_FOR(result, Minutes(5));
+ LOG(INFO) << "Admitted " << slaveCount << " agents in " << watch.elapsed();
+
+ // Shuffle so that slaves are marked unreachable in random order (same as
+ // in production). NOTE(review): std::random_shuffle is removed in C++17.
+ std::random_shuffle(infos.begin(), infos.end());
+
+ // Mark all slaves unreachable, using one shared timestamp for all of them.
+ TimeInfo unreachableTime = protobuf::getCurrentTime();
+
+ watch.start(); // Presumably resets the elapsed time; confirm in Stopwatch.
+ foreach (const SlaveInfo& info, infos) {
+ result = registrar.apply(
+ Owned<Operation>(new MarkSlaveUnreachable(info, unreachableTime)));
+ }
+ AWAIT_READY_FOR(result, Minutes(5));
+ LOG(INFO) << "Marked " << slaveCount << " agents unreachable in "
+ << watch.elapsed();
+
+ // Collect the IDs of the first half (rounded up) of the shuffled slaves.
+ hashset<SlaveID> toRemove;
+ for (size_t i = 0; (i * 2) < slaveCount; i++) {
+ const SlaveInfo& info = infos[i];
+ toRemove.insert(info.id());
+ }
+
+ // Do GC: prune all selected slaves in a single registry operation.
+ watch.start();
+ result = registrar.apply(Owned<Operation>(new PruneUnreachable(toRemove)));
+ AWAIT_READY_FOR(result, Minutes(5));
+ cout << "Garbage collected " << toRemove.size() << " agents in "
+ << watch.elapsed() << endl;
+}
+
} // namespace tests {
} // namespace internal {
} // namespace mesos {