This is an automated email from the ASF dual-hosted git repository.
bmahler pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/mesos.git
The following commit(s) were added to refs/heads/master by this push:
new 0effba6 Eliminate accidental loop amplification over unreachable
tasks.
0effba6 is described below
commit 0effba65a744079ef613c53677276c2377b633e2
Author: Benjamin Mahler <[email protected]>
AuthorDate: Thu Oct 3 16:39:51 2019 -0400
Eliminate accidental loop amplification over unreachable tasks.
Per MESOS-9889, the foreachkey operator on a multimap will actually
loop over each <key,value> entry, thus looping over the same key
multiple times. The code in the master is written such that
excessive looping occurs.
For example: <F1,T1> <F1,T2> <F1,T3>
This will consider T1,T2,T3 3 times each rather than once each!
We could fix this using .keys(), but this patch just replaces the
multimap with a map of vectors.
Review: https://reviews.apache.org/r/71579
---
src/master/master.cpp | 16 ++++++++--------
src/master/master.hpp | 3 ++-
2 files changed, 10 insertions(+), 9 deletions(-)
diff --git a/src/master/master.cpp b/src/master/master.cpp
index 65994aa..1414317 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -1953,7 +1953,7 @@ void Master::_doRegistryGc(
Framework* framework = getFramework(frameworkId);
if (framework != nullptr) {
foreach (const TaskID& taskId,
- slaves.unreachableTasks.at(slaveId).get(frameworkId)) {
+ slaves.unreachableTasks.at(slaveId).at(frameworkId)) {
framework->unreachableTasks.erase(taskId);
}
}
@@ -7870,12 +7870,12 @@ void Master::__reregisterSlave(
// All tasks from this agent are now reachable so clean them up from
// the master's unreachable task records.
if (slaves.unreachableTasks.contains(slaveInfo.id())) {
- foreachkey (FrameworkID frameworkId,
- slaves.unreachableTasks.at(slaveInfo.id())) {
+ foreachkey (const FrameworkID& frameworkId,
+ slaves.unreachableTasks.at(slaveInfo.id())) {
Framework* framework = getFramework(frameworkId);
if (framework != nullptr) {
- foreach (TaskID taskId,
- slaves.unreachableTasks.at(slaveInfo.id()).get(frameworkId)) {
+ foreach (const TaskID& taskId,
+ slaves.unreachableTasks.at(slaveInfo.id()).at(frameworkId)) {
framework->unreachableTasks.erase(taskId);
}
}
@@ -9553,7 +9553,7 @@ void Master::markGone(const SlaveID& slaveId, const
TimeInfo& goneTime)
}
foreach (const TaskID& taskId,
- slaves.unreachableTasks.at(slaveId).get(frameworkId)) {
+ slaves.unreachableTasks.at(slaveId).at(frameworkId)) {
if (framework->unreachableTasks.contains(taskId)) {
const Owned<Task>& task = framework->unreachableTasks.at(taskId);
@@ -12105,8 +12105,8 @@ void Master::removeTask(Task* task, bool unreachable)
}
if (unreachable) {
- slaves.unreachableTasks[slave->id].put(
- task->framework_id(), task->task_id());
+ slaves.unreachableTasks[slave->id][task->framework_id()]
+ .push_back(task->task_id());
}
// Remove from framework.
diff --git a/src/master/master.hpp b/src/master/master.hpp
index 23eb2a6..dc45028 100644
--- a/src/master/master.hpp
+++ b/src/master/master.hpp
@@ -2212,7 +2212,8 @@ private:
// agent reregisters. This map is bounded by the same GC behavior as
// `unreachable`. When the agent is GC'd from unreachable it's also
// erased from `unreachableTasks`.
- hashmap<SlaveID, multihashmap<FrameworkID, TaskID>> unreachableTasks;
+ hashmap<SlaveID, hashmap<FrameworkID, std::vector<TaskID>>>
+ unreachableTasks;
// Slaves that have been marked gone. We recover this from the
// registry, so it includes slaves marked as gone by other instances