This is an automated email from the ASF dual-hosted git repository. bmahler pushed a commit to branch 1.9.x in repository https://gitbox.apache.org/repos/asf/mesos.git
commit 45c3191c16b4a0f56a192385f89080a51e4ec024 Author: Benjamin Mahler <bmah...@apache.org> AuthorDate: Thu Oct 3 16:39:51 2019 -0400 Eliminate accidental loop amplification over unreachable tasks. Per MESOS-9889, the foreachkey operator on a multimap will actually loop over each <key,value> entry, thus looping over the same key multiple times. The code in the master is written such that excessive looping occurs. For example: <F1,T1> <F1,T2> <F1,T3> This will consider T1,T2,T3 3 times each rather than once each! We could fix this using .keys(), but this patch just replaces the multimap with a map of vectors. Review: https://reviews.apache.org/r/71579 --- src/master/master.cpp | 16 ++++++++-------- src/master/master.hpp | 3 ++- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/src/master/master.cpp b/src/master/master.cpp index 8fe9916..933fc89 100644 --- a/src/master/master.cpp +++ b/src/master/master.cpp @@ -1954,7 +1954,7 @@ void Master::_doRegistryGc( Framework* framework = getFramework(frameworkId); if (framework != nullptr) { foreach (const TaskID& taskId, - slaves.unreachableTasks.at(slaveId).get(frameworkId)) { + slaves.unreachableTasks.at(slaveId).at(frameworkId)) { framework->unreachableTasks.erase(taskId); } } @@ -7889,12 +7889,12 @@ void Master::__reregisterSlave( // All tasks from this agent are now reachable so clean them up from // the master's unreachable task records. if (slaves.unreachableTasks.contains(slaveInfo.id())) { - foreachkey (FrameworkID frameworkId, - slaves.unreachableTasks.at(slaveInfo.id())) { + foreachkey (const FrameworkID& frameworkId, + slaves.unreachableTasks.at(slaveInfo.id())) { Framework* framework = getFramework(frameworkId); if (framework != nullptr) { - foreach (TaskID taskId, - slaves.unreachableTasks.at(slaveInfo.id()).get(frameworkId)) { + foreach (const TaskID& taskId, + slaves.unreachableTasks.at(slaveInfo.id()).at(frameworkId)) { framework->unreachableTasks.erase(taskId); } } @@ -9596,7 +9596,7 @@ void Master::markGone(const SlaveID& slaveId, const TimeInfo& goneTime) } foreach (const TaskID& taskId, - slaves.unreachableTasks.at(slaveId).get(frameworkId)) { + slaves.unreachableTasks.at(slaveId).at(frameworkId)) { if (framework->unreachableTasks.contains(taskId)) { const Owned<Task>& task = framework->unreachableTasks.at(taskId); @@ -12175,8 +12175,8 @@ void Master::removeTask(Task* task, bool unreachable) } if (unreachable) { - slaves.unreachableTasks[slave->id].put( - task->framework_id(), task->task_id()); + slaves.unreachableTasks[slave->id][task->framework_id()] + .push_back(task->task_id()); } // Remove from framework. diff --git a/src/master/master.hpp b/src/master/master.hpp index 19c1782..c4a1e62 100644 --- a/src/master/master.hpp +++ b/src/master/master.hpp @@ -2198,7 +2198,8 @@ private: // agent reregisters. This map is bounded by the same GC behavior as // `unreachable`. When the agent is GC'd from unreachable it's also // erased from `unreachableTasks`. - hashmap<SlaveID, multihashmap<FrameworkID, TaskID>> unreachableTasks; + hashmap<SlaveID, hashmap<FrameworkID, std::vector<TaskID>>> + unreachableTasks; // Slaves that have been marked gone. We recover this from the // registry, so it includes slaves marked as gone by other instances