[mesos] branch master updated: Eliminate accidental loop amplification over unreachable tasks.

bmahler Fri, 04 Oct 2019 10:46:28 -0700

This is an automated email from the ASF dual-hosted git repository.

bmahler pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/mesos.git



The following commit(s) were added to refs/heads/master by this push:
     new 0effba6  Eliminate accidental loop amplification over unreachable 
tasks.
0effba6 is described below

commit 0effba65a744079ef613c53677276c2377b633e2
Author: Benjamin Mahler <[email protected]>
AuthorDate: Thu Oct 3 16:39:51 2019 -0400

    Eliminate accidental loop amplification over unreachable tasks.
    
    Per MESOS-9889, the foreachkey operator on a multimap will actually
    loop over each <key,value> entry, thus looping over the same key
    multiple times. The code in the master is written such that
    excessive looping occurs.
    
    For example: <F1,T1> <F1,T2> <F1,T3>
    This will consider T1,T2,T3 3 times each rather than once each!
    
    We could fix this using .keys(), but this patch just replaces the
    multimap with a map of vectors.
    
    Review: https://reviews.apache.org/r/71579
---
 src/master/master.cpp | 16 ++++++++--------
 src/master/master.hpp |  3 ++-
 2 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/src/master/master.cpp b/src/master/master.cpp
index 65994aa..1414317 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -1953,7 +1953,7 @@ void Master::_doRegistryGc(
         Framework* framework = getFramework(frameworkId);
         if (framework != nullptr) {
           foreach (const TaskID& taskId,
-                   slaves.unreachableTasks.at(slaveId).get(frameworkId)) {
+                   slaves.unreachableTasks.at(slaveId).at(frameworkId)) {
             framework->unreachableTasks.erase(taskId);
           }
         }
@@ -7870,12 +7870,12 @@ void Master::__reregisterSlave(
   // All tasks from this agent are now reachable so clean them up from
   // the master's unreachable task records.
   if (slaves.unreachableTasks.contains(slaveInfo.id())) {
-    foreachkey (FrameworkID frameworkId,
-               slaves.unreachableTasks.at(slaveInfo.id())) {
+    foreachkey (const FrameworkID& frameworkId,
+                slaves.unreachableTasks.at(slaveInfo.id())) {
       Framework* framework = getFramework(frameworkId);
       if (framework != nullptr) {
-        foreach (TaskID taskId,
-                 slaves.unreachableTasks.at(slaveInfo.id()).get(frameworkId)) {
+        foreach (const TaskID& taskId,
+                 slaves.unreachableTasks.at(slaveInfo.id()).at(frameworkId)) {
           framework->unreachableTasks.erase(taskId);
         }
       }
@@ -9553,7 +9553,7 @@ void Master::markGone(const SlaveID& slaveId, const 
TimeInfo& goneTime)
         }
 
         foreach (const TaskID& taskId,
-                 slaves.unreachableTasks.at(slaveId).get(frameworkId)) {
+                 slaves.unreachableTasks.at(slaveId).at(frameworkId)) {
           if (framework->unreachableTasks.contains(taskId)) {
             const Owned<Task>& task = framework->unreachableTasks.at(taskId);
 
@@ -12105,8 +12105,8 @@ void Master::removeTask(Task* task, bool unreachable)
   }
 
   if (unreachable) {
-    slaves.unreachableTasks[slave->id].put(
-        task->framework_id(), task->task_id());
+    slaves.unreachableTasks[slave->id][task->framework_id()]
+      .push_back(task->task_id());
   }
 
   // Remove from framework.
diff --git a/src/master/master.hpp b/src/master/master.hpp
index 23eb2a6..dc45028 100644
--- a/src/master/master.hpp
+++ b/src/master/master.hpp
@@ -2212,7 +2212,8 @@ private:
     // agent reregisters. This map is bounded by the same GC behavior as
     // `unreachable`. When the agent is GC'd from unreachable it's also
     // erased from `unreachableTasks`.
-    hashmap<SlaveID, multihashmap<FrameworkID, TaskID>> unreachableTasks;
+    hashmap<SlaveID, hashmap<FrameworkID, std::vector<TaskID>>>
+      unreachableTasks;
 
     // Slaves that have been marked gone. We recover this from the
     // registry, so it includes slaves marked as gone by other instances

[mesos] branch master updated: Eliminate accidental loop amplification over unreachable tasks.

Reply via email to