Repository: mesos Updated Branches: refs/heads/master 25844c57b -> a97ae7510
Updated Frameworks struct to store Resources by SlaveID. Review: https://reviews.apache.org/r/31665 Project: http://git-wip-us.apache.org/repos/asf/mesos/repo Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/a97ae751 Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/a97ae751 Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/a97ae751 Branch: refs/heads/master Commit: a97ae751029a753ca054b214f78da37aa8050f64 Parents: 25844c5 Author: Michael Park <[email protected]> Authored: Fri Apr 17 14:13:38 2015 -0700 Committer: Benjamin Mahler <[email protected]> Committed: Fri Apr 17 14:13:49 2015 -0700 ---------------------------------------------------------------------- src/master/http.cpp | 6 ++-- src/master/master.cpp | 7 +++-- src/master/master.hpp | 72 +++++++++++++++++++++++++++++++++++++--------- 3 files changed, 67 insertions(+), 18 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/mesos/blob/a97ae751/src/master/http.cpp ---------------------------------------------------------------------- diff --git a/src/master/http.cpp b/src/master/http.cpp index f2b123d..00c22c4 100644 --- a/src/master/http.cpp +++ b/src/master/http.cpp @@ -122,11 +122,11 @@ JSON::Object model(const Framework& framework) // TODO(bmahler): Consider deprecating this in favor of the split // used and offered resources below. object.values["resources"] = - model(framework.usedResources + framework.offeredResources); + model(framework.totalUsedResources + framework.totalOfferedResources); // TODO(bmahler): Use these in the webui. - object.values["used_resources"] = model(framework.usedResources); - object.values["offered_resources"] = model(framework.offeredResources); + object.values["used_resources"] = model(framework.totalUsedResources); + object.values["offered_resources"] = model(framework.totalOfferedResources); object.values["hostname"] = framework.info.hostname(); object.values["webui_url"] = framework.info.webui_url(); http://git-wip-us.apache.org/repos/asf/mesos/blob/a97ae751/src/master/master.cpp ---------------------------------------------------------------------- diff --git a/src/master/master.cpp b/src/master/master.cpp index 02f35ac..944a943 100644 --- a/src/master/master.cpp +++ b/src/master/master.cpp @@ -4129,12 +4129,15 @@ void Master::addFramework(Framework* framework) roles[framework->info.role()]->addFramework(framework); // There should be no offered resources yet! - CHECK_EQ(Resources(), framework->offeredResources); + CHECK_EQ(Resources(), framework->totalOfferedResources); + // TODO(mpark): Once the allocator API is updated to operate on + // 'hashmap<SlaveID, Resources>' rather than 'Resources', pass + // 'framework->usedResources' instead. allocator->addFramework( framework->id(), framework->info, - framework->usedResources); + framework->totalUsedResources); // Export framework metrics. http://git-wip-us.apache.org/repos/asf/mesos/blob/a97ae751/src/master/master.hpp ---------------------------------------------------------------------- diff --git a/src/master/master.hpp b/src/master/master.hpp index 4a1df58..c10e7c0 100644 --- a/src/master/master.hpp +++ b/src/master/master.hpp @@ -1007,7 +1007,8 @@ struct Framework tasks[task->task_id()] = task; if (!protobuf::isTerminalState(task->state())) { - usedResources += task->resources(); + totalUsedResources += task->resources(); + usedResources[task->slave_id()] += task->resources(); } } @@ -1022,7 +1023,11 @@ struct Framework << "Unknown task " << task->task_id() << " of framework " << task->framework_id(); - usedResources -= task->resources(); + totalUsedResources -= task->resources(); + usedResources[task->slave_id()] -= task->resources(); + if (usedResources[task->slave_id()].empty()) { + usedResources.erase(task->slave_id()); + } } void addCompletedTask(const Task& task) @@ -1038,7 +1043,11 @@ struct Framework << " of framework " << task->framework_id(); if (!protobuf::isTerminalState(task->state())) { - usedResources -= task->resources(); + totalUsedResources -= task->resources(); + usedResources[task->slave_id()] -= task->resources(); + if (usedResources[task->slave_id()].empty()) { + usedResources.erase(task->slave_id()); + } } addCompletedTask(*task); @@ -1050,7 +1059,8 @@ struct Framework { CHECK(!offers.contains(offer)) << "Duplicate offer " << offer->id(); offers.insert(offer); - offeredResources += offer->resources(); + totalOfferedResources += offer->resources(); + offeredResources[offer->slave_id()] += offer->resources(); } void removeOffer(Offer* offer) @@ -1058,7 +1068,12 @@ struct Framework CHECK(offers.find(offer) != offers.end()) << "Unknown offer " << offer->id(); - offeredResources -= offer->resources(); + totalOfferedResources -= offer->resources(); + offeredResources[offer->slave_id()] -= offer->resources(); + if (offeredResources[offer->slave_id()].empty()) { + offeredResources.erase(offer->slave_id()); + } + offers.erase(offer); } @@ -1077,7 +1092,8 @@ struct Framework << " on slave " << slaveId; executors[slaveId][executorInfo.executor_id()] = executorInfo; - usedResources += executorInfo.resources(); + totalUsedResources += executorInfo.resources(); + usedResources[slaveId] += executorInfo.resources(); } void removeExecutor(const SlaveID& slaveId, @@ -1088,7 +1104,12 @@ struct Framework << " of framework " << id() << " of slave " << slaveId; - usedResources -= executors[slaveId][executorId].resources(); + totalUsedResources -= executors[slaveId][executorId].resources(); + usedResources[slaveId] -= executors[slaveId][executorId].resources(); + if (usedResources[slaveId].empty()) { + usedResources.erase(slaveId); + } + executors[slaveId].erase(executorId); if (executors[slaveId].empty()) { executors.erase(slaveId); @@ -1183,10 +1204,35 @@ struct Framework hashmap<SlaveID, hashmap<ExecutorID, ExecutorInfo>> executors; - // TODO(bmahler): Summing set and ranges resources across slaves - // does not yield meaningful totals. - Resources usedResources; // Active task / executor resources. - Resources offeredResources; // Offered resources. + // NOTE: For the used and offered resources below, we keep the + // total as well as partitioned by SlaveID. + // We expose the total resources via the HTTP endpoint, and we + // keep a running total of the resources because looping over the + // slaves to sum the resources has led to perf issues (MESOS-1862). + // We keep the resources partitioned by SlaveID because non-scalar + // resources can be lost when summing them up across multiple + // slaves (MESOS-2373). + // + // Also note that keeping the totals is safe even though it yields + // incorrect results for non-scalar resources. + // (1) For overlapping set items / ranges across slaves, these + // will get added N times but only represented once. + // (2) When an initial subtraction occurs (N-1), the resource is + // no longer represented. (This is the source of the bug). + // (3) When any further subtractions occur (N-(1+M)), the + // Resources simply ignores the subtraction since there's + // nothing to remove, so this is safe for now. + + // TODO(mpark): Strip the non-scalar resources out of the totals + // in order to avoid reporting incorrect statistics (MESOS-2623). + + // Active task / executor resources. + Resources totalUsedResources; + hashmap<SlaveID, Resources> usedResources; + + // Offered resources. + Resources totalOfferedResources; + hashmap<SlaveID, Resources> offeredResources; private: Framework(const Framework&); // No copying. @@ -1225,8 +1271,8 @@ struct Role { Resources resources; foreachvalue (Framework* framework, frameworks) { - resources += framework->usedResources; - resources += framework->offeredResources; + resources += framework->totalUsedResources; + resources += framework->totalOfferedResources; } return resources;
