Repository: mesos
Updated Branches:
  refs/heads/master 9c17d8a96 -> 2e40c67ec
Replaced slave's 'available' with 'allocated' in hierarchical allocator.

Review: https://reviews.apache.org/r/35836

Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/1fcb1e44
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/1fcb1e44
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/1fcb1e44

Branch: refs/heads/master
Commit: 1fcb1e447ac52b8baf58dc9c88f186e3dfcaab50
Parents: 9c17d8a
Author: Jie Yu <[email protected]>
Authored: Wed Jun 24 11:12:21 2015 -0700
Committer: Jie Yu <[email protected]>
Committed: Thu Jun 25 10:25:40 2015 -0700

----------------------------------------------------------------------
 src/master/allocator/mesos/hierarchical.hpp | 97 +++++++++++++-----------
 1 file changed, 52 insertions(+), 45 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/mesos/blob/1fcb1e44/src/master/allocator/mesos/hierarchical.hpp
----------------------------------------------------------------------
diff --git a/src/master/allocator/mesos/hierarchical.hpp b/src/master/allocator/mesos/hierarchical.hpp
index ef18ff8..cacf047 100644
--- a/src/master/allocator/mesos/hierarchical.hpp
+++ b/src/master/allocator/mesos/hierarchical.hpp
@@ -229,8 +229,21 @@ protected:
     // Total amount of regular *and* oversubscribed resources.
     Resources total;
 
-    // Available regular *and* oversubscribed resources.
-    Resources available;
+    // Regular *and* oversubscribed resources that are allocated.
+    //
+    // NOTE: We keep track of the slave's allocated resources despite
+    // having that information in the sorters. This is because the
+    // information in the sorters is not accurate if some framework
+    // hasn't reregistered. See MESOS-2919 for details.
+    Resources allocated;
+
+    // We track the total and allocated resources on the slave; the
+    // available resources are computed as follows:
+    //
+    //   available = total - allocated
+    //
+    // Note that it's possible for the slave to be over-allocated!
+    // In this case, allocated > total.
 
     bool activated;  // Whether to offer resources.
     bool checkpoint; // Whether slave supports checkpointing.
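
As an aside, the bookkeeping introduced by this hunk can be illustrated
with a minimal standalone sketch. This is plain C++ with a single scalar
'cpus' resource modeled as a double, not the Mesos Resources API, and
the values are made up:

    // Minimal sketch: one scalar "cpus" resource as a double.
    #include <iostream>

    int main()
    {
      double total = 8.0;      // regular + oversubscribed cpus
      double allocated = 6.0;  // stored directly, per this patch

      // 'available' is now derived rather than stored:
      //   available = total - allocated
      std::cout << "available: " << (total - allocated) << std::endl;  // 2

      // If a revised oversubscription estimate shrinks 'total', the
      // slave becomes over-allocated (allocated > total).
      total = 5.0;
      std::cout << "over-allocated: " << (allocated > total) << std::endl;

      return 0;
    }

Storing 'allocated' and deriving 'available' lets the invariant hold
even in the over-allocated case, which a stored 'available' could not
represent.
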
@@ -502,14 +515,14 @@ HierarchicalAllocatorProcess<RoleSorter, FrameworkSorter>::addSlave(
 
   slaves[slaveId] = Slave();
   slaves[slaveId].total = total;
-  slaves[slaveId].available = total - Resources::sum(used);
+  slaves[slaveId].allocated = Resources::sum(used);
   slaves[slaveId].activated = true;
   slaves[slaveId].checkpoint = slaveInfo.checkpoint();
   slaves[slaveId].hostname = slaveInfo.hostname();
 
   LOG(INFO) << "Added slave " << slaveId << " (" << slaves[slaveId].hostname
             << ") with " << slaves[slaveId].total
-            << " (and " << slaves[slaveId].available << " available)";
+            << " (allocated: " << slaves[slaveId].allocated << ")";
 
   allocate(slaveId);
 }
@@ -567,24 +580,10 @@ HierarchicalAllocatorProcess<RoleSorter, FrameworkSorter>::updateSlave(
       slaveId, slaves[slaveId].total.unreserved());
 
-  // Calculate the current allocation of oversubscribed resources.
-  Resources allocation;
-  foreachkey (const std::string& role, roles) {
-    allocation += roleSorter->allocation(role, slaveId).revocable();
-  }
-
-  // Update the available resources.
-
-  // First remove the old oversubscribed resources from available.
-  slaves[slaveId].available -= slaves[slaveId].available.revocable();
-
-  // Now add the new estimate of available oversubscribed resources.
-  slaves[slaveId].available += oversubscribed - allocation;
-
-  LOG(INFO) << "Slave " << slaveId << " (" << slaves[slaveId].hostname
-            << ") updated with oversubscribed resources " << oversubscribed
+  LOG(INFO) << "Slave " << slaveId << " (" << slaves[slaveId].hostname << ")"
+            << " updated with oversubscribed resources " << oversubscribed
             << " (total: " << slaves[slaveId].total
-            << ", available: " << slaves[slaveId].available << ")";
+            << ", allocated: " << slaves[slaveId].allocated << ")";
 
   allocate(slaveId);
 }
@@ -662,36 +661,40 @@ HierarchicalAllocatorProcess<RoleSorter, FrameworkSorter>::updateAllocation(
   CHECK(slaves.contains(slaveId));
   CHECK(frameworks.contains(frameworkId));
 
-  // The total resources on the slave are composed of both allocated
-  // and available resources:
-  //
-  //   total = available + allocated
-  //
   // Here we apply offer operations to the allocated resources, which
   // in turn leads to an update of the total. The available resources
   // remain unchanged.
 
+  // Update the allocated resources.
   FrameworkSorter* frameworkSorter =
     frameworkSorters[frameworks[frameworkId].role];
 
-  Resources allocation =
+  Resources frameworkAllocation =
     frameworkSorter->allocation(frameworkId.value(), slaveId);
 
-  // Update the allocated resources.
-  Try<Resources> updatedAllocation = allocation.apply(operations);
-  CHECK_SOME(updatedAllocation);
+  Try<Resources> updatedFrameworkAllocation =
+    frameworkAllocation.apply(operations);
+
+  CHECK_SOME(updatedFrameworkAllocation);
 
   frameworkSorter->update(
       frameworkId.value(),
       slaveId,
-      allocation,
-      updatedAllocation.get());
+      frameworkAllocation,
+      updatedFrameworkAllocation.get());
 
   roleSorter->update(
       frameworks[frameworkId].role,
       slaveId,
-      allocation.unreserved(),
-      updatedAllocation.get().unreserved());
+      frameworkAllocation.unreserved(),
+      updatedFrameworkAllocation.get().unreserved());
+
+  Try<Resources> updatedSlaveAllocation =
+    slaves[slaveId].allocated.apply(operations);
+
+  CHECK_SOME(updatedSlaveAllocation);
+
+  slaves[slaveId].allocated = updatedSlaveAllocation.get();
 
   // Update the total resources.
   Try<Resources> updatedTotal = slaves[slaveId].total.apply(operations);
@@ -699,13 +702,11 @@ HierarchicalAllocatorProcess<RoleSorter, FrameworkSorter>::updateAllocation(
 
   slaves[slaveId].total = updatedTotal.get();
 
-  CHECK_EQ(slaves[slaveId].total - updatedAllocation.get(),
-           slaves[slaveId].available);
-
   // TODO(jieyu): Do not log if there is no update.
   LOG(INFO) << "Updated allocation of framework " << frameworkId
             << " on slave " << slaveId
-            << " from " << allocation << " to " << updatedAllocation.get();
+            << " from " << frameworkAllocation
+            << " to " << updatedFrameworkAllocation.get();
 }
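
To see why updateAllocation() can apply the same offer operations to the
framework's allocation, the slave's allocated resources, and the slave's
total while leaving the derived 'available' unchanged, consider this
hedged sketch. Resources are reduced to (unreserved, reserved) cpu
pairs, and applyReserve() is a hypothetical stand-in for
Resources::apply() with a RESERVE operation:

    // Minimal sketch: resources as (unreserved, reserved) cpu pairs.
    #include <cassert>

    struct Cpus { double unreserved; double reserved; };

    // Hypothetical helper: move n cpus from the unreserved pool to
    // the role-reserved pool, as a RESERVE operation would.
    Cpus applyReserve(const Cpus& r, double n)
    {
      return Cpus{r.unreserved - n, r.reserved + n};
    }

    int main()
    {
      Cpus frameworkAllocation{2, 0};  // this framework's share
      Cpus slaveAllocated{6, 0};       // all frameworks' shares
      Cpus slaveTotal{8, 0};           // the slave's capacity

      // Apply the same operation to all three ledgers, as
      // updateAllocation() does above.
      frameworkAllocation = applyReserve(frameworkAllocation, 2);
      slaveAllocated = applyReserve(slaveAllocated, 2);
      slaveTotal = applyReserve(slaveTotal, 2);

      // The derived 'available' is unchanged: 2 unreserved cpus and
      // no reserved cpus, exactly as before the operation.
      assert(slaveTotal.unreserved - slaveAllocated.unreserved == 2);
      assert(slaveTotal.reserved - slaveAllocated.reserved == 0);

      return 0;
    }
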
@@ -742,14 +743,19 @@ HierarchicalAllocatorProcess<RoleSorter, FrameworkSorter>::recoverResources(
     }
   }
 
-  // Update resources allocatable on slave (if slave still exists,
+  // Update resources allocated on slave (if slave still exists,
   // which it might not in the event that we dispatched Master::offer
   // before we received Allocator::removeSlave).
   if (slaves.contains(slaveId)) {
-    slaves[slaveId].available += resources;
+    // NOTE: We cannot add the following CHECK due to double-precision
+    // errors. See MESOS-1187 for details.
+    // CHECK(slaves[slaveId].allocated.contains(resources));
+
+    slaves[slaveId].allocated -= resources;
 
     LOG(INFO) << "Recovered " << resources
-              << " (total allocatable: " << slaves[slaveId].available
+              << " (total: " << slaves[slaveId].total
+              << ", allocated: " << slaves[slaveId].allocated
               << ") on slave " << slaveId
               << " from framework " << frameworkId;
   }
@@ -898,11 +904,12 @@ HierarchicalAllocatorProcess<RoleSorter, FrameworkSorter>::allocate(
       FrameworkID frameworkId;
       frameworkId.set_value(frameworkId_);
 
+      // Calculate the currently available resources on the slave.
+      Resources available = slaves[slaveId].total - slaves[slaveId].allocated;
+
       // NOTE: Currently, frameworks are allowed to have '*' role.
       // Calling reserved('*') returns an empty Resources object.
-      Resources resources =
-        slaves[slaveId].available.unreserved() +
-        slaves[slaveId].available.reserved(role);
+      Resources resources = available.unreserved() + available.reserved(role);
 
       // Remove revocable resources if the framework has not opted
       // for them.
@@ -927,7 +934,7 @@ HierarchicalAllocatorProcess<RoleSorter, FrameworkSorter>::allocate(
         // meaning that we always allocate the entire remaining
         // slave resources to a single framework.
         offerable[frameworkId][slaveId] = resources;
-        slaves[slaveId].available -= resources;
+        slaves[slaveId].allocated += resources;
 
         // Reserved resources are only accounted for in the framework
         // sorter, since the reserved resources are not shared across
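
The CHECK left commented out in recoverResources() above is a
consequence of double-precision arithmetic (MESOS-1187). A standalone
sketch of the underlying pitfall, independent of the Mesos codebase:

    // Minimal sketch of the floating point issue behind MESOS-1187.
    #include <iostream>

    int main()
    {
      double allocated = 0.0;
      for (int i = 0; i < 10; i++) {
        allocated += 0.1;  // e.g., ten tasks using 0.1 cpus each
      }

      // Mathematically 'allocated' is exactly 1.0, but in binary
      // floating point it is 0.9999999999999999, so an exact
      // "contains"-style check against recovering 1.0 cpus fails.
      std::cout << std::boolalpha << (allocated >= 1.0) << std::endl;  // false

      return 0;
    }

Since the recovered resources may "exceed" the tracked allocation by a
rounding error, the code subtracts without asserting containment.
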
