This is an automated email from the ASF dual-hosted git repository.

mzhu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/mesos.git

commit 038a4d3bfdb1b2e75dac12e2f1a611ba99b97cec
Author: Meng Zhu <[email protected]>
AuthorDate: Wed Jul 17 18:46:49 2019 -0700

    Added offer rescind logic for guarantees enforcement.
    
    Outstanding offers need to be rescinded as needed to ensure
    roles' requested guarantees can be satisfied.
    
    Note, the rescind effort here is best-effort. It is complex and
    expensive to rescind accurately, due to (1) the cost of tracking
    the correct resource state (e.g. for limits, tracking of the
    precise amount of consumed plus offered (with no reservation
    overlap), and similarly, for guarantees, aggregation of all roles'
    consumption and outstanding offers) and (2) the race between the
    master and the allocator. In addition, rescinding offers for quota is
    mostly about improving a transient state. Once a quota is set,
    hopefully with resource churn, the quota will eventually be
    enforced. Lastly, once Mesos starts to adopt an optimistic offer
    model (MESOS-1607), quota enforcement will happen during admission
    control, rendering offer rescind unnecessary. As a result, we cut
    some corners here to only make best effort rescinding.
    
    Specifically, for guarantee enforcement, we pessimistically assume
    that what seems like "available" resources in the allocator are
    all gone. We greedily rescind offers until rescinded plus sum of
    current consumed and offered exceed the total guarantees.
    
    Also since rescinded resources and consumed and offered may overlap
    (when rescinded contains reservations that are also part of the
    consumed), it is possible that we may under rescind. On the other
    hand, we also pessimistically assume that there are no available
    resources in the cluster. So chances are we are more likely to
    over-rescind than under-rescind.
    
    Also added a test.
    
    Review: https://reviews.apache.org/r/71111
---
 src/master/quota_handler.cpp     | 42 ++++++++++++++++++++++-
 src/tests/master_quota_tests.cpp | 72 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 113 insertions(+), 1 deletion(-)

diff --git a/src/master/quota_handler.cpp b/src/master/quota_handler.cpp
index d1be0ab..1748fb9 100644
--- a/src/master/quota_handler.cpp
+++ b/src/master/quota_handler.cpp
@@ -651,7 +651,47 @@ Future<http::Response> Master::QuotaHandler::_update(
           }
         }
 
-        // TODO(mzhu): Rescind offers to satisfy guarantees.
+        // We then rescind offers to ensure roles' guarantees
+        // can be satisfied.
+        ResourceQuantities guarantees{config.guarantees()};
+        ResourceQuantities rescinded;
+
+        // Following the best-effort approach mentioned above, we
+        // pessimistically assume that what seems like "available" resources
+        // in the allocator are all gone. We greedily rescind offers until
+        // rescinded plus sum of current `consumedAndOffered` exceed the
+        // total guarantees.
+        //
+        // Since `rescinded` and `consumedAndOffered` may overlap (when
+        // rescinded contains reservations that are also part of the
+        // consumed), it is possible that we may under rescind.
+        // On the other hand, we also pessimistically assume that
+        // there are no available resources in the cluster. So chances
+        // are we are more likely to over than under rescind.
+        foreachvalue (const Slave* slave, master->slaves.registered) {
+          if ((rescinded + consumedAndOffered).contains(guarantees)) {
+            break;
+          }
+
+          foreach (Offer* offer, utils::copy(slave->offers)) {
+            if ((rescinded + consumedAndOffered).contains(guarantees)) {
+              break;
+            }
+
+            if (allocatedToRoleSubtree(*offer)) {
+              continue;
+            }
+
+            rescinded += ResourceQuantities::fromResources(offer->resources());
+
+            master->allocator->recoverResources(
+                offer->framework_id(),
+                offer->slave_id(),
+                offer->resources(),
+                None());
+            master->removeOffer(offer, true);
+          }
+        }
       }
 
       return OK();
diff --git a/src/tests/master_quota_tests.cpp b/src/tests/master_quota_tests.cpp
index 93f989f..c0775f4 100644
--- a/src/tests/master_quota_tests.cpp
+++ b/src/tests/master_quota_tests.cpp
@@ -1685,6 +1685,78 @@ TEST_F(MasterQuotaTest, AvailableResourcesAfterRescinding)
 }
 
 
+// This test ensures that outstanding offers are rescinded as needed to satisfy
+// roles' quota guarantees.
+TEST_F(MasterQuotaTest, RescindOffersForUpdateQuotaGuarantees)
+{
+  TestAllocator<> allocator;
+  EXPECT_CALL(allocator, initialize(_, _, _));
+
+  Try<Owned<cluster::Master>> master = StartMaster(&allocator);
+  ASSERT_SOME(master);
+
+  // Start an agent.
+  slave::Flags flags1 = CreateSlaveFlags();
+  flags1.resources = "cpus:1;mem:1024";
+  Owned<MasterDetector> detector = master.get()->createDetector();
+
+  Try<Owned<cluster::Slave>> slave1 = StartSlave(detector.get(), flags1);
+  ASSERT_SOME(slave1);
+
+  // Start a framework under `ROLE1`.
+  FrameworkInfo frameworkInfo1 = DEFAULT_FRAMEWORK_INFO;
+  frameworkInfo1.set_roles(0, ROLE1);
+
+  MockScheduler sched1;
+  MesosSchedulerDriver framework1(
+      &sched1, frameworkInfo1, master.get()->pid, DEFAULT_CREDENTIAL);
+
+  Future<FrameworkID> frameworkId1;
+  EXPECT_CALL(sched1, registered(&framework1, _, _))
+    .WillOnce(FutureArg<1>(&frameworkId1));
+
+  Future<vector<Offer>> offers1;
+  EXPECT_CALL(sched1, resourceOffers(&framework1, _))
+    .WillOnce(FutureArg<1>(&offers1))
+    .WillRepeatedly(Return()); // Ignore subsequent offers.
+
+  Future<Nothing> offerRescinded1;
+  EXPECT_CALL(sched1, offerRescinded(&framework1, _))
+    .WillOnce(FutureSatisfy(&offerRescinded1));
+
+  framework1.start();
+
+  AWAIT_READY(offers1);
+  ASSERT_EQ(1u, offers1->size());
+
+  // Cluster resources: cpus:1;mem:1024
+  // Offered: `ROLE1` cpus:1;mem:1024
+
+  // Set `ROLE2` quota guarantees to be the cluster resources.
+  // Outstanding offers are rescinded.
+  {
+    process::http::Headers headers = createBasicAuthHeaders(DEFAULT_CREDENTIAL);
+    headers["Content-Type"] = "application/json";
+
+    Future<Response> response = process::http::post(
+        master.get()->pid,
+        "/api/v1",
+        headers,
+        createUpdateQuotaRequestBody(
+            createQuotaConfig(ROLE2, "cpus:1;mem:1024", "")));
+
+    AWAIT_EXPECT_RESPONSE_STATUS_EQ(OK().status, response);
+
+    AWAIT_READY(offerRescinded1);
+  }
+
+  // Tear down frameworks before agents to avoid offers being
+  // rescinded again.
+  framework1.stop();
+  framework1.join();
+}
+
+
 // This tests verifies the offer rescind logic for quota limits enforcement.
 // If a role's quota consumption plus offered are above the requested limits,
 // outstanding offers of that role will be rescinded.

Reply via email to