This is an automated email from the ASF dual-hosted git repository.

mani pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/yunikorn-core.git


The following commit(s) were added to refs/heads/master by this push:
     new e3bcd343 [YUNIKORN-2270] GPU Preemption is not triggered as expected when all available GPUs are used. (#759)
e3bcd343 is described below

commit e3bcd343669d65450b88b20cdad0abd0e5b61789
Author: Weiwei Yang <[email protected]>
AuthorDate: Thu Dec 21 11:46:35 2023 +0530

    [YUNIKORN-2270] GPU Preemption is not triggered as expected when all available GPUs are used. (#759)
    
    Closes: #759
    
    Signed-off-by: Manikandan R <[email protected]>
---
 pkg/scheduler/objects/application_test.go |  2 +-
 pkg/scheduler/objects/preemption.go       | 15 +++------------
 pkg/scheduler/partition_test.go           |  3 ++-
 3 files changed, 6 insertions(+), 14 deletions(-)

diff --git a/pkg/scheduler/objects/application_test.go b/pkg/scheduler/objects/application_test.go
index 55cebf30..f91d5216 100644
--- a/pkg/scheduler/objects/application_test.go
+++ b/pkg/scheduler/objects/application_test.go
@@ -1871,7 +1871,7 @@ func TestTryAllocatePreemptQueue(t *testing.T) {
        // pass the time and try again
        ask3.createTime = ask3.createTime.Add(-30 * time.Second)
        alloc3 = 
app2.tryAllocate(resources.NewResourceFromMap(map[string]resources.Quantity{"first":
 0}), true, 30*time.Second, &preemptionAttemptsRemaining, iterator, iterator, 
getNode)
-       assert.Assert(t, alloc3 == nil, "alloc3 not expected")
+       assert.Assert(t, alloc3 != nil && alloc3.result == Reserved, "alloc3 
should be a reservation")
        assert.Assert(t, alloc2.IsPreempted(), "alloc2 should have been 
preempted")
 }
 
diff --git a/pkg/scheduler/objects/preemption.go b/pkg/scheduler/objects/preemption.go
index ebf82da8..0e37845b 100644
--- a/pkg/scheduler/objects/preemption.go
+++ b/pkg/scheduler/objects/preemption.go
@@ -573,20 +573,11 @@ func (p *Preemptor) TryPreemption() (*Allocation, bool) {
                "preempting allocations to free up resources to run ask: 
"+p.ask.GetAllocationKey())
 
        // reserve the selected node for the new allocation if it will fit
-       if p.headRoom.FitInMaxUndef(p.ask.GetAllocatedResource()) {
-               log.Log(log.SchedPreemption).Info("Reserving node for ask after 
preemption",
-                       zap.String("allocationKey", p.ask.GetAllocationKey()),
-                       zap.String("nodeID", nodeID),
-                       zap.Int("victimCount", len(victims)))
-               return newReservedAllocation(nodeID, p.ask), true
-       }
-
-       // can't reserve as queue is still too full, but scheduling should 
succeed after preemption occurs
-       log.Log(log.SchedPreemption).Info("Preempting allocations for ask, but 
not reserving yet as queue is still above capacity",
+       log.Log(log.SchedPreemption).Info("Reserving node for ask after 
preemption",
                zap.String("allocationKey", p.ask.GetAllocationKey()),
+               zap.String("nodeID", nodeID),
                zap.Int("victimCount", len(victims)))
-
-       return nil, true
+       return newReservedAllocation(nodeID, p.ask), true
 }
 
 type predicateCheckResult struct {
diff --git a/pkg/scheduler/partition_test.go b/pkg/scheduler/partition_test.go
index 3ac19868..1565f5fb 100644
--- a/pkg/scheduler/partition_test.go
+++ b/pkg/scheduler/partition_test.go
@@ -1908,6 +1908,7 @@ func TestPreemption(t *testing.T) {
        assert.Assert(t, alloc2.IsPreempted(), "alloc-2 is not preempted")
 
        // allocation should still not do anything as we have not yet released 
the preempted allocation
+       // but the ask should have a reservation
        alloc = partition.tryAllocate()
        if alloc != nil {
                t.Fatal("unexpected allocation")
@@ -1937,7 +1938,7 @@ func TestPreemption(t *testing.T) {
                t.Fatal("missing allocation")
        }
        assert.Equal(t, 0, len(app2.GetReservations()), "ask should not be 
reserved")
-       assert.Equal(t, alloc.GetResult(), objects.Allocated, "result should be 
allocated")
+       assert.Equal(t, alloc.GetResult(), objects.AllocatedReserved, "result 
should be allocated from reservation")
        assert.Equal(t, alloc.GetAllocationKey(), allocID3, "expected ask 
alloc-3 to be allocated")
        assertUserGroupResourceMaxLimits(t, getTestUserGroup(), 
resources.NewResourceFromMap(map[string]resources.Quantity{"vcore": 10000}), 
getExpectedQueuesLimitsForPreemption())
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to