This is an automated email from the ASF dual-hosted git repository.

wilfreds pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/yunikorn-core.git


The following commit(s) were added to refs/heads/master by this push:
     new 4d7c7904 [YUNIKORN-2146] skip preempted placeholder in allocate (#711)
4d7c7904 is described below

commit 4d7c7904f60ebc76ee481a95f38d26276a9e1b32
Author: Wilfred Spiegelenburg <[email protected]>
AuthorDate: Mon Nov 13 18:55:25 2023 +1100

    [YUNIKORN-2146] skip preempted placeholder in allocate (#711)
    
    Skip preempted placeholders in tryPlaceholderAllocate() as the
    replacement will most likely never happen on time. Not skipping could
    also cause other allocations to keep waiting for the confirmation that
    preemption has worked.
    
    Closes: #711
    
    Signed-off-by: Wilfred Spiegelenburg <[email protected]>
---
 pkg/scheduler/objects/application.go |  4 +-
 pkg/scheduler/partition_test.go      | 71 ++++++++++++++++++++++++++++++++++++
 2 files changed, 73 insertions(+), 2 deletions(-)

diff --git a/pkg/scheduler/objects/application.go 
b/pkg/scheduler/objects/application.go
index 90b909d8..7eb739c4 100644
--- a/pkg/scheduler/objects/application.go
+++ b/pkg/scheduler/objects/application.go
@@ -1119,9 +1119,9 @@ func (sa *Application) 
tryPlaceholderAllocate(nodeIterator func() NodeIterator,
                // walk over the placeholders, allow for processing all as we 
can have multiple task groups
                phAllocs := sa.getPlaceholderAllocations()
                for _, ph := range phAllocs {
-                       // we could have already released this placeholder and 
are waiting for the shim to confirm
+                       // we could have already released preempted this 
placeholder and are waiting for the shim to confirm
                        // and check that we have the correct task group before 
trying to swap
-                       if ph.IsReleased() || request.GetTaskGroup() != 
ph.GetTaskGroup() {
+                       if ph.IsReleased() || ph.IsPreempted() || 
request.GetTaskGroup() != ph.GetTaskGroup() {
                                continue
                        }
                        // before we check anything we need to check the 
resources equality
diff --git a/pkg/scheduler/partition_test.go b/pkg/scheduler/partition_test.go
index 6040a5c1..3cef5c8e 100644
--- a/pkg/scheduler/partition_test.go
+++ b/pkg/scheduler/partition_test.go
@@ -3003,6 +3003,77 @@ func TestPlaceholderMatch(t *testing.T) {
        assertLimits(t, getTestUserGroup(), resources.Multiply(phRes, 3))
 }
 
+func TestPreemptedPlaceholderSkip(t *testing.T) {
+       setupUGM()
+       partition, err := newBasePartition()
+       assert.NilError(t, err, "partition create failed")
+       tgRes := 
resources.NewResourceFromMap(map[string]resources.Quantity{"first": 10, 
"second": 10})
+       phRes := 
resources.NewResourceFromMap(map[string]resources.Quantity{"first": 2, 
"second": 2})
+
+       // add node to allow allocation
+       setupNode(t, nodeID1, partition, tgRes)
+
+       // add the app with placeholder request
+       app := newApplicationTG(appID1, "default", "root.default", tgRes)
+       err = partition.AddApplication(app)
+       assert.NilError(t, err, "app-1 should have been added to the partition")
+       // add an ask for a placeholder and allocate
+       ask := newAllocationAskTG(phID, appID1, taskGroup, phRes, true)
+       err = app.AddAllocationAsk(ask)
+       assert.NilError(t, err, "failed to add placeholder ask ph-1 to app")
+       // try to allocate a placeholder via normal allocate
+       ph := partition.tryAllocate()
+       if ph == nil {
+               t.Fatal("expected placeholder ph-1 to be allocated")
+       }
+       phUUID := ph.GetUUID()
+       assert.Equal(t, phID, ph.GetAllocationKey(), "expected allocation of 
ph-1 to be returned")
+       assert.Equal(t, 1, len(app.GetAllPlaceholderData()), "placeholder data 
should be created on allocate")
+
+       // add a new ask the same task group as the placeholder
+       ask = newAllocationAskTG(allocID, appID1, taskGroup, phRes, false)
+       err = app.AddAllocationAsk(ask)
+       assert.NilError(t, err, "failed to add ask alloc-1 to app")
+       alloc := partition.tryAllocate()
+       if alloc != nil {
+               t.Fatal("expected ask not to be allocated (matched task group)")
+       }
+
+       // mark the placeholder as preempted (shortcut not interested in usage 
accounting etc.)
+       ph.MarkPreempted()
+
+       // replace the placeholder should NOT work
+       alloc = partition.tryPlaceholderAllocate()
+       if alloc != nil {
+               t.Fatal("allocation should not have matched placeholder")
+       }
+
+       // release placeholder: do what the context would do after the shim 
processing
+       release := &si.AllocationRelease{
+               PartitionName:   "test",
+               ApplicationID:   appID1,
+               UUID:            phUUID,
+               TerminationType: si.TerminationType_PREEMPTED_BY_SCHEDULER,
+       }
+       released, confirmed := partition.removeAllocation(release)
+       assert.Equal(t, len(released), 0, "expecting no released allocation")
+       if confirmed != nil {
+               t.Fatal("confirmed allocation should be nil")
+       }
+       assert.Equal(t, int64(1), app.GetAllPlaceholderData()[0].TimedOut, 
"placeholder data should show the preemption")
+
+       // normal allocate should work as we have no placeholders left
+       alloc = partition.tryAllocate()
+       if alloc == nil {
+               t.Fatal("expected ask to be allocated (no placeholder left)")
+       }
+       assert.Equal(t, allocID, alloc.GetAllocationKey(), "expected allocation 
of alloc-1 to be returned")
+       assert.Equal(t, 1, len(app.GetAllPlaceholderData()), "placeholder data 
should not be updated")
+       assert.Equal(t, int64(1), app.GetAllPlaceholderData()[0].Count, 
"placeholder data should show 1 available placeholder")
+       assert.Equal(t, int64(0), app.GetAllPlaceholderData()[0].Replaced, 
"placeholder data should show no replacements")
+       assert.Equal(t, int64(1), app.GetAllPlaceholderData()[0].TimedOut, 
"placeholder data should show the preemption")
+}
+
 // simple direct replace with one node
 func TestTryPlaceholderAllocate(t *testing.T) {
        setupUGM()


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to