This is an automated email from the ASF dual-hosted git repository.

mani pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/yunikorn-core.git


The following commit(s) were added to refs/heads/master by this push:
     new 956ade54 [YUNIKORN-3241] Fix fence root searching when queue is over/at max capacity (#1076)
956ade54 is described below

commit 956ade546a8914516a29cff90d0d3e5cc773ec1f
Author: Victor Zhou <[email protected]>
AuthorDate: Tue Mar 24 12:15:12 2026 +0530

    [YUNIKORN-3241] Fix fence root searching when queue is over/at max capacity (#1076)
    
    Closes: #1076
    
    Signed-off-by: mani <[email protected]>
---
 pkg/scheduler/objects/queue.go      | 19 ++++++++++++++-----
 pkg/scheduler/objects/queue_test.go | 32 ++++++++++++++++++++++++++------
 2 files changed, 40 insertions(+), 11 deletions(-)

diff --git a/pkg/scheduler/objects/queue.go b/pkg/scheduler/objects/queue.go
index db7a1f18..45ce26db 100644
--- a/pkg/scheduler/objects/queue.go
+++ b/pkg/scheduler/objects/queue.go
@@ -2005,7 +2005,7 @@ func (sq *Queue) FindEligiblePreemptionVictims(queuePath 
string, ask *Allocation
        priorityMap := make(map[string]int64)
 
        // get the queue which acts as the fence boundary
-       fence := sq.findPreemptionFenceRoot(priorityMap, int64(ask.priority))
+       fence := sq.findPreemptionFenceRoot(priorityMap, int64(ask.priority), 
ask.GetAllocatedResource())
        if fence == nil {
                return nil
        }
@@ -2184,7 +2184,7 @@ func prunePreemptionSnapshots(results 
map[string]*QueuePreemptionSnapshot, askQu
        }
 }
 
-func (sq *Queue) findPreemptionFenceRoot(priorityMap map[string]int64, 
currentPriority int64) *Queue {
+func (sq *Queue) findPreemptionFenceRoot(priorityMap map[string]int64, 
currentPriority int64, askResource *resources.Resource) *Queue {
        if sq == nil {
                return nil
        }
@@ -2197,11 +2197,20 @@ func (sq *Queue) findPreemptionFenceRoot(priorityMap 
map[string]int64, currentPr
        }
        priorityMap[sq.QueuePath] = currentPriority
 
-       // Return this queue as fence root if: 1. FencePreemptionPolicy is set 
2. root queue 3. allocations in the queue reached maximum resources
-       if sq.parent == nil || sq.GetPreemptionPolicy() == 
policies.FencePreemptionPolicy || resources.Equals(sq.maxResource, 
sq.allocatedResource) {
+       shouldFenceByMax := false
+       maxResource := sq.GetMaxResource()
+       if maxResource != nil && len(maxResource.Resources) > 0 {
+               projected := resources.Add(sq.allocatedResource, askResource)
+               shouldFenceByMax = 
!maxResource.StrictlyGreaterThanOrEqualsOnlyExisting(projected)
+       }
+       // Return this queue as fence root if:
+       // 1. FencePreemptionPolicy is set
+       // 2. root queue
+       // 3. projected allocations (current usage + ask) reached or exceeded 
any configured max resource
+       if sq.parent == nil || sq.GetPreemptionPolicy() == 
policies.FencePreemptionPolicy || shouldFenceByMax {
                return sq
        }
-       return sq.parent.findPreemptionFenceRoot(priorityMap, currentPriority)
+       return sq.parent.findPreemptionFenceRoot(priorityMap, currentPriority, 
askResource)
 }
 
 func (sq *Queue) GetCurrentPriority() int32 {
diff --git a/pkg/scheduler/objects/queue_test.go 
b/pkg/scheduler/objects/queue_test.go
index d78d6334..dd00d46e 100644
--- a/pkg/scheduler/objects/queue_test.go
+++ b/pkg/scheduler/objects/queue_test.go
@@ -2019,14 +2019,14 @@ func TestFindEligiblePreemptionVictims(t *testing.T) {
 
        // disabling preemption on victim queue should remove victims from 
consideration
        leaf2.preemptionPolicy = policies.DisabledPreemptionPolicy
-       assert.Equal(t, leaf1.findPreemptionFenceRoot(make(map[string]int64), 
int64(ask.priority)).QueuePath, "root")
+       assert.Equal(t, leaf1.findPreemptionFenceRoot(make(map[string]int64), 
int64(ask.priority), ask.GetAllocatedResource()).QueuePath, "root")
        snapshot = leaf1.FindEligiblePreemptionVictims(leaf1.QueuePath, ask)
        assert.Equal(t, 0, len(victims(snapshot)), "found victims")
        leaf2.preemptionPolicy = policies.DefaultPreemptionPolicy
 
        // fencing parent1 queue should limit scope
        parent1.preemptionPolicy = policies.FencePreemptionPolicy
-       assert.Equal(t, leaf1.findPreemptionFenceRoot(make(map[string]int64), 
int64(ask.priority)).QueuePath, "root.parent1")
+       assert.Equal(t, leaf1.findPreemptionFenceRoot(make(map[string]int64), 
int64(ask.priority), ask.GetAllocatedResource()).QueuePath, "root.parent1")
        snapshot = leaf1.FindEligiblePreemptionVictims(leaf1.QueuePath, ask)
        assert.Equal(t, 3, len(snapshot), "wrong snapshot count")
        assert.Equal(t, 0, len(victims(snapshot)), "found victims")
@@ -2034,7 +2034,7 @@ func TestFindEligiblePreemptionVictims(t *testing.T) {
 
        // fencing leaf1 queue should limit scope
        leaf1.preemptionPolicy = policies.FencePreemptionPolicy
-       assert.Equal(t, leaf1.findPreemptionFenceRoot(make(map[string]int64), 
int64(ask.priority)).QueuePath, "root.parent1.leaf1")
+       assert.Equal(t, leaf1.findPreemptionFenceRoot(make(map[string]int64), 
int64(ask.priority), ask.GetAllocatedResource()).QueuePath, 
"root.parent1.leaf1")
        snapshot = leaf1.FindEligiblePreemptionVictims(leaf1.QueuePath, ask)
        assert.Equal(t, 3, len(snapshot), "wrong snapshot count")
        assert.Equal(t, 0, len(victims(snapshot)), "found victims")
@@ -2042,14 +2042,14 @@ func TestFindEligiblePreemptionVictims(t *testing.T) {
 
        // fencing parent2 queue should not limit scope
        parent2.preemptionPolicy = policies.FencePreemptionPolicy
-       assert.Equal(t, leaf1.findPreemptionFenceRoot(make(map[string]int64), 
int64(ask.priority)).QueuePath, "root")
+       assert.Equal(t, leaf1.findPreemptionFenceRoot(make(map[string]int64), 
int64(ask.priority), ask.GetAllocatedResource()).QueuePath, "root")
        snapshot = leaf1.FindEligiblePreemptionVictims(leaf1.QueuePath, ask)
        assert.Equal(t, 2, len(victims(snapshot)), "wrong victim count")
        parent2.preemptionPolicy = policies.DefaultPreemptionPolicy
 
        // fencing leaf2 queue should not limit scope
        leaf2.preemptionPolicy = policies.FencePreemptionPolicy
-       assert.Equal(t, leaf1.findPreemptionFenceRoot(make(map[string]int64), 
int64(ask.priority)).QueuePath, "root")
+       assert.Equal(t, leaf1.findPreemptionFenceRoot(make(map[string]int64), 
int64(ask.priority), ask.GetAllocatedResource()).QueuePath, "root")
        snapshot = leaf1.FindEligiblePreemptionVictims(leaf1.QueuePath, ask)
        assert.Equal(t, 5, len(snapshot), "wrong victim count")
        assert.Equal(t, 2, len(victims(snapshot)), "wrong victim count")
@@ -2062,12 +2062,32 @@ func TestFindEligiblePreemptionVictims(t *testing.T) {
        parent1.allocatedResource = parent1.maxResource
        assert.Equal(t, parent1.preemptionPolicy, 
policies.DefaultPreemptionPolicy)
        assert.Equal(t, leaf1.preemptionPolicy, 
policies.DefaultPreemptionPolicy)
-       assert.Equal(t, leaf1.findPreemptionFenceRoot(make(map[string]int64), 
int64(ask.priority)).QueuePath, parent1.QueuePath)
+       assert.Equal(t, leaf1.findPreemptionFenceRoot(make(map[string]int64), 
int64(ask.priority), ask.GetAllocatedResource()).QueuePath, parent1.QueuePath)
        snapshot = leaf1.FindEligiblePreemptionVictims(leaf1.QueuePath, ask)
        assert.Equal(t, 3, len(snapshot), "wrong snapshot count")
        assert.Equal(t, 0, len(victims(snapshot)), "wrong victim count")
        parent1.allocatedResource = used
 
+       // parent1 queue is not full yet, and this ask would bring it exactly 
to max resources.
+       // Reaching max exactly should not fence by max check.
+       parent1.allocatedResource = 
resources.NewResourceFromMap(map[string]resources.Quantity{siCommon.Memory: 
100})
+       assert.Equal(t, leaf1.findPreemptionFenceRoot(make(map[string]int64), 
int64(ask.priority), ask.GetAllocatedResource()).QueuePath, "root")
+       snapshot = leaf1.FindEligiblePreemptionVictims(leaf1.QueuePath, ask)
+       assert.Equal(t, 5, len(snapshot), "wrong snapshot count")
+       assert.Equal(t, 2, len(victims(snapshot)), "wrong victim count")
+       parent1.allocatedResource = used
+
+       // empty max resources should not fence by max check
+       usedMax := parent1.maxResource
+       parent1.maxResource = resources.NewResource()
+       assert.Equal(t, leaf1.findPreemptionFenceRoot(make(map[string]int64), 
int64(ask.priority), ask.GetAllocatedResource()).QueuePath, "root")
+       parent1.maxResource = usedMax
+
+       // disjoint max and projected resources should not fence by max check
+       parent1.maxResource = 
resources.NewResourceFromMap(map[string]resources.Quantity{"gpu": 1})
+       assert.Equal(t, leaf1.findPreemptionFenceRoot(make(map[string]int64), 
int64(ask.priority), ask.GetAllocatedResource()).QueuePath, "root")
+       parent1.maxResource = usedMax
+
        // requiring a specific node take alloc out of consideration
        alloc2.SetRequiredNode(nodeID1)
        snapshot = leaf1.FindEligiblePreemptionVictims(leaf1.QueuePath, ask)


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to