This is an automated email from the ASF dual-hosted git repository.

ccondit pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/yunikorn-core.git


The following commit(s) were added to refs/heads/master by this push:
     new d25ccffb [YUNIKORN-2790] Only check requested resource type of 
allocations (#933)
d25ccffb is described below

commit d25ccffb01439660e3f1e4f138d6de111470eb98
Author: Wilfred Spiegelenburg <[email protected]>
AuthorDate: Thu Aug 8 09:06:46 2024 -0500

    [YUNIKORN-2790] Only check requested resource type of allocations (#933)
    
    Queue quota checks should not fail if the resource types requested by an
    allocation are all below the maximum. Resource types that are not part
    of the allocation to be added might be over their maximum.
    
    This handles three cases:
    
    * Node restarts with custom resources that need registering, like GPUs,
      no longer prevent allocations not requesting that resource from being
      scheduled.
    
    * Queue quota configuration changes that lower a quota to below its
      usage for a resource type do not block allocations that do not request
      that type from being scheduled.
    
    * Best effort pods, no memory or cpu request, can still be scheduled
      even if a queue is over its memory and cpu quota.
    
    Closes: #933
    
    Signed-off-by: Craig Condit <[email protected]>
---
 pkg/common/resources/resources.go      | 28 ++++++++++++++++++-------
 pkg/common/resources/resources_test.go | 34 ++++++++++++++++++++++++++++++
 pkg/scheduler/objects/queue.go         | 12 +++++------
 pkg/scheduler/objects/queue_test.go    | 38 ++++++++++++++++++++++++++++++++++
 4 files changed, 99 insertions(+), 13 deletions(-)

diff --git a/pkg/common/resources/resources.go 
b/pkg/common/resources/resources.go
index e1236c7c..0c6eb15c 100644
--- a/pkg/common/resources/resources.go
+++ b/pkg/common/resources/resources.go
@@ -130,18 +130,18 @@ func (r *Resource) ToProto() *si.Resource {
        return proto
 }
 
-// Return a clone (copy) of the resource it is called on.
+// Clone returns a clone (copy) of the resource it is called on.
 // This provides a deep copy of the object with the exact same member set.
 // NOTE: this is a clone not a sparse copy of the original.
 func (r *Resource) Clone() *Resource {
+       if r == nil {
+               return nil
+       }
        ret := NewResource()
-       if r != nil {
-               for k, v := range r.Resources {
-                       ret.Resources[k] = v
-               }
-               return ret
+       for k, v := range r.Resources {
+               ret.Resources[k] = v
        }
-       return nil
+       return ret
 }
 
 // Add additional resource to the base updating the base resource
@@ -364,6 +364,20 @@ func (r *Resource) SubOnlyExisting(delta *Resource) {
        }
 }
 
+// AddOnlyExisting adds delta to base resource, ignoring any type not defined 
in the base resource.
+func AddOnlyExisting(base, delta *Resource) *Resource {
+       // check nil inputs and shortcut
+       if base == nil || delta == nil {
+               return base.Clone()
+       }
+       // neither are nil, add the delta
+       result := NewResource()
+       for k := range base.Resources {
+               result.Resources[k] = addVal(base.Resources[k], 
delta.Resources[k])
+       }
+       return result
+}
+
 // SubEliminateNegative subtracts resource returning a new resource with the 
result
 // A nil resource is considered an empty resource
 // This will return 0 values for negative values
diff --git a/pkg/common/resources/resources_test.go 
b/pkg/common/resources/resources_test.go
index c1092cf4..a497c187 100644
--- a/pkg/common/resources/resources_test.go
+++ b/pkg/common/resources/resources_test.go
@@ -1124,6 +1124,40 @@ func TestSubOnlyExisting(t *testing.T) {
        assert.Assert(t, Equals(left, expected), "sub failed expected %v, 
actual %v", expected, left)
 }
 
+func TestAddOnlyExisting(t *testing.T) {
+       var tests = []struct {
+               caseName string
+               base     map[string]Quantity
+               delta    map[string]Quantity
+               expected map[string]Quantity
+       }{
+               {"nil resources", nil, nil, nil},
+               {"nil base", nil, map[string]Quantity{"a": 5}, nil},
+               {"nil delta", map[string]Quantity{"a": 5}, nil, 
map[string]Quantity{"a": 5}},
+               {"empty base", map[string]Quantity{}, map[string]Quantity{"a": 
5}, map[string]Quantity{}},
+               {"single type base matched", map[string]Quantity{"a": 0}, 
map[string]Quantity{"a": 1, "b": 1}, map[string]Quantity{"a": 1}},
+               {"single type base unmatched", map[string]Quantity{"a": 0}, 
map[string]Quantity{"b": 1}, map[string]Quantity{"a": 0}},
+               {"multi type base partial match", map[string]Quantity{"a": 1, 
"b": 0}, map[string]Quantity{"a": 1, "c": 10}, map[string]Quantity{"a": 2, "b": 
0}},
+               {"multi type base match", map[string]Quantity{"a": 1, "b": 0}, 
map[string]Quantity{"a": 1, "b": 10}, map[string]Quantity{"a": 2, "b": 10}},
+       }
+       for _, tt := range tests {
+               t.Run(tt.caseName, func(t *testing.T) {
+                       var base, delta, expected *Resource
+                       if tt.base != nil {
+                               base = NewResourceFromMap(tt.base)
+                       }
+                       if tt.delta != nil {
+                               delta = NewResourceFromMap(tt.delta)
+                       }
+                       if tt.expected != nil {
+                               expected = NewResourceFromMap(tt.expected)
+                       }
+                       result := AddOnlyExisting(base, delta)
+                       assert.Assert(t, Equals(result, expected), "incorrect 
result returned")
+               })
+       }
+}
+
 func TestSubErrorNegative(t *testing.T) {
        // simple case (nil checks)
        result, err := SubErrorNegative(nil, nil)
diff --git a/pkg/scheduler/objects/queue.go b/pkg/scheduler/objects/queue.go
index 36519cf5..bac9e84b 100644
--- a/pkg/scheduler/objects/queue.go
+++ b/pkg/scheduler/objects/queue.go
@@ -993,8 +993,7 @@ func (sq *Queue) isRoot() bool {
 // Guard against going over max resources if set
 func (sq *Queue) IncAllocatedResource(alloc *resources.Resource, nodeReported 
bool) error {
        // check this queue: failure stops checks if the allocation is not part 
of a node addition
-       fit, newAllocated := sq.allocatedResFits(alloc)
-       if !nodeReported && !fit {
+       if !nodeReported && !sq.allocatedResFits(alloc) {
                return fmt.Errorf("allocation (%v) puts queue '%s' over maximum 
allocation (%v), current usage (%v)",
                        alloc, sq.QueuePath, sq.maxResource, 
sq.allocatedResource)
        }
@@ -1017,17 +1016,18 @@ func (sq *Queue) IncAllocatedResource(alloc 
*resources.Resource, nodeReported bo
        sq.Lock()
        defer sq.Unlock()
        // all OK update this queue
-       sq.allocatedResource = newAllocated
+       sq.allocatedResource = resources.Add(sq.allocatedResource, alloc)
        sq.updateAllocatedResourceMetrics()
        return nil
 }
 
+// allocatedResFits adds the passed in resource to the allocatedResource of 
the queue and checks if it still fits in the
+// queues' maximum. If the resource fits it returns true otherwise false.
 // small helper method to access sq.maxResource+sq.allocatedResource and avoid 
Clone() call
-func (sq *Queue) allocatedResFits(res *resources.Resource) (bool, 
*resources.Resource) {
+func (sq *Queue) allocatedResFits(alloc *resources.Resource) bool {
        sq.RLock()
        defer sq.RUnlock()
-       newAllocated := resources.Add(sq.allocatedResource, res)
-       return sq.maxResource.FitInMaxUndef(newAllocated), newAllocated
+       return sq.maxResource.FitInMaxUndef(resources.AddOnlyExisting(alloc, 
sq.allocatedResource))
 }
 
 // DecAllocatedResource decrement the allocated resources for this queue 
(recursively)
diff --git a/pkg/scheduler/objects/queue_test.go 
b/pkg/scheduler/objects/queue_test.go
index a43306f7..5eaa081f 100644
--- a/pkg/scheduler/objects/queue_test.go
+++ b/pkg/scheduler/objects/queue_test.go
@@ -2556,3 +2556,41 @@ func isNewApplicationEvent(t *testing.T, app 
*Application, record *si.EventRecor
        assert.Equal(t, si.EventRecord_ADD, record.EventChangeType, "incorrect 
change type, expected add")
        assert.Equal(t, si.EventRecord_APP_NEW, record.EventChangeDetail, 
"incorrect change detail, expected none")
 }
+
+func TestQueue_allocatedResFits(t *testing.T) {
+       const first = "first"
+       const second = "second"
+       root, err := createRootQueue(nil)
+       assert.NilError(t, err, "queue create failed")
+
+       tests := []struct {
+               name   string
+               quota  map[string]string
+               used   map[string]string
+               change map[string]string
+               want   bool
+       }{
+               {"all nil", nil, nil, nil, true},
+               {"nil max no usage", nil, nil, map[string]string{first: "1"}, 
true},
+               {"nil max set usage", nil, map[string]string{first: "1"}, 
map[string]string{second: "1"}, true},
+               {"max = usage same in alloc", map[string]string{first: "1"}, 
map[string]string{first: "1"}, map[string]string{first: "1"}, false},
+               {"max = usage other in alloc", map[string]string{first: "1"}, 
map[string]string{first: "1"}, map[string]string{second: "1"}, true},
+               {"usage over max other in alloc", map[string]string{first: "1", 
second: "0"}, map[string]string{second: "1"}, map[string]string{first: "1"}, 
true},
+               {"usage over max same in alloc", map[string]string{first: "1", 
second: "0"}, map[string]string{second: "1"}, map[string]string{second: "1"}, 
false},
+               {"partial fit", map[string]string{first: "2", second: "0"}, 
map[string]string{first: "1", second: "1"}, map[string]string{first: "1", 
second: "1"}, false},
+       }
+       for _, tt := range tests {
+               t.Run(tt.name, func(t *testing.T) {
+                       var quota, used, change *resources.Resource
+                       quota, err = resources.NewResourceFromConf(tt.quota)
+                       assert.NilError(t, err, "failed to create basic 
resource: quota")
+                       root.SetMaxResource(quota)
+                       used, err = resources.NewResourceFromConf(tt.used)
+                       assert.NilError(t, err, "failed to create basic 
resource: used")
+                       root.allocatedResource = used
+                       change, err = resources.NewResourceFromConf(tt.change)
+                       assert.NilError(t, err, "failed to create basic 
resource: diff")
+                       assert.Equal(t, root.allocatedResFits(change), tt.want, 
"allocatedResFits incorrect state returned")
+               })
+       }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to