This is an automated email from the ASF dual-hosted git repository.
ccondit pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/yunikorn-core.git
The following commit(s) were added to refs/heads/master by this push:
new d25ccffb [YUNIKORN-2790] Only check requested resource type of
allocations (#933)
d25ccffb is described below
commit d25ccffb01439660e3f1e4f138d6de111470eb98
Author: Wilfred Spiegelenburg <[email protected]>
AuthorDate: Thu Aug 8 09:06:46 2024 -0500
[YUNIKORN-2790] Only check requested resource type of allocations (#933)
Queue quota checks should not fail if the resource types requested by an
allocation are all below the maximum. Resource types that are not part
of the allocation to be added might be over their maximum.
This handles three cases:
* Node restarts with custom resources that need registering, like GPUs,
no longer prevent allocations not requesting that resource from being
scheduled.
* Queue quota configuration changes that lower a quota to below its
usage for a resource type do not block allocations that do not request
that type from being scheduled.
* Best effort pods, no memory or cpu request, can still be scheduled
even if a queue is over its memory and cpu quota.
Closes: #933
Signed-off-by: Craig Condit <[email protected]>
---
pkg/common/resources/resources.go | 28 ++++++++++++++++++-------
pkg/common/resources/resources_test.go | 34 ++++++++++++++++++++++++++++++
pkg/scheduler/objects/queue.go | 12 +++++------
pkg/scheduler/objects/queue_test.go | 38 ++++++++++++++++++++++++++++++++++
4 files changed, 99 insertions(+), 13 deletions(-)
diff --git a/pkg/common/resources/resources.go
b/pkg/common/resources/resources.go
index e1236c7c..0c6eb15c 100644
--- a/pkg/common/resources/resources.go
+++ b/pkg/common/resources/resources.go
@@ -130,18 +130,18 @@ func (r *Resource) ToProto() *si.Resource {
return proto
}
-// Return a clone (copy) of the resource it is called on.
+// Clone returns a clone (copy) of the resource it is called on.
// This provides a deep copy of the object with the exact same member set.
// NOTE: this is a clone not a sparse copy of the original.
func (r *Resource) Clone() *Resource {
+ if r == nil {
+ return nil
+ }
ret := NewResource()
- if r != nil {
- for k, v := range r.Resources {
- ret.Resources[k] = v
- }
- return ret
+ for k, v := range r.Resources {
+ ret.Resources[k] = v
}
- return nil
+ return ret
}
// Add additional resource to the base updating the base resource
@@ -364,6 +364,20 @@ func (r *Resource) SubOnlyExisting(delta *Resource) {
}
}
+// AddOnlyExisting adds delta to base resource, ignoring any type not defined
in the base resource.
+func AddOnlyExisting(base, delta *Resource) *Resource {
+ // check nil inputs and shortcut
+ if base == nil || delta == nil {
+ return base.Clone()
+ }
+ // neither are nil, add the delta
+ result := NewResource()
+ for k := range base.Resources {
+ result.Resources[k] = addVal(base.Resources[k],
delta.Resources[k])
+ }
+ return result
+}
+
// SubEliminateNegative subtracts resource returning a new resource with the
result
// A nil resource is considered an empty resource
// This will return 0 values for negative values
diff --git a/pkg/common/resources/resources_test.go
b/pkg/common/resources/resources_test.go
index c1092cf4..a497c187 100644
--- a/pkg/common/resources/resources_test.go
+++ b/pkg/common/resources/resources_test.go
@@ -1124,6 +1124,40 @@ func TestSubOnlyExisting(t *testing.T) {
assert.Assert(t, Equals(left, expected), "sub failed expected %v,
actual %v", expected, left)
}
+func TestAddOnlyExisting(t *testing.T) {
+ var tests = []struct {
+ caseName string
+ base map[string]Quantity
+ delta map[string]Quantity
+ expected map[string]Quantity
+ }{
+ {"nil resources", nil, nil, nil},
+ {"nil base", nil, map[string]Quantity{"a": 5}, nil},
+ {"nil delta", map[string]Quantity{"a": 5}, nil,
map[string]Quantity{"a": 5}},
+ {"empty base", map[string]Quantity{}, map[string]Quantity{"a":
5}, map[string]Quantity{}},
+ {"single type base matched", map[string]Quantity{"a": 0},
map[string]Quantity{"a": 1, "b": 1}, map[string]Quantity{"a": 1}},
+ {"single type base unmatched", map[string]Quantity{"a": 0},
map[string]Quantity{"b": 1}, map[string]Quantity{"a": 0}},
+ {"multi type base partial match", map[string]Quantity{"a": 1,
"b": 0}, map[string]Quantity{"a": 1, "c": 10}, map[string]Quantity{"a": 2, "b":
0}},
+ {"multi type base match", map[string]Quantity{"a": 1, "b": 0},
map[string]Quantity{"a": 1, "b": 10}, map[string]Quantity{"a": 2, "b": 10}},
+ }
+ for _, tt := range tests {
+ t.Run(tt.caseName, func(t *testing.T) {
+ var base, delta, expected *Resource
+ if tt.base != nil {
+ base = NewResourceFromMap(tt.base)
+ }
+ if tt.delta != nil {
+ delta = NewResourceFromMap(tt.delta)
+ }
+ if tt.expected != nil {
+ expected = NewResourceFromMap(tt.expected)
+ }
+ result := AddOnlyExisting(base, delta)
+ assert.Assert(t, Equals(result, expected), "incorrect
result returned")
+ })
+ }
+}
+
func TestSubErrorNegative(t *testing.T) {
// simple case (nil checks)
result, err := SubErrorNegative(nil, nil)
diff --git a/pkg/scheduler/objects/queue.go b/pkg/scheduler/objects/queue.go
index 36519cf5..bac9e84b 100644
--- a/pkg/scheduler/objects/queue.go
+++ b/pkg/scheduler/objects/queue.go
@@ -993,8 +993,7 @@ func (sq *Queue) isRoot() bool {
// Guard against going over max resources if set
func (sq *Queue) IncAllocatedResource(alloc *resources.Resource, nodeReported
bool) error {
// check this queue: failure stops checks if the allocation is not part
of a node addition
- fit, newAllocated := sq.allocatedResFits(alloc)
- if !nodeReported && !fit {
+ if !nodeReported && !sq.allocatedResFits(alloc) {
return fmt.Errorf("allocation (%v) puts queue '%s' over maximum
allocation (%v), current usage (%v)",
alloc, sq.QueuePath, sq.maxResource,
sq.allocatedResource)
}
@@ -1017,17 +1016,18 @@ func (sq *Queue) IncAllocatedResource(alloc
*resources.Resource, nodeReported bo
sq.Lock()
defer sq.Unlock()
// all OK update this queue
- sq.allocatedResource = newAllocated
+ sq.allocatedResource = resources.Add(sq.allocatedResource, alloc)
sq.updateAllocatedResourceMetrics()
return nil
}
+// allocatedResFits adds the passed in resource to the allocatedResource of
the queue and checks if it still fits in the
+// queues' maximum. If the resource fits it returns true otherwise false.
// small helper method to access sq.maxResource+sq.allocatedResource and avoid
Clone() call
-func (sq *Queue) allocatedResFits(res *resources.Resource) (bool,
*resources.Resource) {
+func (sq *Queue) allocatedResFits(alloc *resources.Resource) bool {
sq.RLock()
defer sq.RUnlock()
- newAllocated := resources.Add(sq.allocatedResource, res)
- return sq.maxResource.FitInMaxUndef(newAllocated), newAllocated
+ return sq.maxResource.FitInMaxUndef(resources.AddOnlyExisting(alloc,
sq.allocatedResource))
}
// DecAllocatedResource decrement the allocated resources for this queue
(recursively)
diff --git a/pkg/scheduler/objects/queue_test.go
b/pkg/scheduler/objects/queue_test.go
index a43306f7..5eaa081f 100644
--- a/pkg/scheduler/objects/queue_test.go
+++ b/pkg/scheduler/objects/queue_test.go
@@ -2556,3 +2556,41 @@ func isNewApplicationEvent(t *testing.T, app
*Application, record *si.EventRecor
assert.Equal(t, si.EventRecord_ADD, record.EventChangeType, "incorrect
change type, expected add")
assert.Equal(t, si.EventRecord_APP_NEW, record.EventChangeDetail,
"incorrect change detail, expected none")
}
+
+func TestQueue_allocatedResFits(t *testing.T) {
+ const first = "first"
+ const second = "second"
+ root, err := createRootQueue(nil)
+ assert.NilError(t, err, "queue create failed")
+
+ tests := []struct {
+ name string
+ quota map[string]string
+ used map[string]string
+ change map[string]string
+ want bool
+ }{
+ {"all nil", nil, nil, nil, true},
+ {"nil max no usage", nil, nil, map[string]string{first: "1"},
true},
+ {"nil max set usage", nil, map[string]string{first: "1"},
map[string]string{second: "1"}, true},
+ {"max = usage same in alloc", map[string]string{first: "1"},
map[string]string{first: "1"}, map[string]string{first: "1"}, false},
+ {"max = usage other in alloc", map[string]string{first: "1"},
map[string]string{first: "1"}, map[string]string{second: "1"}, true},
+ {"usage over max other in alloc", map[string]string{first: "1",
second: "0"}, map[string]string{second: "1"}, map[string]string{first: "1"},
true},
+ {"usage over max same in alloc", map[string]string{first: "1",
second: "0"}, map[string]string{second: "1"}, map[string]string{second: "1"},
false},
+ {"partial fit", map[string]string{first: "2", second: "0"},
map[string]string{first: "1", second: "1"}, map[string]string{first: "1",
second: "1"}, false},
+ }
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ var quota, used, change *resources.Resource
+ quota, err = resources.NewResourceFromConf(tt.quota)
+ assert.NilError(t, err, "failed to create basic
resource: quota")
+ root.SetMaxResource(quota)
+ used, err = resources.NewResourceFromConf(tt.used)
+ assert.NilError(t, err, "failed to create basic
resource: used")
+ root.allocatedResource = used
+ change, err = resources.NewResourceFromConf(tt.change)
+ assert.NilError(t, err, "failed to create basic
resource: diff")
+ assert.Equal(t, root.allocatedResFits(change), tt.want,
"allocatedResFits incorrect state returned")
+ })
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]