This is an automated email from the ASF dual-hosted git repository.

wilfreds pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/yunikorn-core.git


The following commit(s) were added to refs/heads/master by this push:
     new f439d805 [YUNIKORN-2796] Prune zero resource type from root and 
partition (#943)
f439d805 is described below

commit f439d8053607a40d0048c039796f6dc893d01741
Author: Wilfred Spiegelenburg <[email protected]>
AuthorDate: Wed Aug 14 16:32:36 2024 +1000

    [YUNIKORN-2796] Prune zero resource type from root and partition (#943)
    
    After the last node is removed that registers a specific resource type
    the root queue and partition show it as a resource type with 0 maximum.
    A resource type that does not exist should not be shown at all.
Pruning the 0 valued resource types for root and partition will fix that.
    
    Clean up of the partition code to use the same function to update
    partition and root settings for create, update and delete of a node.
    
    Fix root queue check for max resource.
    Fix tests that fake allocation or node registration to correctly set the
    root queue limits.
    
    Closes: #943
    
    Signed-off-by: Wilfred Spiegelenburg <[email protected]>
---
 pkg/common/resources/resources.go        |  15 +++-
 pkg/common/resources/resources_test.go   |  37 ++++++++-
 pkg/scheduler/objects/node.go            |  11 ++-
 pkg/scheduler/objects/preemption_test.go |  34 ++++-----
 pkg/scheduler/objects/queue.go           |   6 ++
 pkg/scheduler/objects/queue_test.go      | 126 ++++++++++++++++++++++---------
 pkg/scheduler/partition.go               |  56 +++++---------
 pkg/scheduler/partition_test.go          |  36 ++++++---
 8 files changed, 217 insertions(+), 104 deletions(-)

diff --git a/pkg/common/resources/resources.go 
b/pkg/common/resources/resources.go
index 1b2a036b..c5bc7c54 100644
--- a/pkg/common/resources/resources.go
+++ b/pkg/common/resources/resources.go
@@ -144,7 +144,20 @@ func (r *Resource) Clone() *Resource {
        return ret
 }
 
-// Add additional resource to the base updating the base resource
+// Prune removes any resource type that has a zero value set.
+// Note that a zero value set and a type not being set are interpreted 
differently for quotas.
+func (r *Resource) Prune() {
+       if r == nil {
+               return
+       }
+       for k, v := range r.Resources {
+               if v == 0 {
+                       delete(r.Resources, k)
+               }
+       }
+}
+
+// AddTo adds the resource to the base updating the base resource
 // Should be used by temporary computation only
 // A nil base resource does not change
 // A nil passed in resource is treated as a zero valued resource and leaves 
base unchanged
diff --git a/pkg/common/resources/resources_test.go 
b/pkg/common/resources/resources_test.go
index 49df426a..88c2c421 100644
--- a/pkg/common/resources/resources_test.go
+++ b/pkg/common/resources/resources_test.go
@@ -24,9 +24,10 @@ import (
        "reflect"
        "testing"
 
-       "github.com/apache/yunikorn-scheduler-interface/lib/go/common"
-
+       "golang.org/x/exp/maps"
        "gotest.tools/v3/assert"
+
+       "github.com/apache/yunikorn-scheduler-interface/lib/go/common"
 )
 
 func CheckLenOfResource(res *Resource, expected int) (bool, string) {
@@ -2137,3 +2138,35 @@ func TestResource_DominantResource(t *testing.T) {
                })
        }
 }
+
+func TestResource_PruneNil(t *testing.T) {
+       // make sure we're nil safe IDE will complain about the receiver being 
nil
+       defer func() {
+               if r := recover(); r != nil {
+                       t.Fatal("panic on nil resource in prune test")
+               }
+       }()
+       var empty *Resource
+       empty.Prune()
+}
+
+func TestResource_Prune(t *testing.T) {
+       var tests = []struct {
+               caseName string
+               input    map[string]Quantity
+               output   map[string]Quantity
+       }{
+               {"no types", map[string]Quantity{}, map[string]Quantity{}},
+               {"all types with value", map[string]Quantity{"first": 1, 
"second": -2, "third": 3}, map[string]Quantity{"first": 1, "second": -2, 
"third": 3}},
+               {"zero type", map[string]Quantity{"first": 1, "zero": 0, 
"third": 3}, map[string]Quantity{"first": 1, "third": 3}},
+               {"no types with value", map[string]Quantity{"first": 0, 
"second": 0}, map[string]Quantity{}},
+       }
+       for _, tt := range tests {
+               t.Run(tt.caseName, func(t *testing.T) {
+                       original := NewResourceFromMap(tt.input)
+                       original.Prune()
+                       assert.Equal(t, len(tt.output), 
len(original.Resources), "unexpected resource types returned")
+                       assert.Assert(t, maps.Equal(original.Resources, 
tt.output), "resource type maps are not equal")
+               })
+       }
+}
diff --git a/pkg/scheduler/objects/node.go b/pkg/scheduler/objects/node.go
index 3ef64a1e..db77ee68 100644
--- a/pkg/scheduler/objects/node.go
+++ b/pkg/scheduler/objects/node.go
@@ -155,15 +155,22 @@ func (sn *Node) GetCapacity() *resources.Resource {
        return sn.totalResource.Clone()
 }
 
+// SetCapacity changes the node resource capacity and returns the resource 
delta.
+// The delta is positive for an increased capacity and negative for a decrease.
 func (sn *Node) SetCapacity(newCapacity *resources.Resource) 
*resources.Resource {
-       defer sn.notifyListeners()
+       var delta *resources.Resource
+       defer func() {
+               if delta != nil {
+                       sn.notifyListeners()
+               }
+       }()
        sn.Lock()
        defer sn.Unlock()
        if resources.Equals(sn.totalResource, newCapacity) {
                log.Log(log.SchedNode).Debug("skip updating capacity, not 
changed")
                return nil
        }
-       delta := resources.Sub(newCapacity, sn.totalResource)
+       delta = resources.Sub(newCapacity, sn.totalResource)
        sn.totalResource = newCapacity
        sn.refreshAvailableResource()
        sn.nodeEvents.SendNodeCapacityChangedEvent(sn.NodeID, 
sn.totalResource.Clone())
diff --git a/pkg/scheduler/objects/preemption_test.go 
b/pkg/scheduler/objects/preemption_test.go
index 04eb6a05..e928f207 100644
--- a/pkg/scheduler/objects/preemption_test.go
+++ b/pkg/scheduler/objects/preemption_test.go
@@ -144,7 +144,7 @@ func 
TestCheckPreemptionQueueGuaranteesWithNoGuaranteedResources(t *testing.T) {
                t.Run(tt.testName, func(t *testing.T) {
                        node := newNode("node1", 
map[string]resources.Quantity{"first": 20})
                        iterator := getNodeIteratorFn(node)
-                       rootQ, err := createRootQueue(nil)
+                       rootQ, err := 
createRootQueue(map[string]string{"first": "20"})
                        assert.NilError(t, err)
                        parentQ, err := createManagedQueueGuaranteed(rootQ, 
"parent", true, map[string]string{}, tt.parentGuaranteed)
                        assert.NilError(t, err)
@@ -242,7 +242,7 @@ func TestTryPreemptionOnNode(t *testing.T) {
        node1 := newNode(nodeID1, map[string]resources.Quantity{"first": 5, 
"pods": 1})
        node2 := newNode(nodeID2, map[string]resources.Quantity{"first": 5, 
"pods": 1})
        iterator := getNodeIteratorFn(node1, node2)
-       rootQ, err := createRootQueue(nil)
+       rootQ, err := createRootQueue(map[string]string{"first": "10", "pods": 
"2"})
        assert.NilError(t, err)
        parentQ, err := createManagedQueueGuaranteed(rootQ, "parent", true, 
map[string]string{"first": "20"}, map[string]string{"first": "10"})
        assert.NilError(t, err)
@@ -305,7 +305,7 @@ func TestTryPreemptionOnNodeWithOGParentAndUGPreemptor(t 
*testing.T) {
        node1 := newNode(nodeID1, map[string]resources.Quantity{"first": 3, 
"pods": 1})
        node2 := newNode(nodeID2, map[string]resources.Quantity{"first": 3, 
"pods": 1})
        iterator := getNodeIteratorFn(node1, node2)
-       rootQ, err := createRootQueue(nil)
+       rootQ, err := createRootQueue(map[string]string{"first": "6", "pods": 
"2"})
        assert.NilError(t, err)
        parentQ, err := createManagedQueueGuaranteed(rootQ, "parent", true, 
map[string]string{"first": "20"}, map[string]string{"first": "2"})
        assert.NilError(t, err)
@@ -370,7 +370,7 @@ func TestTryPreemptionOnQueue(t *testing.T) {
        node1 := newNode(nodeID1, map[string]resources.Quantity{"first": 10, 
"pods": 2})
        node2 := newNode(nodeID2, map[string]resources.Quantity{"first": 10, 
"pods": 2})
        iterator := getNodeIteratorFn(node1, node2)
-       rootQ, err := createRootQueue(nil)
+       rootQ, err := createRootQueue(map[string]string{"first": "20", "pods": 
"4"})
        assert.NilError(t, err)
        parentQ, err := createManagedQueueGuaranteed(rootQ, "parent", true, 
map[string]string{"first": "10"}, nil)
        assert.NilError(t, err)
@@ -432,7 +432,7 @@ func 
TestTryPreemption_VictimsAvailable_InsufficientResource(t *testing.T) {
        node1 := newNode(nodeID1, map[string]resources.Quantity{"first": 10, 
"pods": 2})
        node2 := newNode(nodeID2, map[string]resources.Quantity{"first": 10, 
"pods": 2})
        iterator := getNodeIteratorFn(node1, node2)
-       rootQ, err := createRootQueue(map[string]string{"first": "20", "pods": 
"5"})
+       rootQ, err := createRootQueue(map[string]string{"first": "20", "pods": 
"4"})
        assert.NilError(t, err)
        parentQ, err := createManagedQueueGuaranteed(rootQ, "parent", true, 
map[string]string{"first": "8"}, nil)
        assert.NilError(t, err)
@@ -485,7 +485,7 @@ func 
TestTryPreemption_VictimsOnDifferentNodes_InsufficientResource(t *testing.T
        node1 := newNode(nodeID1, map[string]resources.Quantity{"first": 5, 
"pods": 1})
        node2 := newNode(nodeID2, map[string]resources.Quantity{"first": 5, 
"pods": 1})
        iterator := getNodeIteratorFn(node1, node2)
-       rootQ, err := createRootQueue(nil)
+       rootQ, err := createRootQueue(map[string]string{"first": "10", "pods": 
"2"})
        assert.NilError(t, err)
        parentQ, err := createManagedQueueGuaranteed(rootQ, "parent", true, 
map[string]string{"first": "6"}, nil)
        assert.NilError(t, err)
@@ -548,7 +548,7 @@ func TestTryPreemption_VictimsAvailableOnDifferentNodes(t 
*testing.T) {
        node1 := newNode(nodeID1, map[string]resources.Quantity{"first": 5, 
"pods": 1})
        node2 := newNode(nodeID2, map[string]resources.Quantity{"first": 4, 
"pods": 1})
        iterator := getNodeIteratorFn(node1, node2)
-       rootQ, err := createRootQueue(nil)
+       rootQ, err := createRootQueue(map[string]string{"first": "9", "pods": 
"2"})
        assert.NilError(t, err)
        parentQ, err := createManagedQueueGuaranteed(rootQ, "parent", true, 
map[string]string{"first": "6"}, nil)
        assert.NilError(t, err)
@@ -613,7 +613,7 @@ func TestTryPreemption_OnQueue_VictimsOnDifferentNodes(t 
*testing.T) {
        node1 := newNode(nodeID1, map[string]resources.Quantity{"first": 30})
        node2 := newNode(nodeID2, map[string]resources.Quantity{"first": 30})
        iterator := getNodeIteratorFn(node1, node2)
-       rootQ, err := createRootQueue(nil)
+       rootQ, err := createRootQueue(map[string]string{"first": "60"})
        assert.NilError(t, err)
        parentQ, err := createManagedQueueGuaranteed(rootQ, "parent", true, 
map[string]string{"first": "18"}, nil)
        assert.NilError(t, err)
@@ -697,7 +697,7 @@ func 
TestTryPreemption_OnQueue_VictimsAvailable_LowerPriority(t *testing.T) {
        node1 := newNode(nodeID1, map[string]resources.Quantity{"first": 30})
        node2 := newNode(nodeID2, map[string]resources.Quantity{"first": 30})
        iterator := getNodeIteratorFn(node1, node2)
-       rootQ, err := createRootQueue(nil)
+       rootQ, err := createRootQueue(map[string]string{"first": "60"})
        assert.NilError(t, err)
        parentQ, err := createManagedQueueGuaranteed(rootQ, "parent", true, 
map[string]string{"first": "18"}, nil)
        assert.NilError(t, err)
@@ -781,7 +781,7 @@ func 
TestTryPreemption_OnQueue_VictimsAvailable_LowerPriority(t *testing.T) {
 func TestTryPreemption_AskResTypesDifferent_GuaranteedSetOnPreemptorSide(t 
*testing.T) {
        node := newNode(nodeID1, map[string]resources.Quantity{"vcores": 3, 
"mem": 400})
        iterator := getNodeIteratorFn(node)
-       rootQ, err := createRootQueue(nil)
+       rootQ, err := createRootQueue(map[string]string{"vcores": "3", "mem": 
"400"})
        assert.NilError(t, err)
        parentQ, err := createManagedQueueGuaranteed(rootQ, "parent", true, 
map[string]string{"vcores": "2"}, nil)
        assert.NilError(t, err)
@@ -851,7 +851,7 @@ func 
TestTryPreemption_AskResTypesDifferent_GuaranteedSetOnPreemptorSide(t *test
 func 
TestTryPreemption_OnNode_AskResTypesDifferent_GuaranteedSetOnPreemptorSide(t 
*testing.T) {
        node := newNode(nodeID1, map[string]resources.Quantity{"vcores": 2, 
"mem": 400})
        iterator := getNodeIteratorFn(node)
-       rootQ, err := createRootQueue(nil)
+       rootQ, err := createRootQueue(map[string]string{"vcores": "2", "mem": 
"400"})
        assert.NilError(t, err)
        parentQ, err := createManagedQueueGuaranteed(rootQ, "parent", true, 
map[string]string{"vcores": "2"}, nil)
        assert.NilError(t, err)
@@ -922,7 +922,7 @@ func 
TestTryPreemption_OnNode_AskResTypesDifferent_GuaranteedSetOnPreemptorSide(
 func 
TestTryPreemption_AskResTypesDifferent_GuaranteedSetOnVictimAndPreemptorSides(t 
*testing.T) {
        node := newNode(nodeID1, map[string]resources.Quantity{"vcores": 5, 
"mem": 700})
        iterator := getNodeIteratorFn(node)
-       rootQ, err := createRootQueue(nil)
+       rootQ, err := createRootQueue(map[string]string{"vcores": "5", "mem": 
"700"})
        assert.NilError(t, err)
        parentQ, err := createManagedQueueGuaranteed(rootQ, "parent", true, 
map[string]string{"vcores": "3"}, nil)
        assert.NilError(t, err)
@@ -1024,7 +1024,7 @@ func 
TestTryPreemption_AskResTypesDifferent_GuaranteedSetOnVictimAndPreemptorSid
 func 
TestTryPreemption_OnNode_AskResTypesDifferent_GuaranteedSetOnVictimAndPreemptorSides(t
 *testing.T) {
        node := newNode(nodeID1, map[string]resources.Quantity{"vcores": 3, 
"mem": 600})
        iterator := getNodeIteratorFn(node)
-       rootQ, err := createRootQueue(nil)
+       rootQ, err := createRootQueue(map[string]string{"vcores": "3", "mem": 
"600"})
        assert.NilError(t, err)
        parentQ, err := createManagedQueueGuaranteed(rootQ, "parent", true, 
map[string]string{"vcores": "3"}, nil)
        assert.NilError(t, err)
@@ -1125,7 +1125,7 @@ func 
TestTryPreemption_OnNode_AskResTypesDifferent_GuaranteedSetOnVictimAndPreem
 func TestTryPreemption_AskResTypesSame_GuaranteedSetOnPreemptorSide(t 
*testing.T) {
        node := newNode(nodeID1, map[string]resources.Quantity{"vcores": 5, 
"gpu": 300, "mem": 200})
        iterator := getNodeIteratorFn(node)
-       rootQ, err := createRootQueue(nil)
+       rootQ, err := createRootQueue(map[string]string{"vcores": "5", "gpu": 
"300", "mem": "200"})
        assert.NilError(t, err)
        parentQ, err := createManagedQueueGuaranteed(rootQ, "parent", true, 
nil, nil)
        assert.NilError(t, err)
@@ -1227,7 +1227,7 @@ func 
TestTryPreemption_AskResTypesSame_GuaranteedSetOnPreemptorSide(t *testing.T
 func TestTryPreemption_OnNode_AskResTypesSame_GuaranteedSetOnPreemptorSide(t 
*testing.T) {
        node := newNode(nodeID1, map[string]resources.Quantity{"vcores": 3, 
"gpu": 300, "mem": 200})
        iterator := getNodeIteratorFn(node)
-       rootQ, err := createRootQueue(nil)
+       rootQ, err := createRootQueue(map[string]string{"vcores": "3", "gpu": 
"300", "mem": "200"})
        assert.NilError(t, err)
        parentQ, err := createManagedQueueGuaranteed(rootQ, "parent", true, 
nil, nil)
        assert.NilError(t, err)
@@ -1328,7 +1328,7 @@ func 
TestTryPreemption_OnNode_AskResTypesSame_GuaranteedSetOnPreemptorSide(t *te
 func 
TestTryPreemption_AskResTypesSame_GuaranteedSetOnVictimAndPreemptorSides(t 
*testing.T) {
        node := newNode(nodeID1, map[string]resources.Quantity{"vcores": 5, 
"gpu": 700, "mem": 200})
        iterator := getNodeIteratorFn(node)
-       rootQ, err := createRootQueue(nil)
+       rootQ, err := createRootQueue(map[string]string{"vcores": "5", "gpu": 
"700", "mem": "200"})
        assert.NilError(t, err)
        parentQ, err := createManagedQueueGuaranteed(rootQ, "parent", true, 
nil, nil)
        assert.NilError(t, err)
@@ -1431,7 +1431,7 @@ func 
TestTryPreemption_AskResTypesSame_GuaranteedSetOnVictimAndPreemptorSides(t
 func 
TestTryPreemption_OnNode_AskResTypesSame_GuaranteedSetOnVictimAndPreemptorSides(t
 *testing.T) {
        node := newNode(nodeID1, map[string]resources.Quantity{"vcores": 3, 
"gpu": 700, "mem": 200})
        iterator := getNodeIteratorFn(node)
-       rootQ, err := createRootQueue(nil)
+       rootQ, err := createRootQueue(map[string]string{"vcores": "3", "gpu": 
"700", "mem": "200"})
        assert.NilError(t, err)
        parentQ, err := createManagedQueueGuaranteed(rootQ, "parent", true, 
nil, nil)
        assert.NilError(t, err)
diff --git a/pkg/scheduler/objects/queue.go b/pkg/scheduler/objects/queue.go
index c2cccef3..71b12e80 100644
--- a/pkg/scheduler/objects/queue.go
+++ b/pkg/scheduler/objects/queue.go
@@ -1028,6 +1028,12 @@ func (sq *Queue) IncAllocatedResource(alloc 
*resources.Resource, nodeReported bo
 func (sq *Queue) allocatedResFits(alloc *resources.Resource) bool {
        sq.RLock()
        defer sq.RUnlock()
+       // on the root we want to reject a new allocation if it asks for 
resources not registered
+       // so do not use the "undefined" flag, also handles pruned max for root
+       if sq.isRoot() {
+               return sq.maxResource.FitIn(resources.AddOnlyExisting(alloc, 
sq.allocatedResource))
+       }
+       // any other queue undefined is always good
        return sq.maxResource.FitInMaxUndef(resources.AddOnlyExisting(alloc, 
sq.allocatedResource))
 }
 
diff --git a/pkg/scheduler/objects/queue_test.go 
b/pkg/scheduler/objects/queue_test.go
index bcf4a06b..cd0700ce 100644
--- a/pkg/scheduler/objects/queue_test.go
+++ b/pkg/scheduler/objects/queue_test.go
@@ -1052,60 +1052,58 @@ func TestIsEmpty(t *testing.T) {
 
 func TestGetOutstandingRequestMax(t *testing.T) {
        // queue structure:
-       // root
+       // root (max.cpu = 5)
        //   - queue1 (max.cpu = 10)
        //   - queue2 (max.cpu = 5)
        //
        // submit app1 to root.queue1, app2 to root.queue2
        // app1 asks for 20 1x1CPU requests, app2 asks for 20 1x1CPU requests
        // verify the outstanding requests for each of the queue is up to its 
max capacity
+       // root max is irrelevant for the calculation
        // root: 15, queue1: 10 and queue2: 5
        // add an allocation for 5 CPU to queue1 and check the reduced numbers
        // root: 10, queue1: 5 and queue2: 5
-       alloc, err := resources.NewResourceFromConf(map[string]string{"cpu": 
"1"})
-       assert.NilError(t, err, "failed to create basic resource")
-       var used *resources.Resource
-       used, err = resources.NewResourceFromConf(map[string]string{"cpu": "5"})
-       assert.NilError(t, err, "failed to create basic resource")
-       testOutstanding(t, alloc, used)
+       testOutstanding(t, map[string]string{"cpu": "1"}, 
map[string]string{"cpu": "5"})
 }
 
 func TestGetOutstandingUntracked(t *testing.T) {
        // same test as TestGetOutstandingRequestMax but adding an unlimited 
resource to the
        // allocations to make sure it does not affect the calculations
        // queue structure:
-       // root
+       // root (max.cpu = 5, pods = 10)
        //   - queue1 (max.cpu = 10)
        //   - queue2 (max.cpu = 5)
        //
        // submit app1 to root.queue1, app2 to root.queue2
        // app1 asks for 20 1x1CPU, 1xPOD requests, app2 asks for 20 1x1CPU, 
1xPOD requests
        // verify the outstanding requests for each of the queue is up to its 
max capacity
+       // root max is irrelevant for the calculation
        // root: 15, queue1: 10 and queue2: 5
        // add an allocation for 5 CPU to queue1 and check the reduced numbers
        // root: 10, queue1: 5 and queue2: 5
-       alloc, err := resources.NewResourceFromConf(map[string]string{"cpu": 
"1", "pods": "2"})
-       assert.NilError(t, err, "failed to create basic resource")
-       var used *resources.Resource
-       used, err = resources.NewResourceFromConf(map[string]string{"cpu": "5", 
"pods": "10"})
-       assert.NilError(t, err, "failed to create basic resource")
-       testOutstanding(t, alloc, used)
+       testOutstanding(t, map[string]string{"cpu": "1", "pods": "2"}, 
map[string]string{"cpu": "5", "pods": "10"})
 }
 
-func testOutstanding(t *testing.T, alloc, used *resources.Resource) {
-       root, err := createRootQueue(nil)
-       assert.NilError(t, err, "failed to create root queue with limit")
+func testOutstanding(t *testing.T, allocMap, usedMap map[string]string) {
+       root, err := createRootQueue(usedMap)
        var queue1, queue2 *Queue
+       assert.NilError(t, err, "failed to create root queue with limit")
        queue1, err = createManagedQueue(root, "queue1", false, 
map[string]string{"cpu": "10"})
        assert.NilError(t, err, "failed to create queue1 queue")
        queue2, err = createManagedQueue(root, "queue2", false, 
map[string]string{"cpu": "5"})
        assert.NilError(t, err, "failed to create queue2 queue")
 
+       var allocRes, usedRes *resources.Resource
+       allocRes, err = resources.NewResourceFromConf(allocMap)
+       assert.NilError(t, err, "failed to create basic resource")
+       usedRes, err = resources.NewResourceFromConf(usedMap)
+       assert.NilError(t, err, "failed to create basic resource")
+
        app1 := newApplication(appID1, "default", "root.queue1")
        app1.queue = queue1
        queue1.AddApplication(app1)
        for i := 0; i < 20; i++ {
-               ask := newAllocationAsk(fmt.Sprintf("alloc-%d", i), appID1, 
alloc)
+               ask := newAllocationAsk(fmt.Sprintf("alloc-%d", i), appID1, 
allocRes)
                ask.SetSchedulingAttempted(true)
                err = app1.AddAllocationAsk(ask)
                assert.NilError(t, err, "failed to add allocation ask")
@@ -1115,7 +1113,7 @@ func testOutstanding(t *testing.T, alloc, used 
*resources.Resource) {
        app2.queue = queue2
        queue2.AddApplication(app2)
        for i := 0; i < 20; i++ {
-               ask := newAllocationAsk(fmt.Sprintf("alloc-%d", i), appID2, 
alloc)
+               ask := newAllocationAsk(fmt.Sprintf("alloc-%d", i), appID2, 
allocRes)
                ask.SetSchedulingAttempted(true)
                err = app2.AddAllocationAsk(ask)
                assert.NilError(t, err, "failed to add allocation ask")
@@ -1135,8 +1133,7 @@ func testOutstanding(t *testing.T, alloc, used 
*resources.Resource) {
        assert.Equal(t, len(queue2Total), 5)
 
        // simulate queue1 has some allocated resources
-       // after allocation, the max available becomes to be 5
-       err = queue1.IncAllocatedResource(used, false)
+       err = queue1.IncAllocatedResource(usedRes, false)
        assert.NilError(t, err, "failed to increment allocated resources")
 
        queue1Total = make([]*Allocation, 0)
@@ -1166,7 +1163,7 @@ func TestGetOutstandingOnlyUntracked(t *testing.T) {
        // all outstanding pods use only an unlimited resource type
        // max is set for a different resource type and fully allocated
        // queue structure:
-       // root
+       // root (max.cpu = 10, pods: 10)
        //   - queue1 (max.cpu = 10)
        //
        // submit app1 to root.queue1, app1 asks for 20 1xPOD requests
@@ -1176,10 +1173,11 @@ func TestGetOutstandingOnlyUntracked(t *testing.T) {
        alloc, err := resources.NewResourceFromConf(map[string]string{"pods": 
"1"})
        assert.NilError(t, err, "failed to create basic resource")
        var used *resources.Resource
-       used, err = resources.NewResourceFromConf(map[string]string{"cpu": 
"10", "pods": "10"})
+       usedMap := map[string]string{"cpu": "10", "pods": "10"}
+       used, err = resources.NewResourceFromConf(usedMap)
        assert.NilError(t, err, "failed to create basic resource")
        var root, queue1 *Queue
-       root, err = createRootQueue(nil)
+       root, err = createRootQueue(usedMap)
        assert.NilError(t, err, "failed to create root queue with limit")
        queue1, err = createManagedQueue(root, "queue1", false, 
map[string]string{"cpu": "10"})
        assert.NilError(t, err, "failed to create queue1 queue")
@@ -1267,14 +1265,20 @@ func TestGetOutstandingRequestNoMax(t *testing.T) {
 }
 
 func TestAllocationCalcRoot(t *testing.T) {
-       // create the root
-       root, err := createRootQueue(nil)
+       resMap := map[string]string{"memory": "100", "vcores": "10"}
+       // create the root: must set a max on the queue
+       root, err := createRootQueue(resMap)
        assert.NilError(t, err, "failed to create basic root queue")
        var res *resources.Resource
-       res, err = resources.NewResourceFromConf(map[string]string{"memory": 
"100", "vcores": "10"})
+       res, err = resources.NewResourceFromConf(resMap)
        assert.NilError(t, err, "failed to create basic resource")
        err = root.IncAllocatedResource(res, false)
        assert.NilError(t, err, "root queue allocation failed on increment")
+       // increment again should fail
+       err = root.IncAllocatedResource(res, false)
+       if err == nil {
+               t.Error("root queue allocation should have failed to increment 
(max hit)")
+       }
        err = root.DecAllocatedResource(res)
        assert.NilError(t, err, "root queue allocation failed on decrement")
        if !resources.IsZero(root.allocatedResource) {
@@ -1287,18 +1291,24 @@ func TestAllocationCalcRoot(t *testing.T) {
 }
 
 func TestAllocationCalcSub(t *testing.T) {
+       resMap := map[string]string{"memory": "100", "vcores": "10"}
        // create the root
-       root, err := createRootQueue(nil)
+       root, err := createRootQueue(resMap)
        assert.NilError(t, err, "failed to create basic root queue")
        var parent *Queue
        parent, err = createManagedQueue(root, "parent", true, nil)
        assert.NilError(t, err, "failed to create parent queue")
 
        var res *resources.Resource
-       res, err = resources.NewResourceFromConf(map[string]string{"memory": 
"100", "vcores": "10"})
+       res, err = resources.NewResourceFromConf(resMap)
        assert.NilError(t, err, "failed to create basic resource")
        err = parent.IncAllocatedResource(res, false)
        assert.NilError(t, err, "parent queue allocation failed on increment")
+       // increment again should fail
+       err = parent.IncAllocatedResource(res, false)
+       if err == nil {
+               t.Error("parent queue allocation should have failed to 
increment (root max hit)")
+       }
        err = parent.DecAllocatedResource(res)
        assert.NilError(t, err, "parent queue allocation failed on decrement")
        if !resources.IsZero(root.allocatedResource) {
@@ -2561,7 +2571,7 @@ func isNewApplicationEvent(t *testing.T, app 
*Application, record *si.EventRecor
        assert.Equal(t, si.EventRecord_APP_NEW, record.EventChangeDetail, 
"incorrect change detail, expected none")
 }
 
-func TestQueue_allocatedResFits(t *testing.T) {
+func TestQueue_allocatedResFits_Root(t *testing.T) {
        const first = "first"
        const second = "second"
        root, err := createRootQueue(nil)
@@ -2575,13 +2585,15 @@ func TestQueue_allocatedResFits(t *testing.T) {
                want   bool
        }{
                {"all nil", nil, nil, nil, true},
-               {"nil max no usage", nil, nil, map[string]string{first: "1"}, 
true},
-               {"nil max set usage", nil, map[string]string{first: "1"}, 
map[string]string{second: "1"}, true},
+               {"nil max no usage", nil, nil, map[string]string{first: "1"}, 
false},
+               {"nil max set usage", nil, map[string]string{first: "1"}, 
map[string]string{second: "1"}, false},
+               {"max = usage other in alloc", map[string]string{first: "1"}, 
map[string]string{first: "1"}, map[string]string{second: "1"}, false},
                {"max = usage same in alloc", map[string]string{first: "1"}, 
map[string]string{first: "1"}, map[string]string{first: "1"}, false},
-               {"max = usage other in alloc", map[string]string{first: "1"}, 
map[string]string{first: "1"}, map[string]string{second: "1"}, true},
-               {"usage over max other in alloc", map[string]string{first: "1", 
second: "0"}, map[string]string{second: "1"}, map[string]string{first: "1"}, 
true},
-               {"usage over max same in alloc", map[string]string{first: "1", 
second: "0"}, map[string]string{second: "1"}, map[string]string{second: "1"}, 
false},
-               {"partial fit", map[string]string{first: "2", second: "0"}, 
map[string]string{first: "1", second: "1"}, map[string]string{first: "1", 
second: "1"}, false},
+               {"usage over undefined max other in alloc", 
map[string]string{first: "1"}, map[string]string{second: "1"}, 
map[string]string{first: "1"}, true},
+               {"usage over undefined max same in alloc", 
map[string]string{first: "1"}, map[string]string{second: "1"}, 
map[string]string{second: "1"}, false},
+               {"partial fit", map[string]string{first: "2"}, 
map[string]string{first: "1", second: "1"}, map[string]string{first: "1", 
second: "1"}, false},
+               {"all fit no usage", map[string]string{first: "2", second: 
"2"}, nil, map[string]string{first: "1", second: "1"}, true},
+               {"all fit with usage", map[string]string{first: "2", second: 
"2"}, map[string]string{first: "1", second: "1"}, map[string]string{first: "1", 
second: "1"}, true},
        }
        for _, tt := range tests {
                t.Run(tt.name, func(t *testing.T) {
@@ -2598,3 +2610,45 @@ func TestQueue_allocatedResFits(t *testing.T) {
                })
        }
 }
+
+func TestQueue_allocatedResFits_Other(t *testing.T) {
+       const first = "first"
+       const second = "second"
+       root, err := createRootQueue(nil)
+       assert.NilError(t, err, "queue create failed")
+       var leaf *Queue
+       leaf, err = createManagedQueue(root, "leaf", false, nil)
+
+       tests := []struct {
+               name   string
+               quota  map[string]string
+               used   map[string]string
+               change map[string]string
+               want   bool
+       }{
+               {"all nil", nil, nil, nil, true},
+               {"nil max no usage", nil, nil, map[string]string{first: "1"}, 
true},
+               {"nil max set usage", nil, map[string]string{first: "1"}, 
map[string]string{second: "1"}, true},
+               {"max = usage other in alloc", map[string]string{first: "1"}, 
map[string]string{first: "1"}, map[string]string{second: "1"}, true},
+               {"max = usage same in alloc", map[string]string{first: "1"}, 
map[string]string{first: "1"}, map[string]string{first: "1"}, false},
+               {"usage over zero max other in alloc", map[string]string{first: 
"1", second: "0"}, map[string]string{second: "1"}, map[string]string{first: 
"1"}, true},
+               {"usage over zero max same in alloc", map[string]string{first: 
"1", second: "0"}, map[string]string{second: "1"}, map[string]string{second: 
"1"}, false},
+               {"partial fit", map[string]string{first: "2", second: "1"}, 
map[string]string{first: "1", second: "1"}, map[string]string{first: "1", 
second: "1"}, false},
+               {"all fit no usage", map[string]string{first: "2", second: 
"2"}, nil, map[string]string{first: "1", second: "1"}, true},
+               {"all fit", map[string]string{first: "2", second: "2"}, 
map[string]string{first: "1", second: "1"}, map[string]string{first: "1", 
second: "1"}, true},
+       }
+       for _, tt := range tests {
+               t.Run(tt.name, func(t *testing.T) {
+                       var quota, used, change *resources.Resource
+                       quota, err = resources.NewResourceFromConf(tt.quota)
+                       assert.NilError(t, err, "failed to create basic 
resource: quota")
+                       leaf.maxResource = quota
+                       used, err = resources.NewResourceFromConf(tt.used)
+                       assert.NilError(t, err, "failed to create basic 
resource: used")
+                       leaf.allocatedResource = used
+                       change, err = resources.NewResourceFromConf(tt.change)
+                       assert.NilError(t, err, "failed to create basic 
resource: diff")
+                       assert.Equal(t, leaf.allocatedResFits(change), tt.want, 
"allocatedResFits incorrect state returned")
+               })
+       }
+}
diff --git a/pkg/scheduler/partition.go b/pkg/scheduler/partition.go
index 3b3eae70..a614ee4a 100644
--- a/pkg/scheduler/partition.go
+++ b/pkg/scheduler/partition.go
@@ -545,7 +545,8 @@ func (pc *PartitionContext) GetNode(nodeID string) 
*objects.Node {
        return pc.nodes.GetNode(nodeID)
 }
 
-// Add the node to the partition.
+// AddNode adds the node to the partition. Updates the partition and root 
queue resources if the node is added
+// successfully to the partition.
 // NOTE: this is a lock free call. It must NOT be called holding the 
PartitionContext lock.
 func (pc *PartitionContext) AddNode(node *objects.Node) error {
        if node == nil {
@@ -563,7 +564,9 @@ func (pc *PartitionContext) AddNode(node *objects.Node) 
error {
        return nil
 }
 
-// Update the partition resources based on the change of the node information
+// updatePartitionResource updates the partition resources based on the change 
of the node information.
+// The delta is added to the total partition resources. A removal or decrease 
MUST be negative.
+// The passed in delta is not changed and only read.
 func (pc *PartitionContext) updatePartitionResource(delta *resources.Resource) 
{
        pc.Lock()
        defer pc.Unlock()
@@ -573,39 +576,29 @@ func (pc *PartitionContext) updatePartitionResource(delta 
*resources.Resource) {
                } else {
                        pc.totalPartitionResource.AddTo(delta)
                }
+               // remove any zero values from the final resource definition
+               pc.totalPartitionResource.Prune()
+               // set the root queue size
                pc.root.SetMaxResource(pc.totalPartitionResource)
        }
 }
 
-// Update the partition details when adding a node.
+// addNodeToList adds a node to the partition, and updates the metrics & 
resource tracking information
+// if the node was added successfully to the partition.
+// NOTE: this is a lock free call. It must NOT be called holding the 
PartitionContext lock.
 func (pc *PartitionContext) addNodeToList(node *objects.Node) error {
        // we don't grab a lock here because we only update pc.nodes which is 
internally protected
        if err := pc.nodes.AddNode(node); err != nil {
                return fmt.Errorf("failed to add node %s to partition %s, 
error: %v", node.NodeID, pc.Name, err)
        }
 
-       pc.addNodeResources(node)
-       return nil
-}
-
-// Update metrics & resource tracking information.
-// This locks the partition. The partition may not be locked when we process 
the allocation
-// additions to the node as that takes further app, queue or node locks.
-func (pc *PartitionContext) addNodeResources(node *objects.Node) {
-       pc.Lock()
-       defer pc.Unlock()
+       pc.updatePartitionResource(node.GetCapacity())
        metrics.GetSchedulerMetrics().IncActiveNodes()
-       // update/set the resources available in the cluster
-       if pc.totalPartitionResource == nil {
-               pc.totalPartitionResource = node.GetCapacity().Clone()
-       } else {
-               pc.totalPartitionResource.AddTo(node.GetCapacity())
-       }
-       pc.root.SetMaxResource(pc.totalPartitionResource)
        log.Log(log.SchedPartition).Info("Updated available resources from 
added node",
                zap.String("partitionName", pc.Name),
                zap.String("nodeID", node.NodeID),
-               zap.Stringer("partitionResource", pc.totalPartitionResource))
+               zap.Stringer("partitionResource", 
pc.GetTotalPartitionResource()))
+       return nil
 }
 
 // removeNodeFromList removes the node from the list of partition nodes.
@@ -626,20 +619,6 @@ func (pc *PartitionContext) removeNodeFromList(nodeID 
string) *objects.Node {
        return node
 }
 
-// removeNodeResources updates the partition and root queue resources as part 
of the node removal process.
-// This locks the partition.
-func (pc *PartitionContext) removeNodeResources(node *objects.Node) {
-       pc.Lock()
-       defer pc.Unlock()
-       // cleanup the available resources, partition resources cannot be nil 
at this point
-       pc.totalPartitionResource.SubFrom(node.GetCapacity())
-       pc.root.SetMaxResource(pc.totalPartitionResource)
-       log.Log(log.SchedPartition).Info("Updated available resources from 
removed node",
-               zap.String("partitionName", pc.Name),
-               zap.String("nodeID", node.NodeID),
-               zap.Stringer("partitionResource", pc.totalPartitionResource))
-}
-
 // removeNode removes a node from the partition. It returns all released and 
confirmed allocations.
 // The released allocations are all linked to the current node.
 // The confirmed allocations are real allocations that are linked to 
placeholders on the current node and are linked to
@@ -668,7 +647,12 @@ func (pc *PartitionContext) removeNode(nodeID string) 
([]*objects.Allocation, []
        released, confirmed := pc.removeNodeAllocations(node)
 
        // update the resource linked to this node, all allocations are 
removed, queue usage should have decreased
-       pc.removeNodeResources(node)
+       // The delta passed in must be negative: the delta is always added
+       pc.updatePartitionResource(resources.Multiply(node.GetCapacity(), -1))
+       log.Log(log.SchedPartition).Info("Updated available resources from 
removed node",
+               zap.String("partitionName", pc.Name),
+               zap.String("nodeID", node.NodeID),
+               zap.Stringer("partitionResource", 
pc.GetTotalPartitionResource()))
        return released, confirmed
 }
 
diff --git a/pkg/scheduler/partition_test.go b/pkg/scheduler/partition_test.go
index c6db6280..f138d239 100644
--- a/pkg/scheduler/partition_test.go
+++ b/pkg/scheduler/partition_test.go
@@ -279,6 +279,7 @@ func TestRemoveNodeWithAllocations(t *testing.T) {
        // remove the node this cannot fail
        released, confirmed := partition.removeNode(nodeID1)
        assert.Equal(t, 0, partition.GetTotalNodeCount(), "node list was not 
updated, node was not removed")
+       assert.Assert(t, partition.GetTotalPartitionResource().IsEmpty(), 
"partition should have 'empty' resource object (pruned)")
        assert.Equal(t, 1, len(released), "node did not release correct 
allocation")
        assert.Equal(t, 0, len(confirmed), "node did not confirm correct 
allocation")
        assert.Equal(t, released[0].GetAllocationKey(), allocAllocationKey, 
"allocationKey returned by release not the same as on allocation")
@@ -337,6 +338,7 @@ func TestRemoveNodeWithPlaceholders(t *testing.T) {
        // remove the node that has both placeholder and real allocation
        released, confirmed := partition.removeNode(nodeID1)
        assert.Equal(t, 0, partition.GetTotalNodeCount(), "node list was not 
updated, node was not removed")
+       assert.Assert(t, partition.GetTotalPartitionResource().IsEmpty(), 
"partition should have 'empty' resource object (pruned)")
        assert.Equal(t, 1, len(released), "node removal did not release correct 
allocation")
        assert.Equal(t, 0, len(confirmed), "node removal should not have 
confirmed allocation")
        assert.Equal(t, ph.GetAllocationKey(), released[0].GetAllocationKey(), 
"allocationKey returned by release not the same as the placeholder")
@@ -2593,29 +2595,38 @@ func TestUpdateRootQueue(t *testing.T) {
        assert.Equal(t, partition.GetQueue("root.parent").CurrentState(), 
objects.Draining.String(), "parent queue should have been marked for removal")
 
        // add new node, node 3 with 'memory' resource type
-       res1, err1 := resources.NewResourceFromConf(map[string]string{"vcore": 
"20", "memory": "50"})
-       assert.NilError(t, err1, "resource creation failed")
-       err = partition.AddNode(newNodeMaxResource("node-3", res1))
+       res, err = resources.NewResourceFromConf(map[string]string{"memory": 
"50"})
+       assert.NilError(t, err, "resource creation failed")
+       err = partition.AddNode(newNodeMaxResource("node-3", res))
        assert.NilError(t, err, "test node3 add failed unexpected")
 
        // root max resource gets updated with 'memory' resource type
-       expRes, err1 := 
resources.NewResourceFromConf(map[string]string{"vcore": "40", "memory": "50"})
-       assert.NilError(t, err1, "resource creation failed")
-       assert.Assert(t, resources.Equals(expRes, 
partition.root.GetMaxResource()), "root max resource not set as expected")
+       res, err = resources.NewResourceFromConf(map[string]string{"vcore": 
"20", "memory": "50"})
+       assert.NilError(t, err, "resource creation failed")
+       assert.Assert(t, resources.Equals(res, 
partition.totalPartitionResource), "partition resource not set as expected")
+       assert.Assert(t, resources.Equals(res, 
partition.root.GetMaxResource()), "root max resource not set as expected")
 
        // remove node, node 3. root max resource won't have 'memory' resource 
type and updated with less 'vcore'
        partition.removeNode("node-3")
+       res, err = resources.NewResourceFromConf(map[string]string{"vcore": 
"20"})
+       assert.NilError(t, err, "resource creation failed")
        assert.Assert(t, resources.Equals(res, 
partition.root.GetMaxResource()), "root max resource not set as expected")
-
+       assert.Assert(t, resources.Equals(res, 
partition.totalPartitionResource), "partition resource not set as expected")
+       assert.Equal(t, len(res.Resources), 
len(partition.root.GetMaxResource().Resources), "expected pruned resource on 
queue without memory set")
+       assert.Equal(t, len(res.Resources), 
len(partition.totalPartitionResource.Resources), "expected pruned resource on 
partition without memory set")
        // remove node, node 2. root max resource gets updated with less 
'vcores'
        partition.removeNode("node-2")
-       expRes1, err1 := 
resources.NewResourceFromConf(map[string]string{"vcore": "10"})
-       assert.NilError(t, err1, "resource creation failed")
-       assert.Assert(t, resources.Equals(expRes1, 
partition.root.GetMaxResource()), "root max resource not set as expected")
+       res, err = resources.NewResourceFromConf(map[string]string{"vcore": 
"10"})
+       assert.NilError(t, err, "resource creation failed")
+       assert.Assert(t, resources.Equals(res, 
partition.root.GetMaxResource()), "root max resource not set as expected")
+       assert.Assert(t, resources.Equals(res, 
partition.totalPartitionResource), "partition resource not set as expected")
+       assert.Equal(t, len(res.Resources), 
len(partition.root.GetMaxResource().Resources), "expected pruned resource on 
queue without memory set")
+       assert.Equal(t, len(res.Resources), 
len(partition.totalPartitionResource.Resources), "expected pruned resource on 
partition without memory set")
 
        // remove node, node 1. root max resource should set to nil
        partition.removeNode("node-1")
        assert.Assert(t, resources.Equals(nil, 
partition.root.GetMaxResource()), "root max resource not set as expected")
+       assert.Assert(t, partition.totalPartitionResource.IsEmpty(), "partition 
resource not set as expected")
 }
 
 // transition an application to completed state and wait for it to be 
processed into the completedApplications map
@@ -2779,6 +2790,11 @@ func TestUpdateNode(t *testing.T) {
        if !resources.Equals(expectedRes, 
partition.GetTotalPartitionResource()) {
                t.Errorf("Expected partition resource %s, doesn't match with 
actual partition resource %s", expectedRes, 
partition.GetTotalPartitionResource())
        }
+
+       // clear out and make sure it is pruned
+       delta = resources.Multiply(expectedRes, -1)
+       partition.updatePartitionResource(delta)
+       assert.Assert(t, partition.GetTotalPartitionResource().IsEmpty())
 }
 
 func TestAddTGApplication(t *testing.T) {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]


Reply via email to