This is an automated email from the ASF dual-hosted git repository.

mani pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/yunikorn-core.git


The following commit(s) were added to refs/heads/master by this push:
     new f83134b0 [YUNIKORN-1920] headroom with parent queue usage (#620)
f83134b0 is described below

commit f83134b0776bef792141085d23d37295140587cd
Author: Wilfred Spiegelenburg <[email protected]>
AuthorDate: Tue Aug 29 12:39:41 2023 +0530

    [YUNIKORN-1920] headroom with parent queue usage (#620)
    
    Headroom needs to take into account the usage in each queue and not
    assume that the full max resource is available if the queue is not the
    leaf queue.
    Add a queue traker object for any queue that is not found while
    traversing the hierarchy. The user/group has an application running in
    that queue and will add usage at some point.
    
    Add tests to cover usage and max with different resource types set.
    
    Closes: #620
    
    Signed-off-by: Manikandan R <[email protected]>
---
 pkg/scheduler/ugm/group_tracker.go      |   4 +-
 pkg/scheduler/ugm/queue_tracker.go      |  36 +++++------
 pkg/scheduler/ugm/queue_tracker_test.go | 110 +++++++++++++++++---------------
 pkg/scheduler/ugm/user_tracker.go       |   4 +-
 4 files changed, 81 insertions(+), 73 deletions(-)

diff --git a/pkg/scheduler/ugm/group_tracker.go 
b/pkg/scheduler/ugm/group_tracker.go
index 150016c5..1830df2a 100644
--- a/pkg/scheduler/ugm/group_tracker.go
+++ b/pkg/scheduler/ugm/group_tracker.go
@@ -19,9 +19,11 @@
 package ugm
 
 import (
+       "strings"
        "sync"
 
        "github.com/apache/yunikorn-core/pkg/common"
+       "github.com/apache/yunikorn-core/pkg/common/configs"
        "github.com/apache/yunikorn-core/pkg/common/resources"
        "github.com/apache/yunikorn-core/pkg/webservice/dao"
 )
@@ -81,7 +83,7 @@ func (gt *GroupTracker) setLimits(queuePath string, resource 
*resources.Resource
 func (gt *GroupTracker) headroom(queuePath string) *resources.Resource {
        gt.Lock()
        defer gt.Unlock()
-       return gt.queueTracker.headroom(queuePath)
+       return gt.queueTracker.headroom(strings.Split(queuePath, configs.DOT))
 }
 
 func (gt *GroupTracker) GetGroupResourceUsageDAOInfo() 
*dao.GroupResourceUsageDAOInfo {
diff --git a/pkg/scheduler/ugm/queue_tracker.go 
b/pkg/scheduler/ugm/queue_tracker.go
index 8dc0fe99..70c70bea 100644
--- a/pkg/scheduler/ugm/queue_tracker.go
+++ b/pkg/scheduler/ugm/queue_tracker.go
@@ -245,34 +245,32 @@ func (qt *QueueTracker) setLimit(queuePath string, 
maxResource *resources.Resour
        childQueueTracker.maxResources = maxResource
 }
 
-func (qt *QueueTracker) headroom(queuePath string) *resources.Resource {
+func (qt *QueueTracker) headroom(hierarchy []string) *resources.Resource {
        log.Log(log.SchedUGM).Debug("Calculating headroom",
-               zap.String("queue path", queuePath))
-       childQueuePath, immediateChildQueueName := getChildQueuePath(queuePath)
-       if childQueuePath != common.Empty {
-               if qt.childQueueTrackers[immediateChildQueueName] != nil {
-                       headroom := 
qt.childQueueTrackers[immediateChildQueueName].headroom(childQueuePath)
-                       if headroom != nil {
-                               return 
resources.ComponentWiseMinPermissive(headroom, qt.maxResources)
-                       }
-               } else {
-                       log.Log(log.SchedUGM).Error("Child queueTracker tracker 
must be available in child queues map",
-                               zap.String("child queueTracker name", 
immediateChildQueueName))
-                       return nil
+               zap.Strings("queue path", hierarchy))
+       // depth first: all the way to the leaf, create if not exists
+       // more than 1 in the slice means we need to recurse down
+       var headroom, childHeadroom *resources.Resource
+       if len(hierarchy) > 1 {
+               childName := hierarchy[1]
+               if qt.childQueueTrackers[childName] == nil {
+                       qt.childQueueTrackers[childName] = 
newQueueTracker(qt.queuePath, childName)
                }
+               childHeadroom = 
qt.childQueueTrackers[childName].headroom(hierarchy[1:])
        }
-
+       // arrived at the leaf or on the way out: check against current max if 
set
        if !resources.Equals(resources.NewResource(), qt.maxResources) {
-               headroom := qt.maxResources.Clone()
+               headroom = qt.maxResources.Clone()
                headroom.SubOnlyExisting(qt.resourceUsage)
                log.Log(log.SchedUGM).Debug("Calculated headroom",
-                       zap.String("queue path", queuePath),
-                       zap.String("queue", qt.queueName),
+                       zap.String("queue path", qt.queuePath),
                        zap.Stringer("max resource", qt.maxResources),
                        zap.Stringer("headroom", headroom))
-               return headroom
        }
-       return nil
+       if headroom == nil {
+               return childHeadroom
+       }
+       return resources.ComponentWiseMinPermissive(headroom, childHeadroom)
 }
 
 func (qt *QueueTracker) getResourceUsageDAOInfo(parentQueuePath string) 
*dao.ResourceUsageDAOInfo {
diff --git a/pkg/scheduler/ugm/queue_tracker_test.go 
b/pkg/scheduler/ugm/queue_tracker_test.go
index bcaabb35..55b79a68 100644
--- a/pkg/scheduler/ugm/queue_tracker_test.go
+++ b/pkg/scheduler/ugm/queue_tracker_test.go
@@ -19,10 +19,12 @@
 package ugm
 
 import (
+       "strings"
        "testing"
 
        "gotest.tools/v3/assert"
 
+       "github.com/apache/yunikorn-core/pkg/common/configs"
        "github.com/apache/yunikorn-core/pkg/common/resources"
 )
 
@@ -251,62 +253,66 @@ func TestQTQuotaEnforcement(t *testing.T) {
 }
 
 func TestHeadroom(t *testing.T) {
-       leafQT := newQueueTracker("root.parent", "leaf")
-
-       leafMaxRes, err := 
resources.NewResourceFromConf(map[string]string{"mem": "60M", "vcore": "60"})
-       if err != nil {
-               t.Errorf("new resource create returned error or wrong resource: 
error %t, res %v", err, leafMaxRes)
-       }
-
-       parentQT := newQueueTracker("root", "parent")
-       parentMaxRes := leafMaxRes.Clone()
-       resources.Multiply(parentMaxRes, 2)
-
-       rootQT := newQueueTracker("", "root")
-
-       parentQT.childQueueTrackers["leaf"] = leafQT
-       rootQT.childQueueTrackers["parent"] = parentQT
-
-       // Not even a single queue has been configured with max resource
-       headroom := rootQT.headroom("root.parent.leaf")
-       assert.Equal(t, resources.Equals(headroom, nil), true)
-
-       leafQT.maxResources = leafMaxRes
-       parentQT.maxResources = parentMaxRes
+       var nilResource *resources.Resource
+       path := "root.parent.leaf"
+       hierarchy := strings.Split(path, configs.DOT)
+
+       // nothing exists make sure the hierarchy gets created
+       root := newRootQueueTracker()
+       parent := root.getChildQueueTracker("root.parent")
+       assert.Assert(t, parent != nil, "parent queue tracker should have been 
created")
+       leaf := root.getChildQueueTracker(path)
+       assert.Assert(t, leaf != nil, "leaf queue tracker should have been 
created")
+
+       // auto created trackers no max resource set
+       headroom := root.headroom(hierarchy)
+       assert.Equal(t, headroom, nilResource, "auto create: expected nil 
resource")
+
+       // prep resources to set as usage and max
+       usage, err := resources.NewResourceFromConf(map[string]string{"mem": 
"10M", "vcore": "10"})
+       assert.NilError(t, err, "usage: new resource create returned error")
+       double := resources.Multiply(usage, 2)
+       leaf.maxResources = double
+       parent.maxResources = resources.Multiply(double, 2)
 
        // headroom should be equal to max cap of leaf queue as there is no 
usage so far
-       headroom = rootQT.headroom("root.parent.leaf")
-       assert.Equal(t, resources.Equals(headroom, leafMaxRes), true)
-
-       leafResUsage, err := 
resources.NewResourceFromConf(map[string]string{"mem": "30M", "vcore": "30"})
-       if err != nil {
-               t.Errorf("new resource create returned error or wrong resource: 
error %t, res %v", err, leafResUsage)
-       }
-       leafQT.resourceUsage = leafResUsage
+       headroom = root.headroom(hierarchy)
+       assert.Assert(t, resources.Equals(headroom, double), "headroom not leaf 
max")
 
        // headroom should be equal to sub(max cap of leaf queue - resource 
usage) as there is some usage
-       headroom = rootQT.headroom("root.parent.leaf")
-       assert.Equal(t, resources.Equals(headroom, leafResUsage), true)
-
-       leafQT.maxResources = resources.Multiply(leafMaxRes, 2)
-       parentQT.maxResources = leafMaxRes
-
-       // headroom should be equal to min (leaf max resources, parent 
resources)
-       headroom = rootQT.headroom("root.parent.leaf")
-       assert.Equal(t, resources.Equals(headroom, leafMaxRes), true)
-
-       parentQT.maxResources = resources.NewResource()
-
-       // headroom should be equal to sub(max cap of leaf queue - resource 
usage) as there is some usage in leaf and max res of both root and parent is nil
-       headroom = rootQT.headroom("root.parent.leaf")
-       assert.Equal(t, resources.Equals(headroom, resources.Add(leafMaxRes, 
leafResUsage)), true)
-
-       rootQT.maxResources = leafMaxRes
-
-       // headroom should be equal to min ( (sub(max cap of leaf queue - 
resource usage), root resources) as there is some usage in leaf
-       // and max res of parent is nil
-       headroom = rootQT.headroom("root.parent.leaf")
-       assert.Equal(t, resources.Equals(headroom, leafMaxRes), true)
+       leaf.resourceUsage = usage
+       headroom = root.headroom(hierarchy)
+       assert.Assert(t, resources.Equals(headroom, usage), "headroom should be 
same as usage")
+
+       // headroom should be equal to min headroom of parent and leaf: parent 
has none so zero
+       parent.maxResources = double
+       parent.resourceUsage = double
+       headroom = root.headroom(hierarchy)
+       assert.Assert(t, resources.IsZero(headroom), "leaf check: parent should 
have no headroom")
+
+       headroom = root.headroom(hierarchy[:2])
+       assert.Assert(t, resources.IsZero(headroom), "parent check: parent 
should have no headroom")
+
+       // reset usage for the parent
+       parent.resourceUsage = resources.NewResource()
+       // set a different type in the parent max and check it is in the 
headroom
+       var single, other *resources.Resource
+       single, err = resources.NewResourceFromConf(map[string]string{"gpu": 
"1"})
+       assert.NilError(t, err, "single: new resource create returned error")
+       parent.maxResources = single
+       single, err = resources.NewResourceFromConf(map[string]string{"gpu": 
"1"})
+       assert.NilError(t, err, "single: new resource create returned error")
+       combined := resources.Add(usage, single)
+       headroom = root.headroom(hierarchy)
+       assert.Assert(t, resources.Equals(headroom, combined), "headroom should 
be same as combined")
+
+       // this "other" resource should be completely ignored as it has no limit
+       other, err = resources.NewResourceFromConf(map[string]string{"unknown": 
"100"})
+       assert.NilError(t, err, "single: new resource create returned error")
+       parent.resourceUsage = other
+       root.resourceUsage = other
+       headroom = root.headroom(hierarchy)
+       assert.Assert(t, resources.Equals(headroom, combined), "headroom should 
be same as combined")
 }
 
 func getQTResource(qt *QueueTracker) map[string]*resources.Resource {
diff --git a/pkg/scheduler/ugm/user_tracker.go 
b/pkg/scheduler/ugm/user_tracker.go
index 6b3bba24..6f761cbf 100644
--- a/pkg/scheduler/ugm/user_tracker.go
+++ b/pkg/scheduler/ugm/user_tracker.go
@@ -19,11 +19,13 @@
 package ugm
 
 import (
+       "strings"
        "sync"
 
        "go.uber.org/zap"
 
        "github.com/apache/yunikorn-core/pkg/common"
+       "github.com/apache/yunikorn-core/pkg/common/configs"
        "github.com/apache/yunikorn-core/pkg/common/resources"
        "github.com/apache/yunikorn-core/pkg/log"
        "github.com/apache/yunikorn-core/pkg/webservice/dao"
@@ -124,7 +126,7 @@ func (ut *UserTracker) setLimits(queuePath string, resource 
*resources.Resource,
 func (ut *UserTracker) headroom(queuePath string) *resources.Resource {
        ut.Lock()
        defer ut.Unlock()
-       return ut.queueTracker.headroom(queuePath)
+       return ut.queueTracker.headroom(strings.Split(queuePath, configs.DOT))
 }
 
 func (ut *UserTracker) GetUserResourceUsageDAOInfo() 
*dao.UserResourceUsageDAOInfo {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to