This is an automated email from the ASF dual-hosted git repository.

mani pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/yunikorn-core.git


The following commit(s) were added to refs/heads/master by this push:
     new d1fe9983 [YUNIKORN-1944] calculate headroom for wildcard cases (#642)
d1fe9983 is described below

commit d1fe998363ee1101b2d423c05aace5e323b1af6b
Author: PoAn Yang <[email protected]>
AuthorDate: Fri Nov 3 14:55:09 2023 +0530

    [YUNIKORN-1944] calculate headroom for wildcard cases (#642)
    
    Closes: #642
    
    Signed-off-by: Manikandan R <[email protected]>
---
 pkg/scheduler/ugm/group_tracker.go      |  2 +-
 pkg/scheduler/ugm/manager_test.go       | 58 +++++++++++++++++++++++++++++----
 pkg/scheduler/ugm/queue_tracker.go      | 28 ++++++++++++++--
 pkg/scheduler/ugm/queue_tracker_test.go | 14 ++++----
 pkg/scheduler/ugm/user_tracker.go       |  2 +-
 5 files changed, 86 insertions(+), 18 deletions(-)

diff --git a/pkg/scheduler/ugm/group_tracker.go 
b/pkg/scheduler/ugm/group_tracker.go
index 62304216..f1b6272a 100644
--- a/pkg/scheduler/ugm/group_tracker.go
+++ b/pkg/scheduler/ugm/group_tracker.go
@@ -83,7 +83,7 @@ func (gt *GroupTracker) setLimits(queuePath string, resource 
*resources.Resource
 func (gt *GroupTracker) headroom(queuePath string) *resources.Resource {
        gt.Lock()
        defer gt.Unlock()
-       return gt.queueTracker.headroom(strings.Split(queuePath, configs.DOT))
+       return gt.queueTracker.headroom(strings.Split(queuePath, configs.DOT), 
group)
 }
 
 func (gt *GroupTracker) GetGroupResourceUsageDAOInfo() 
*dao.GroupResourceUsageDAOInfo {
diff --git a/pkg/scheduler/ugm/manager_test.go 
b/pkg/scheduler/ugm/manager_test.go
index 99da9444..53840b0a 100644
--- a/pkg/scheduler/ugm/manager_test.go
+++ b/pkg/scheduler/ugm/manager_test.go
@@ -895,9 +895,11 @@ func TestSeparateUserGroupHeadroom(t *testing.T) {
 
 func TestUserGroupLimit(t *testing.T) { //nolint:funlen
        testCases := []struct {
-               name string
-               user security.UserGroup
-               conf configs.PartitionConfig
+               name                          string
+               user                          security.UserGroup
+               conf                          configs.PartitionConfig
+               initExpectedHeadroomResource  map[string]string
+               finalExpectedHeadroomResource map[string]string
        }{
                // unmixed user and group limit
                {
@@ -906,6 +908,8 @@ func TestUserGroupLimit(t *testing.T) { //nolint:funlen
                        conf: createConfigWithLimits([]configs.Limit{
                                createLimit([]string{"user1"}, nil, 
mediumResource, 2),
                        }),
+                       initExpectedHeadroomResource:  mediumResource,
+                       finalExpectedHeadroomResource: nil,
                },
                {
                        name: "maxapplications with a specific user limit",
@@ -913,6 +917,8 @@ func TestUserGroupLimit(t *testing.T) { //nolint:funlen
                        conf: createConfigWithLimits([]configs.Limit{
                                createLimit([]string{"user1"}, nil, 
largeResource, 1),
                        }),
+                       initExpectedHeadroomResource:  largeResource,
+                       finalExpectedHeadroomResource: mediumResource,
                },
                {
                        name: "maxresources with a specific user limit and a 
wildcard user limit for a not specific user",
@@ -921,6 +927,8 @@ func TestUserGroupLimit(t *testing.T) { //nolint:funlen
                                createLimit([]string{"user1"}, nil, 
largeResource, 2),
                                createLimit([]string{"*"}, nil, mediumResource, 
2),
                        }),
+                       initExpectedHeadroomResource:  mediumResource,
+                       finalExpectedHeadroomResource: nil,
                },
                {
                        name: "maxapplications with a specific user limit and a 
wildcard user limit for a not specific user",
@@ -929,6 +937,8 @@ func TestUserGroupLimit(t *testing.T) { //nolint:funlen
                                createLimit([]string{"user1"}, nil, 
largeResource, 2),
                                createLimit([]string{"*"}, nil, largeResource, 
1),
                        }),
+                       initExpectedHeadroomResource:  largeResource,
+                       finalExpectedHeadroomResource: mediumResource,
                },
                {
                        name: "maxresources with a specific user limit and a 
wildcard user limit for a specific user",
@@ -937,6 +947,8 @@ func TestUserGroupLimit(t *testing.T) { //nolint:funlen
                                createLimit([]string{"user1"}, nil, 
mediumResource, 2),
                                createLimit([]string{"*"}, nil, largeResource, 
2),
                        }),
+                       initExpectedHeadroomResource:  mediumResource,
+                       finalExpectedHeadroomResource: nil,
                },
                {
                        name: "maxapplications with a specific user limit and a 
wildcard user limit for a specific user",
@@ -945,6 +957,8 @@ func TestUserGroupLimit(t *testing.T) { //nolint:funlen
                                createLimit([]string{"user1"}, nil, 
largeResource, 1),
                                createLimit([]string{"*"}, nil, largeResource, 
2),
                        }),
+                       initExpectedHeadroomResource:  largeResource,
+                       finalExpectedHeadroomResource: mediumResource,
                },
                {
                        name: "maxresources with a wildcard user limit",
@@ -952,6 +966,8 @@ func TestUserGroupLimit(t *testing.T) { //nolint:funlen
                        conf: createConfigWithLimits([]configs.Limit{
                                createLimit(nil, []string{"*"}, mediumResource, 
2),
                        }),
+                       initExpectedHeadroomResource:  mediumResource,
+                       finalExpectedHeadroomResource: nil,
                },
                {
                        name: "maxapplications with a wildcard user limit",
@@ -959,6 +975,8 @@ func TestUserGroupLimit(t *testing.T) { //nolint:funlen
                        conf: createConfigWithLimits([]configs.Limit{
                                createLimit(nil, []string{"*"}, largeResource, 
1),
                        }),
+                       initExpectedHeadroomResource:  largeResource,
+                       finalExpectedHeadroomResource: mediumResource,
                },
                {
                        name: "maxresources with a specific group limit",
@@ -966,6 +984,8 @@ func TestUserGroupLimit(t *testing.T) { //nolint:funlen
                        conf: createConfigWithLimits([]configs.Limit{
                                createLimit(nil, []string{"group1"}, 
mediumResource, 2),
                        }),
+                       initExpectedHeadroomResource:  mediumResource,
+                       finalExpectedHeadroomResource: nil,
                },
                {
                        name: "maxapplications with a specific group limit",
@@ -973,6 +993,8 @@ func TestUserGroupLimit(t *testing.T) { //nolint:funlen
                        conf: createConfigWithLimits([]configs.Limit{
                                createLimit(nil, []string{"group1"}, 
largeResource, 1),
                        }),
+                       initExpectedHeadroomResource:  largeResource,
+                       finalExpectedHeadroomResource: mediumResource,
                },
                {
                        name: "maxresources with a specific group limit and a 
wildcard group limit for a not specific group user",
@@ -981,6 +1003,8 @@ func TestUserGroupLimit(t *testing.T) { //nolint:funlen
                                createLimit(nil, []string{"group1"}, 
largeResource, 2),
                                createLimit(nil, []string{"*"}, mediumResource, 
2),
                        }),
+                       initExpectedHeadroomResource:  mediumResource,
+                       finalExpectedHeadroomResource: nil,
                },
                {
                        name: "maxapplications with a specific group limit and 
a wildcard group limit for a not specific group user",
@@ -989,6 +1013,8 @@ func TestUserGroupLimit(t *testing.T) { //nolint:funlen
                                createLimit(nil, []string{"group1"}, 
largeResource, 2),
                                createLimit(nil, []string{"*"}, largeResource, 
1),
                        }),
+                       initExpectedHeadroomResource:  largeResource,
+                       finalExpectedHeadroomResource: mediumResource,
                },
                {
                        name: "maxresources with a specific group limit and a 
wildcard group limit for a specific group user",
@@ -997,6 +1023,8 @@ func TestUserGroupLimit(t *testing.T) { //nolint:funlen
                                createLimit(nil, []string{"group1"}, 
mediumResource, 2),
                                createLimit(nil, []string{"*"}, largeResource, 
2),
                        }),
+                       initExpectedHeadroomResource:  mediumResource,
+                       finalExpectedHeadroomResource: nil,
                },
                {
                        name: "maxapplications with a specific group limit and 
a wildcard group limit for a specific group user",
@@ -1005,6 +1033,8 @@ func TestUserGroupLimit(t *testing.T) { //nolint:funlen
                                createLimit(nil, []string{"group1"}, 
largeResource, 1),
                                createLimit(nil, []string{"*"}, largeResource, 
2),
                        }),
+                       initExpectedHeadroomResource:  largeResource,
+                       finalExpectedHeadroomResource: mediumResource,
                },
                // mixed user and group limit
                {
@@ -1014,6 +1044,8 @@ func TestUserGroupLimit(t *testing.T) { //nolint:funlen
                                createLimit([]string{"user1"}, nil, 
mediumResource, 2),
                                createLimit(nil, []string{"group1"}, 
largeResource, 2),
                        }),
+                       initExpectedHeadroomResource:  mediumResource,
+                       finalExpectedHeadroomResource: nil,
                },
                {
                        name: "maxapplications with user limit lower than group 
limit",
@@ -1022,6 +1054,8 @@ func TestUserGroupLimit(t *testing.T) { //nolint:funlen
                                createLimit([]string{"user1"}, nil, 
largeResource, 1),
                                createLimit(nil, []string{"group1"}, 
largeResource, 2),
                        }),
+                       initExpectedHeadroomResource:  largeResource,
+                       finalExpectedHeadroomResource: mediumResource,
                },
                {
                        name: "maxresources with gorup limit lower than user 
limit",
@@ -1030,6 +1064,8 @@ func TestUserGroupLimit(t *testing.T) { //nolint:funlen
                                createLimit([]string{"user1"}, nil, 
largeResource, 2),
                                createLimit(nil, []string{"group1"}, 
mediumResource, 2),
                        }),
+                       initExpectedHeadroomResource:  mediumResource,
+                       finalExpectedHeadroomResource: nil,
                },
                {
                        name: "maxapplications with group limit lower than user 
limit",
@@ -1038,6 +1074,8 @@ func TestUserGroupLimit(t *testing.T) { //nolint:funlen
                                createLimit([]string{"user1"}, nil, 
largeResource, 2),
                                createLimit(nil, []string{"group1"}, 
largeResource, 1),
                        }),
+                       initExpectedHeadroomResource:  largeResource,
+                       finalExpectedHeadroomResource: mediumResource,
                },
        }
 
@@ -1050,9 +1088,12 @@ func TestUserGroupLimit(t *testing.T) { //nolint:funlen
                        assert.NilError(t, 
manager.UpdateConfig(tc.conf.Queues[0], "root"))
 
                        usage, err := 
resources.NewResourceFromConf(mediumResource)
-                       if err != nil {
-                               t.Errorf("new resource create returned error or 
wrong resource: error %t, res %v", err, usage)
-                       }
+                       assert.NilError(t, err, fmt.Sprintf("can't create 
resource from %v", mediumResource))
+
+                       initExpectedHeadroom, err := 
resources.NewResourceFromConf(tc.initExpectedHeadroomResource)
+                       assert.NilError(t, err, fmt.Sprintf("can't create 
resource from %v", tc.initExpectedHeadroomResource))
+                       headroom := manager.Headroom(queuePathParent, TestApp1, 
tc.user)
+                       assert.Equal(t, resources.Equals(headroom, 
initExpectedHeadroom), true, "init headroom is not expected")
 
                        increased := 
manager.IncreaseTrackedResource(queuePathParent, TestApp1, usage, tc.user)
                        assert.Equal(t, increased, true, "unable to increase 
tracked resource: queuepath "+queuePathParent+", app "+TestApp1+", res 
"+usage.String())
@@ -1064,6 +1105,11 @@ func TestUserGroupLimit(t *testing.T) { //nolint:funlen
                        assert.Equal(t, 
resources.Equals(userTracker.queueTracker.childQueueTrackers["parent"].resourceUsage,
 usage), true, "user tracker resource usage is not expected at root.parent 
level")
                        assert.Equal(t, 
userTracker.queueTracker.childQueueTrackers["parent"].runningApplications[TestApp1],
 true, fmt.Sprintf("%s is not in runningApplications for user tracker %s at 
root.parent level", TestApp1, tc.user.User))
 
+                       finalExpectedHeadroom, err := 
resources.NewResourceFromConf(tc.finalExpectedHeadroomResource)
+                       assert.NilError(t, err, fmt.Sprintf("can't create 
resource from %v", tc.finalExpectedHeadroomResource))
+                       headroom = manager.Headroom(queuePathParent, TestApp1, 
tc.user)
+                       assert.Equal(t, resources.Equals(headroom, 
finalExpectedHeadroom), true, "final headroom is not expected")
+
                        increased = 
manager.IncreaseTrackedResource(queuePathParent, TestApp2, usage, tc.user)
                        assert.Equal(t, increased, false, "should not increase 
tracked resource: queuepath "+queuePathParent+", app "+TestApp2+", res 
"+usage.String())
                })
diff --git a/pkg/scheduler/ugm/queue_tracker.go 
b/pkg/scheduler/ugm/queue_tracker.go
index 39124c25..a9a37efd 100644
--- a/pkg/scheduler/ugm/queue_tracker.go
+++ b/pkg/scheduler/ugm/queue_tracker.go
@@ -237,9 +237,12 @@ func (qt *QueueTracker) setLimit(hierarchy []string, 
maxResource *resources.Reso
        }
 }
 
-func (qt *QueueTracker) headroom(hierarchy []string) *resources.Resource {
+func (qt *QueueTracker) headroom(hierarchy []string, trackType trackingType) 
*resources.Resource {
        log.Log(log.SchedUGM).Debug("Calculating headroom",
-               zap.Strings("queue path", hierarchy))
+               zap.String("queue path", qt.queuePath),
+               zap.Strings("hierarchy", hierarchy),
+               zap.Int("tracking type", int(trackType)),
+       )
        // depth first: all the way to the leaf, create if not exists
        // more than 1 in the slice means we need to recurse down
        var headroom, childHeadroom *resources.Resource
@@ -248,7 +251,7 @@ func (qt *QueueTracker) headroom(hierarchy []string) 
*resources.Resource {
                if qt.childQueueTrackers[childName] == nil {
                        qt.childQueueTrackers[childName] = 
newQueueTracker(qt.queuePath, childName)
                }
-               childHeadroom = 
qt.childQueueTrackers[childName].headroom(hierarchy[1:])
+               childHeadroom = 
qt.childQueueTrackers[childName].headroom(hierarchy[1:], trackType)
        }
 
        // arrived at the leaf or on the way out: check against current max if 
set
@@ -257,8 +260,27 @@ func (qt *QueueTracker) headroom(hierarchy []string) 
*resources.Resource {
                headroom.SubOnlyExisting(qt.resourceUsage)
                log.Log(log.SchedUGM).Debug("Calculated headroom",
                        zap.String("queue path", qt.queuePath),
+                       zap.Int("tracking type", int(trackType)),
                        zap.Stringer("max resource", qt.maxResources),
                        zap.Stringer("headroom", headroom))
+       } else if resources.Equals(nil, childHeadroom) {
+               // If childHeadroom is not nil, it means there is an user or 
wildcard limit config in child queue,
+               // so we don't check wildcard limit config in current queue.
+
+               // Fall back on wild card user limit settings to calculate 
headroom only for unnamed users.
+               // For unnamed groups, "*" group tracker object would be used 
using the above block to calculate headroom
+               // because resource usage added together for all unnamed groups 
under "*" group tracker object.
+               if trackType == user {
+                       if config := 
m.getUserWildCardLimitsConfig(qt.queuePath); config != nil {
+                               headroom = config.maxResources.Clone()
+                               headroom.SubOnlyExisting(qt.resourceUsage)
+                               log.Log(log.SchedUGM).Debug("Calculated 
headroom",
+                                       zap.String("queue path", qt.queuePath),
+                                       zap.Int("tracking type", 
int(trackType)),
+                                       zap.Stringer("wildcard max resource", 
config.maxResources),
+                                       zap.Stringer("headroom", headroom))
+                       }
+               }
        }
        if headroom == nil {
                return childHeadroom
diff --git a/pkg/scheduler/ugm/queue_tracker_test.go 
b/pkg/scheduler/ugm/queue_tracker_test.go
index 38bd189e..9537b3ba 100644
--- a/pkg/scheduler/ugm/queue_tracker_test.go
+++ b/pkg/scheduler/ugm/queue_tracker_test.go
@@ -267,7 +267,7 @@ func TestHeadroom(t *testing.T) {
        assert.Assert(t, leaf != nil, "leaf queue tracker should have been 
created")
 
        // auto created trackers no max resource set
-       headroom := root.headroom(hierarchy)
+       headroom := root.headroom(hierarchy, none)
        assert.Equal(t, headroom, nilResource, "auto create: expected nil 
resource")
 
        // prep resources to set as usage and max
@@ -278,21 +278,21 @@ func TestHeadroom(t *testing.T) {
        parent.maxResources = resources.Multiply(double, 2)
 
        // headroom should be equal to max cap of leaf queue as there is no 
usage so far
-       headroom = root.headroom(hierarchy)
+       headroom = root.headroom(hierarchy, none)
        assert.Assert(t, resources.Equals(headroom, double), "headroom not leaf 
max")
 
        // headroom should be equal to sub(max cap of leaf queue - resource 
usage) as there is some usage
        leaf.resourceUsage = usage
-       headroom = root.headroom(hierarchy)
+       headroom = root.headroom(hierarchy, none)
        assert.Assert(t, resources.Equals(headroom, usage), "headroom should be 
same as usage")
 
        // headroom should be equal to min headroom of parent and leaf: parent 
has none so zero
        parent.maxResources = double
        parent.resourceUsage = double
-       headroom = root.headroom(hierarchy)
+       headroom = root.headroom(hierarchy, none)
        assert.Assert(t, resources.IsZero(headroom), "leaf check: parent should 
have no headroom")
 
-       headroom = root.headroom(hierarchy[:2])
+       headroom = root.headroom(hierarchy[:2], none)
        assert.Assert(t, resources.IsZero(headroom), "parent check: parent 
should have no headroom")
 
        // reset usage for the parent
@@ -305,7 +305,7 @@ func TestHeadroom(t *testing.T) {
        single, err = resources.NewResourceFromConf(map[string]string{"gpu": 
"1"})
        assert.NilError(t, err, "single: new resource create returned error")
        combined := resources.Add(usage, single)
-       headroom = root.headroom(hierarchy)
+       headroom = root.headroom(hierarchy, none)
        assert.Assert(t, resources.Equals(headroom, combined), "headroom should 
be same as combined")
 
        // this "other" resource should be completely ignored as it has no limit
@@ -313,7 +313,7 @@ func TestHeadroom(t *testing.T) {
        assert.NilError(t, err, "single: new resource create returned error")
        parent.resourceUsage = other
        root.resourceUsage = other
-       headroom = root.headroom(hierarchy)
+       headroom = root.headroom(hierarchy, none)
        assert.Assert(t, resources.Equals(headroom, combined), "headroom should 
be same as combined")
 }
 
diff --git a/pkg/scheduler/ugm/user_tracker.go 
b/pkg/scheduler/ugm/user_tracker.go
index 5e842d58..1a4e2c81 100644
--- a/pkg/scheduler/ugm/user_tracker.go
+++ b/pkg/scheduler/ugm/user_tracker.go
@@ -127,7 +127,7 @@ func (ut *UserTracker) setLimits(queuePath string, resource 
*resources.Resource,
 func (ut *UserTracker) headroom(queuePath string) *resources.Resource {
        ut.Lock()
        defer ut.Unlock()
-       return ut.queueTracker.headroom(strings.Split(queuePath, configs.DOT))
+       return ut.queueTracker.headroom(strings.Split(queuePath, configs.DOT), 
user)
 }
 
 func (ut *UserTracker) GetUserResourceUsageDAOInfo() 
*dao.UserResourceUsageDAOInfo {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to