This is an automated email from the ASF dual-hosted git repository.
mani pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/yunikorn-core.git
The following commit(s) were added to refs/heads/master by this push:
new d1fe9983 [YUNIKORN-1944] calculate headroom for wildcard cases (#642)
d1fe9983 is described below
commit d1fe998363ee1101b2d423c05aace5e323b1af6b
Author: PoAn Yang <[email protected]>
AuthorDate: Fri Nov 3 14:55:09 2023 +0530
[YUNIKORN-1944] calculate headroom for wildcard cases (#642)
Closes: #642
Signed-off-by: Manikandan R <[email protected]>
---
pkg/scheduler/ugm/group_tracker.go | 2 +-
pkg/scheduler/ugm/manager_test.go | 58 +++++++++++++++++++++++++++++----
pkg/scheduler/ugm/queue_tracker.go | 28 ++++++++++++++--
pkg/scheduler/ugm/queue_tracker_test.go | 14 ++++----
pkg/scheduler/ugm/user_tracker.go | 2 +-
5 files changed, 86 insertions(+), 18 deletions(-)
diff --git a/pkg/scheduler/ugm/group_tracker.go
b/pkg/scheduler/ugm/group_tracker.go
index 62304216..f1b6272a 100644
--- a/pkg/scheduler/ugm/group_tracker.go
+++ b/pkg/scheduler/ugm/group_tracker.go
@@ -83,7 +83,7 @@ func (gt *GroupTracker) setLimits(queuePath string, resource
*resources.Resource
func (gt *GroupTracker) headroom(queuePath string) *resources.Resource {
gt.Lock()
defer gt.Unlock()
- return gt.queueTracker.headroom(strings.Split(queuePath, configs.DOT))
+ return gt.queueTracker.headroom(strings.Split(queuePath, configs.DOT),
group)
}
func (gt *GroupTracker) GetGroupResourceUsageDAOInfo()
*dao.GroupResourceUsageDAOInfo {
diff --git a/pkg/scheduler/ugm/manager_test.go
b/pkg/scheduler/ugm/manager_test.go
index 99da9444..53840b0a 100644
--- a/pkg/scheduler/ugm/manager_test.go
+++ b/pkg/scheduler/ugm/manager_test.go
@@ -895,9 +895,11 @@ func TestSeparateUserGroupHeadroom(t *testing.T) {
func TestUserGroupLimit(t *testing.T) { //nolint:funlen
testCases := []struct {
- name string
- user security.UserGroup
- conf configs.PartitionConfig
+ name string
+ user security.UserGroup
+ conf configs.PartitionConfig
+ initExpectedHeadroomResource map[string]string
+ finalExpectedHeadroomResource map[string]string
}{
// unmixed user and group limit
{
@@ -906,6 +908,8 @@ func TestUserGroupLimit(t *testing.T) { //nolint:funlen
conf: createConfigWithLimits([]configs.Limit{
createLimit([]string{"user1"}, nil,
mediumResource, 2),
}),
+ initExpectedHeadroomResource: mediumResource,
+ finalExpectedHeadroomResource: nil,
},
{
name: "maxapplications with a specific user limit",
@@ -913,6 +917,8 @@ func TestUserGroupLimit(t *testing.T) { //nolint:funlen
conf: createConfigWithLimits([]configs.Limit{
createLimit([]string{"user1"}, nil,
largeResource, 1),
}),
+ initExpectedHeadroomResource: largeResource,
+ finalExpectedHeadroomResource: mediumResource,
},
{
name: "maxresources with a specific user limit and a
wildcard user limit for a not specific user",
@@ -921,6 +927,8 @@ func TestUserGroupLimit(t *testing.T) { //nolint:funlen
createLimit([]string{"user1"}, nil,
largeResource, 2),
createLimit([]string{"*"}, nil, mediumResource,
2),
}),
+ initExpectedHeadroomResource: mediumResource,
+ finalExpectedHeadroomResource: nil,
},
{
name: "maxapplications with a specific user limit and a
wildcard user limit for a not specific user",
@@ -929,6 +937,8 @@ func TestUserGroupLimit(t *testing.T) { //nolint:funlen
createLimit([]string{"user1"}, nil,
largeResource, 2),
createLimit([]string{"*"}, nil, largeResource,
1),
}),
+ initExpectedHeadroomResource: largeResource,
+ finalExpectedHeadroomResource: mediumResource,
},
{
name: "maxresources with a specific user limit and a
wildcard user limit for a specific user",
@@ -937,6 +947,8 @@ func TestUserGroupLimit(t *testing.T) { //nolint:funlen
createLimit([]string{"user1"}, nil,
mediumResource, 2),
createLimit([]string{"*"}, nil, largeResource,
2),
}),
+ initExpectedHeadroomResource: mediumResource,
+ finalExpectedHeadroomResource: nil,
},
{
name: "maxapplications with a specific user limit and a
wildcard user limit for a specific user",
@@ -945,6 +957,8 @@ func TestUserGroupLimit(t *testing.T) { //nolint:funlen
createLimit([]string{"user1"}, nil,
largeResource, 1),
createLimit([]string{"*"}, nil, largeResource,
2),
}),
+ initExpectedHeadroomResource: largeResource,
+ finalExpectedHeadroomResource: mediumResource,
},
{
name: "maxresources with a wildcard user limit",
@@ -952,6 +966,8 @@ func TestUserGroupLimit(t *testing.T) { //nolint:funlen
conf: createConfigWithLimits([]configs.Limit{
createLimit(nil, []string{"*"}, mediumResource,
2),
}),
+ initExpectedHeadroomResource: mediumResource,
+ finalExpectedHeadroomResource: nil,
},
{
name: "maxapplications with a wildcard user limit",
@@ -959,6 +975,8 @@ func TestUserGroupLimit(t *testing.T) { //nolint:funlen
conf: createConfigWithLimits([]configs.Limit{
createLimit(nil, []string{"*"}, largeResource,
1),
}),
+ initExpectedHeadroomResource: largeResource,
+ finalExpectedHeadroomResource: mediumResource,
},
{
name: "maxresources with a specific group limit",
@@ -966,6 +984,8 @@ func TestUserGroupLimit(t *testing.T) { //nolint:funlen
conf: createConfigWithLimits([]configs.Limit{
createLimit(nil, []string{"group1"},
mediumResource, 2),
}),
+ initExpectedHeadroomResource: mediumResource,
+ finalExpectedHeadroomResource: nil,
},
{
name: "maxapplications with a specific group limit",
@@ -973,6 +993,8 @@ func TestUserGroupLimit(t *testing.T) { //nolint:funlen
conf: createConfigWithLimits([]configs.Limit{
createLimit(nil, []string{"group1"},
largeResource, 1),
}),
+ initExpectedHeadroomResource: largeResource,
+ finalExpectedHeadroomResource: mediumResource,
},
{
name: "maxresources with a specific group limit and a
wildcard group limit for a not specific group user",
@@ -981,6 +1003,8 @@ func TestUserGroupLimit(t *testing.T) { //nolint:funlen
createLimit(nil, []string{"group1"},
largeResource, 2),
createLimit(nil, []string{"*"}, mediumResource,
2),
}),
+ initExpectedHeadroomResource: mediumResource,
+ finalExpectedHeadroomResource: nil,
},
{
name: "maxapplications with a specific group limit and
a wildcard group limit for a not specific group user",
@@ -989,6 +1013,8 @@ func TestUserGroupLimit(t *testing.T) { //nolint:funlen
createLimit(nil, []string{"group1"},
largeResource, 2),
createLimit(nil, []string{"*"}, largeResource,
1),
}),
+ initExpectedHeadroomResource: largeResource,
+ finalExpectedHeadroomResource: mediumResource,
},
{
name: "maxresources with a specific group limit and a
wildcard group limit for a specific group user",
@@ -997,6 +1023,8 @@ func TestUserGroupLimit(t *testing.T) { //nolint:funlen
createLimit(nil, []string{"group1"},
mediumResource, 2),
createLimit(nil, []string{"*"}, largeResource,
2),
}),
+ initExpectedHeadroomResource: mediumResource,
+ finalExpectedHeadroomResource: nil,
},
{
name: "maxapplications with a specific group limit and
a wildcard group limit for a specific group user",
@@ -1005,6 +1033,8 @@ func TestUserGroupLimit(t *testing.T) { //nolint:funlen
createLimit(nil, []string{"group1"},
largeResource, 1),
createLimit(nil, []string{"*"}, largeResource,
2),
}),
+ initExpectedHeadroomResource: largeResource,
+ finalExpectedHeadroomResource: mediumResource,
},
// mixed user and group limit
{
@@ -1014,6 +1044,8 @@ func TestUserGroupLimit(t *testing.T) { //nolint:funlen
createLimit([]string{"user1"}, nil,
mediumResource, 2),
createLimit(nil, []string{"group1"},
largeResource, 2),
}),
+ initExpectedHeadroomResource: mediumResource,
+ finalExpectedHeadroomResource: nil,
},
{
name: "maxapplications with user limit lower than group
limit",
@@ -1022,6 +1054,8 @@ func TestUserGroupLimit(t *testing.T) { //nolint:funlen
createLimit([]string{"user1"}, nil,
largeResource, 1),
createLimit(nil, []string{"group1"},
largeResource, 2),
}),
+ initExpectedHeadroomResource: largeResource,
+ finalExpectedHeadroomResource: mediumResource,
},
{
name: "maxresources with gorup limit lower than user
limit",
@@ -1030,6 +1064,8 @@ func TestUserGroupLimit(t *testing.T) { //nolint:funlen
createLimit([]string{"user1"}, nil,
largeResource, 2),
createLimit(nil, []string{"group1"},
mediumResource, 2),
}),
+ initExpectedHeadroomResource: mediumResource,
+ finalExpectedHeadroomResource: nil,
},
{
name: "maxapplications with group limit lower than user
limit",
@@ -1038,6 +1074,8 @@ func TestUserGroupLimit(t *testing.T) { //nolint:funlen
createLimit([]string{"user1"}, nil,
largeResource, 2),
createLimit(nil, []string{"group1"},
largeResource, 1),
}),
+ initExpectedHeadroomResource: largeResource,
+ finalExpectedHeadroomResource: mediumResource,
},
}
@@ -1050,9 +1088,12 @@ func TestUserGroupLimit(t *testing.T) { //nolint:funlen
assert.NilError(t,
manager.UpdateConfig(tc.conf.Queues[0], "root"))
usage, err :=
resources.NewResourceFromConf(mediumResource)
- if err != nil {
- t.Errorf("new resource create returned error or
wrong resource: error %t, res %v", err, usage)
- }
+ assert.NilError(t, err, fmt.Sprintf("can't create
resource from %v", mediumResource))
+
+ initExpectedHeadroom, err :=
resources.NewResourceFromConf(tc.initExpectedHeadroomResource)
+ assert.NilError(t, err, fmt.Sprintf("can't create
resource from %v", tc.initExpectedHeadroomResource))
+ headroom := manager.Headroom(queuePathParent, TestApp1,
tc.user)
+ assert.Equal(t, resources.Equals(headroom,
initExpectedHeadroom), true, "init headroom is not expected")
increased :=
manager.IncreaseTrackedResource(queuePathParent, TestApp1, usage, tc.user)
assert.Equal(t, increased, true, "unable to increase
tracked resource: queuepath "+queuePathParent+", app "+TestApp1+", res
"+usage.String())
@@ -1064,6 +1105,11 @@ func TestUserGroupLimit(t *testing.T) { //nolint:funlen
assert.Equal(t,
resources.Equals(userTracker.queueTracker.childQueueTrackers["parent"].resourceUsage,
usage), true, "user tracker resource usage is not expected at root.parent
level")
assert.Equal(t,
userTracker.queueTracker.childQueueTrackers["parent"].runningApplications[TestApp1],
true, fmt.Sprintf("%s is not in runningApplications for user tracker %s at
root.parent level", TestApp1, tc.user.User))
+ finalExpectedHeadroom, err :=
resources.NewResourceFromConf(tc.finalExpectedHeadroomResource)
+ assert.NilError(t, err, fmt.Sprintf("can't create
resource from %v", tc.finalExpectedHeadroomResource))
+ headroom = manager.Headroom(queuePathParent, TestApp1,
tc.user)
+ assert.Equal(t, resources.Equals(headroom,
finalExpectedHeadroom), true, "final headroom is not expected")
+
increased =
manager.IncreaseTrackedResource(queuePathParent, TestApp2, usage, tc.user)
assert.Equal(t, increased, false, "should not increase
tracked resource: queuepath "+queuePathParent+", app "+TestApp2+", res
"+usage.String())
})
diff --git a/pkg/scheduler/ugm/queue_tracker.go
b/pkg/scheduler/ugm/queue_tracker.go
index 39124c25..a9a37efd 100644
--- a/pkg/scheduler/ugm/queue_tracker.go
+++ b/pkg/scheduler/ugm/queue_tracker.go
@@ -237,9 +237,12 @@ func (qt *QueueTracker) setLimit(hierarchy []string,
maxResource *resources.Reso
}
}
-func (qt *QueueTracker) headroom(hierarchy []string) *resources.Resource {
+func (qt *QueueTracker) headroom(hierarchy []string, trackType trackingType)
*resources.Resource {
log.Log(log.SchedUGM).Debug("Calculating headroom",
- zap.Strings("queue path", hierarchy))
+ zap.String("queue path", qt.queuePath),
+ zap.Strings("hierarchy", hierarchy),
+ zap.Int("tracking type", int(trackType)),
+ )
// depth first: all the way to the leaf, create if not exists
// more than 1 in the slice means we need to recurse down
var headroom, childHeadroom *resources.Resource
@@ -248,7 +251,7 @@ func (qt *QueueTracker) headroom(hierarchy []string)
*resources.Resource {
if qt.childQueueTrackers[childName] == nil {
qt.childQueueTrackers[childName] =
newQueueTracker(qt.queuePath, childName)
}
- childHeadroom =
qt.childQueueTrackers[childName].headroom(hierarchy[1:])
+ childHeadroom =
qt.childQueueTrackers[childName].headroom(hierarchy[1:], trackType)
}
// arrived at the leaf or on the way out: check against current max if
set
@@ -257,8 +260,27 @@ func (qt *QueueTracker) headroom(hierarchy []string)
*resources.Resource {
headroom.SubOnlyExisting(qt.resourceUsage)
log.Log(log.SchedUGM).Debug("Calculated headroom",
zap.String("queue path", qt.queuePath),
+ zap.Int("tracking type", int(trackType)),
zap.Stringer("max resource", qt.maxResources),
zap.Stringer("headroom", headroom))
+ } else if resources.Equals(nil, childHeadroom) {
+ // If childHeadroom is not nil, it means there is an user or
wildcard limit config in child queue,
+ // so we don't check wildcard limit config in current queue.
+
+ // Fall back on wild card user limit settings to calculate
headroom only for unnamed users.
+ // For unnamed groups, "*" group tracker object would be used
using the above block to calculate headroom
+ // because resource usage added together for all unnamed groups
under "*" group tracker object.
+ if trackType == user {
+ if config :=
m.getUserWildCardLimitsConfig(qt.queuePath); config != nil {
+ headroom = config.maxResources.Clone()
+ headroom.SubOnlyExisting(qt.resourceUsage)
+ log.Log(log.SchedUGM).Debug("Calculated
headroom",
+ zap.String("queue path", qt.queuePath),
+ zap.Int("tracking type",
int(trackType)),
+ zap.Stringer("wildcard max resource",
config.maxResources),
+ zap.Stringer("headroom", headroom))
+ }
+ }
}
if headroom == nil {
return childHeadroom
diff --git a/pkg/scheduler/ugm/queue_tracker_test.go
b/pkg/scheduler/ugm/queue_tracker_test.go
index 38bd189e..9537b3ba 100644
--- a/pkg/scheduler/ugm/queue_tracker_test.go
+++ b/pkg/scheduler/ugm/queue_tracker_test.go
@@ -267,7 +267,7 @@ func TestHeadroom(t *testing.T) {
assert.Assert(t, leaf != nil, "leaf queue tracker should have been
created")
// auto created trackers no max resource set
- headroom := root.headroom(hierarchy)
+ headroom := root.headroom(hierarchy, none)
assert.Equal(t, headroom, nilResource, "auto create: expected nil
resource")
// prep resources to set as usage and max
@@ -278,21 +278,21 @@ func TestHeadroom(t *testing.T) {
parent.maxResources = resources.Multiply(double, 2)
// headroom should be equal to max cap of leaf queue as there is no
usage so far
- headroom = root.headroom(hierarchy)
+ headroom = root.headroom(hierarchy, none)
assert.Assert(t, resources.Equals(headroom, double), "headroom not leaf
max")
// headroom should be equal to sub(max cap of leaf queue - resource
usage) as there is some usage
leaf.resourceUsage = usage
- headroom = root.headroom(hierarchy)
+ headroom = root.headroom(hierarchy, none)
assert.Assert(t, resources.Equals(headroom, usage), "headroom should be
same as usage")
// headroom should be equal to min headroom of parent and leaf: parent
has none so zero
parent.maxResources = double
parent.resourceUsage = double
- headroom = root.headroom(hierarchy)
+ headroom = root.headroom(hierarchy, none)
assert.Assert(t, resources.IsZero(headroom), "leaf check: parent should
have no headroom")
- headroom = root.headroom(hierarchy[:2])
+ headroom = root.headroom(hierarchy[:2], none)
assert.Assert(t, resources.IsZero(headroom), "parent check: parent
should have no headroom")
// reset usage for the parent
@@ -305,7 +305,7 @@ func TestHeadroom(t *testing.T) {
single, err = resources.NewResourceFromConf(map[string]string{"gpu":
"1"})
assert.NilError(t, err, "single: new resource create returned error")
combined := resources.Add(usage, single)
- headroom = root.headroom(hierarchy)
+ headroom = root.headroom(hierarchy, none)
assert.Assert(t, resources.Equals(headroom, combined), "headroom should
be same as combined")
// this "other" resource should be completely ignored as it has no limit
@@ -313,7 +313,7 @@ func TestHeadroom(t *testing.T) {
assert.NilError(t, err, "single: new resource create returned error")
parent.resourceUsage = other
root.resourceUsage = other
- headroom = root.headroom(hierarchy)
+ headroom = root.headroom(hierarchy, none)
assert.Assert(t, resources.Equals(headroom, combined), "headroom should
be same as combined")
}
diff --git a/pkg/scheduler/ugm/user_tracker.go
b/pkg/scheduler/ugm/user_tracker.go
index 5e842d58..1a4e2c81 100644
--- a/pkg/scheduler/ugm/user_tracker.go
+++ b/pkg/scheduler/ugm/user_tracker.go
@@ -127,7 +127,7 @@ func (ut *UserTracker) setLimits(queuePath string, resource
*resources.Resource,
func (ut *UserTracker) headroom(queuePath string) *resources.Resource {
ut.Lock()
defer ut.Unlock()
- return ut.queueTracker.headroom(strings.Split(queuePath, configs.DOT))
+ return ut.queueTracker.headroom(strings.Split(queuePath, configs.DOT),
user)
}
func (ut *UserTracker) GetUserResourceUsageDAOInfo()
*dao.UserResourceUsageDAOInfo {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]