This is an automated email from the ASF dual-hosted git repository.
mani pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/yunikorn-core.git
The following commit(s) were added to refs/heads/master by this push:
new f83134b0 [YUNIKORN-1920] headroom with parent queue usage (#620)
f83134b0 is described below
commit f83134b0776bef792141085d23d37295140587cd
Author: Wilfred Spiegelenburg <[email protected]>
AuthorDate: Tue Aug 29 12:39:41 2023 +0530
[YUNIKORN-1920] headroom with parent queue usage (#620)
Headroom needs to take into account the usage in each queue and not
assume that the full max resource is available if the queue is not the
leaf queue.
Add a queue traker object for any queue that is not found while
traversing the hierarchy. The user/group has an application running in
that queue and will add usage at some point.
Add tests to cover usage and max with different resource types set.
Closes: #620
Signed-off-by: Manikandan R <[email protected]>
---
pkg/scheduler/ugm/group_tracker.go | 4 +-
pkg/scheduler/ugm/queue_tracker.go | 36 +++++------
pkg/scheduler/ugm/queue_tracker_test.go | 110 +++++++++++++++++---------------
pkg/scheduler/ugm/user_tracker.go | 4 +-
4 files changed, 81 insertions(+), 73 deletions(-)
diff --git a/pkg/scheduler/ugm/group_tracker.go
b/pkg/scheduler/ugm/group_tracker.go
index 150016c5..1830df2a 100644
--- a/pkg/scheduler/ugm/group_tracker.go
+++ b/pkg/scheduler/ugm/group_tracker.go
@@ -19,9 +19,11 @@
package ugm
import (
+ "strings"
"sync"
"github.com/apache/yunikorn-core/pkg/common"
+ "github.com/apache/yunikorn-core/pkg/common/configs"
"github.com/apache/yunikorn-core/pkg/common/resources"
"github.com/apache/yunikorn-core/pkg/webservice/dao"
)
@@ -81,7 +83,7 @@ func (gt *GroupTracker) setLimits(queuePath string, resource
*resources.Resource
func (gt *GroupTracker) headroom(queuePath string) *resources.Resource {
gt.Lock()
defer gt.Unlock()
- return gt.queueTracker.headroom(queuePath)
+ return gt.queueTracker.headroom(strings.Split(queuePath, configs.DOT))
}
func (gt *GroupTracker) GetGroupResourceUsageDAOInfo()
*dao.GroupResourceUsageDAOInfo {
diff --git a/pkg/scheduler/ugm/queue_tracker.go
b/pkg/scheduler/ugm/queue_tracker.go
index 8dc0fe99..70c70bea 100644
--- a/pkg/scheduler/ugm/queue_tracker.go
+++ b/pkg/scheduler/ugm/queue_tracker.go
@@ -245,34 +245,32 @@ func (qt *QueueTracker) setLimit(queuePath string,
maxResource *resources.Resour
childQueueTracker.maxResources = maxResource
}
-func (qt *QueueTracker) headroom(queuePath string) *resources.Resource {
+func (qt *QueueTracker) headroom(hierarchy []string) *resources.Resource {
log.Log(log.SchedUGM).Debug("Calculating headroom",
- zap.String("queue path", queuePath))
- childQueuePath, immediateChildQueueName := getChildQueuePath(queuePath)
- if childQueuePath != common.Empty {
- if qt.childQueueTrackers[immediateChildQueueName] != nil {
- headroom :=
qt.childQueueTrackers[immediateChildQueueName].headroom(childQueuePath)
- if headroom != nil {
- return
resources.ComponentWiseMinPermissive(headroom, qt.maxResources)
- }
- } else {
- log.Log(log.SchedUGM).Error("Child queueTracker tracker
must be available in child queues map",
- zap.String("child queueTracker name",
immediateChildQueueName))
- return nil
+ zap.Strings("queue path", hierarchy))
+ // depth first: all the way to the leaf, create if not exists
+ // more than 1 in the slice means we need to recurse down
+ var headroom, childHeadroom *resources.Resource
+ if len(hierarchy) > 1 {
+ childName := hierarchy[1]
+ if qt.childQueueTrackers[childName] == nil {
+ qt.childQueueTrackers[childName] =
newQueueTracker(qt.queuePath, childName)
}
+ childHeadroom =
qt.childQueueTrackers[childName].headroom(hierarchy[1:])
}
-
+ // arrived at the leaf or on the way out: check against current max if
set
if !resources.Equals(resources.NewResource(), qt.maxResources) {
- headroom := qt.maxResources.Clone()
+ headroom = qt.maxResources.Clone()
headroom.SubOnlyExisting(qt.resourceUsage)
log.Log(log.SchedUGM).Debug("Calculated headroom",
- zap.String("queue path", queuePath),
- zap.String("queue", qt.queueName),
+ zap.String("queue path", qt.queuePath),
zap.Stringer("max resource", qt.maxResources),
zap.Stringer("headroom", headroom))
- return headroom
}
- return nil
+ if headroom == nil {
+ return childHeadroom
+ }
+ return resources.ComponentWiseMinPermissive(headroom, childHeadroom)
}
func (qt *QueueTracker) getResourceUsageDAOInfo(parentQueuePath string)
*dao.ResourceUsageDAOInfo {
diff --git a/pkg/scheduler/ugm/queue_tracker_test.go
b/pkg/scheduler/ugm/queue_tracker_test.go
index bcaabb35..55b79a68 100644
--- a/pkg/scheduler/ugm/queue_tracker_test.go
+++ b/pkg/scheduler/ugm/queue_tracker_test.go
@@ -19,10 +19,12 @@
package ugm
import (
+ "strings"
"testing"
"gotest.tools/v3/assert"
+ "github.com/apache/yunikorn-core/pkg/common/configs"
"github.com/apache/yunikorn-core/pkg/common/resources"
)
@@ -251,62 +253,66 @@ func TestQTQuotaEnforcement(t *testing.T) {
}
func TestHeadroom(t *testing.T) {
- leafQT := newQueueTracker("root.parent", "leaf")
-
- leafMaxRes, err :=
resources.NewResourceFromConf(map[string]string{"mem": "60M", "vcore": "60"})
- if err != nil {
- t.Errorf("new resource create returned error or wrong resource:
error %t, res %v", err, leafMaxRes)
- }
-
- parentQT := newQueueTracker("root", "parent")
- parentMaxRes := leafMaxRes.Clone()
- resources.Multiply(parentMaxRes, 2)
-
- rootQT := newQueueTracker("", "root")
-
- parentQT.childQueueTrackers["leaf"] = leafQT
- rootQT.childQueueTrackers["parent"] = parentQT
-
- // Not even a single queue has been configured with max resource
- headroom := rootQT.headroom("root.parent.leaf")
- assert.Equal(t, resources.Equals(headroom, nil), true)
-
- leafQT.maxResources = leafMaxRes
- parentQT.maxResources = parentMaxRes
+ var nilResource *resources.Resource
+ path := "root.parent.leaf"
+ hierarchy := strings.Split(path, configs.DOT)
+
+ // nothing exists make sure the hierarchy gets created
+ root := newRootQueueTracker()
+ parent := root.getChildQueueTracker("root.parent")
+ assert.Assert(t, parent != nil, "parent queue tracker should have been
created")
+ leaf := root.getChildQueueTracker(path)
+ assert.Assert(t, leaf != nil, "leaf queue tracker should have been
created")
+
+ // auto created trackers no max resource set
+ headroom := root.headroom(hierarchy)
+ assert.Equal(t, headroom, nilResource, "auto create: expected nil
resource")
+
+ // prep resources to set as usage and max
+ usage, err := resources.NewResourceFromConf(map[string]string{"mem":
"10M", "vcore": "10"})
+ assert.NilError(t, err, "usage: new resource create returned error")
+ double := resources.Multiply(usage, 2)
+ leaf.maxResources = double
+ parent.maxResources = resources.Multiply(double, 2)
// headroom should be equal to max cap of leaf queue as there is no
usage so far
- headroom = rootQT.headroom("root.parent.leaf")
- assert.Equal(t, resources.Equals(headroom, leafMaxRes), true)
-
- leafResUsage, err :=
resources.NewResourceFromConf(map[string]string{"mem": "30M", "vcore": "30"})
- if err != nil {
- t.Errorf("new resource create returned error or wrong resource:
error %t, res %v", err, leafResUsage)
- }
- leafQT.resourceUsage = leafResUsage
+ headroom = root.headroom(hierarchy)
+ assert.Assert(t, resources.Equals(headroom, double), "headroom not leaf
max")
// headroom should be equal to sub(max cap of leaf queue - resource
usage) as there is some usage
- headroom = rootQT.headroom("root.parent.leaf")
- assert.Equal(t, resources.Equals(headroom, leafResUsage), true)
-
- leafQT.maxResources = resources.Multiply(leafMaxRes, 2)
- parentQT.maxResources = leafMaxRes
-
- // headroom should be equal to min (leaf max resources, parent
resources)
- headroom = rootQT.headroom("root.parent.leaf")
- assert.Equal(t, resources.Equals(headroom, leafMaxRes), true)
-
- parentQT.maxResources = resources.NewResource()
-
- // headroom should be equal to sub(max cap of leaf queue - resource
usage) as there is some usage in leaf and max res of both root and parent is nil
- headroom = rootQT.headroom("root.parent.leaf")
- assert.Equal(t, resources.Equals(headroom, resources.Add(leafMaxRes,
leafResUsage)), true)
-
- rootQT.maxResources = leafMaxRes
-
- // headroom should be equal to min ( (sub(max cap of leaf queue -
resource usage), root resources) as there is some usage in leaf
- // and max res of parent is nil
- headroom = rootQT.headroom("root.parent.leaf")
- assert.Equal(t, resources.Equals(headroom, leafMaxRes), true)
+ leaf.resourceUsage = usage
+ headroom = root.headroom(hierarchy)
+ assert.Assert(t, resources.Equals(headroom, usage), "headroom should be
same as usage")
+
+ // headroom should be equal to min headroom of parent and leaf: parent
has none so zero
+ parent.maxResources = double
+ parent.resourceUsage = double
+ headroom = root.headroom(hierarchy)
+ assert.Assert(t, resources.IsZero(headroom), "leaf check: parent should
have no headroom")
+
+ headroom = root.headroom(hierarchy[:2])
+ assert.Assert(t, resources.IsZero(headroom), "parent check: parent
should have no headroom")
+
+ // reset usage for the parent
+ parent.resourceUsage = resources.NewResource()
+ // set a different type in the parent max and check it is in the
headroom
+ var single, other *resources.Resource
+ single, err = resources.NewResourceFromConf(map[string]string{"gpu":
"1"})
+ assert.NilError(t, err, "single: new resource create returned error")
+ parent.maxResources = single
+ single, err = resources.NewResourceFromConf(map[string]string{"gpu":
"1"})
+ assert.NilError(t, err, "single: new resource create returned error")
+ combined := resources.Add(usage, single)
+ headroom = root.headroom(hierarchy)
+ assert.Assert(t, resources.Equals(headroom, combined), "headroom should
be same as combined")
+
+ // this "other" resource should be completely ignored as it has no limit
+ other, err = resources.NewResourceFromConf(map[string]string{"unknown":
"100"})
+ assert.NilError(t, err, "single: new resource create returned error")
+ parent.resourceUsage = other
+ root.resourceUsage = other
+ headroom = root.headroom(hierarchy)
+ assert.Assert(t, resources.Equals(headroom, combined), "headroom should
be same as combined")
}
func getQTResource(qt *QueueTracker) map[string]*resources.Resource {
diff --git a/pkg/scheduler/ugm/user_tracker.go
b/pkg/scheduler/ugm/user_tracker.go
index 6b3bba24..6f761cbf 100644
--- a/pkg/scheduler/ugm/user_tracker.go
+++ b/pkg/scheduler/ugm/user_tracker.go
@@ -19,11 +19,13 @@
package ugm
import (
+ "strings"
"sync"
"go.uber.org/zap"
"github.com/apache/yunikorn-core/pkg/common"
+ "github.com/apache/yunikorn-core/pkg/common/configs"
"github.com/apache/yunikorn-core/pkg/common/resources"
"github.com/apache/yunikorn-core/pkg/log"
"github.com/apache/yunikorn-core/pkg/webservice/dao"
@@ -124,7 +126,7 @@ func (ut *UserTracker) setLimits(queuePath string, resource
*resources.Resource,
func (ut *UserTracker) headroom(queuePath string) *resources.Resource {
ut.Lock()
defer ut.Unlock()
- return ut.queueTracker.headroom(queuePath)
+ return ut.queueTracker.headroom(strings.Split(queuePath, configs.DOT))
}
func (ut *UserTracker) GetUserResourceUsageDAOInfo()
*dao.UserResourceUsageDAOInfo {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]