craigcondit commented on code in PR #960:
URL: https://github.com/apache/yunikorn-core/pull/960#discussion_r1752702277
##########
pkg/scheduler/objects/queue.go:
##########
@@ -1659,17 +1659,21 @@ func (sq *Queue) SupportTaskGroup() bool {
func (sq *Queue) updateGuaranteedResourceMetrics() {
if sq.guaranteedResource != nil {
for k, v := range sq.guaranteedResource.Resources {
-
metrics.GetQueueMetrics(sq.QueuePath).SetQueueGuaranteedResourceMetrics(k,
float64(v))
+
metrics.GetQueueMetrics(sq.QueuePath).SetQueueResourceMetrics(metrics.QueueGuaranteed,
k, float64(v))
}
+ } else {
+
metrics.GetQueueMetrics(sq.QueuePath).SetQueueNilResourceMetrics(metrics.QueueGuaranteed)
}
}
// updateMaxResourceMetrics updates max resource metrics.
func (sq *Queue) updateMaxResourceMetrics() {
if sq.maxResource != nil {
for k, v := range sq.maxResource.Resources {
-
metrics.GetQueueMetrics(sq.QueuePath).SetQueueMaxResourceMetrics(k, float64(v))
Review Comment:
Same - move logic into pkg/metrics/queue.go.
##########
pkg/scheduler/objects/queue.go:
##########
@@ -1659,17 +1659,21 @@ func (sq *Queue) SupportTaskGroup() bool {
func (sq *Queue) updateGuaranteedResourceMetrics() {
if sq.guaranteedResource != nil {
for k, v := range sq.guaranteedResource.Resources {
Review Comment:
There's no reason for any of this logic to be here. Encapsulate all of it
within pkg/metrics/queue.go.
##########
pkg/metrics/queue.go:
##########
@@ -301,12 +301,46 @@ func (m *QueueMetrics) AddReleasedContainers(value int) {
m.containerMetrics.WithLabelValues(ContainerReleased).Add(float64(value))
}
-func (m *QueueMetrics) SetQueueGuaranteedResourceMetrics(resourceName string,
value float64) {
- m.setQueueResource(QueueGuaranteed, resourceName, value)
+func (m *QueueMetrics) SetQueueResourceMetrics(state string, resourceName
string, value float64) {
+ m.setQueueResource(state, resourceName, value)
}
-func (m *QueueMetrics) SetQueueMaxResourceMetrics(resourceName string, value
float64) {
- m.setQueueResource(QueueMax, resourceName, value)
+func (m *QueueMetrics) SetQueueNilResourceMetrics(state string) {
+ currResource, err := m.GetAllQueueStateMetrics(state)
+ if err != nil {
+ log.Log(log.Metrics).Warn("failed to get current guaranteed
resource metrics",
+ zap.Error(err))
+ return
+ }
+ for k := range currResource {
+ m.SetQueueResourceMetrics(state, k, float64(0))
+ }
+}
+
+func (m *QueueMetrics) GetAllQueueStateMetrics(state string)
(map[string]float64, error) {
+ metrics := make(map[string]float64)
+ metricFamilies, err := prometheus.DefaultGatherer.Gather()
+ if err != nil {
+ return nil, err
+ }
+
+ for _, metricFamily := range metricFamilies {
Review Comment:
Can this be simplified? This seems... extreme. Perhaps we can simply keep
(as part of the QueueMetrics object) a list of known resource types for the
current queue. Then we can pass in all the resources types at once into
SetQueueResourceMetrics(). Iterate both old and new types. Old types that don't
exist in the new collection get emitted with zero; new types get added to the
known list. Values in the new collection get emitted as-is. Be sure to us
appropriate locking.
This allows a much more efficient lookup that doesn't involve traversing the
entirety of the metrics, and provides a much more convenient interface for
callers.
##########
pkg/metrics/queue.go:
##########
@@ -301,12 +301,46 @@ func (m *QueueMetrics) AddReleasedContainers(value int) {
m.containerMetrics.WithLabelValues(ContainerReleased).Add(float64(value))
}
-func (m *QueueMetrics) SetQueueGuaranteedResourceMetrics(resourceName string,
value float64) {
- m.setQueueResource(QueueGuaranteed, resourceName, value)
+func (m *QueueMetrics) SetQueueResourceMetrics(state string, resourceName
string, value float64) {
+ m.setQueueResource(state, resourceName, value)
}
-func (m *QueueMetrics) SetQueueMaxResourceMetrics(resourceName string, value
float64) {
- m.setQueueResource(QueueMax, resourceName, value)
+func (m *QueueMetrics) SetQueueNilResourceMetrics(state string) {
Review Comment:
This should be made an internal function once logic is moved here from
scheduler/objects/queue.go.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]