This is an automated email from the ASF dual-hosted git repository.
mani pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/yunikorn-core.git
The following commit(s) were added to refs/heads/master by this push:
new 2e63209b [YUNIKORN-1994] fix release containers metrix in queue (#665)
2e63209b is described below
commit 2e63209b5e6464cb15ec0e2ab96d053aedad6945
Author: PoAn Yang <[email protected]>
AuthorDate: Tue Oct 3 17:02:45 2023 +0530
[YUNIKORN-1994] fix release containers metrix in queue (#665)
Closes: #665
Signed-off-by: Manikandan R <[email protected]>
---
pkg/metrics/init.go | 1 +
pkg/metrics/queue.go | 4 ++++
pkg/metrics/queue_test.go | 8 ++++++++
pkg/scheduler/partition.go | 6 ++----
4 files changed, 15 insertions(+), 4 deletions(-)
diff --git a/pkg/metrics/init.go b/pkg/metrics/init.go
index 341bb230..e7cdeb56 100644
--- a/pkg/metrics/init.go
+++ b/pkg/metrics/init.go
@@ -54,6 +54,7 @@ type CoreQueueMetrics interface {
IncQueueApplicationsCompleted()
IncAllocatedContainer()
IncReleasedContainer()
+ AddReleasedContainers(value int)
SetQueueGuaranteedResourceMetrics(resourceName string, value float64)
SetQueueMaxResourceMetrics(resourceName string, value float64)
SetQueueAllocatedResourceMetrics(resourceName string, value float64)
diff --git a/pkg/metrics/queue.go b/pkg/metrics/queue.go
index 3d55eb0b..a4030d81 100644
--- a/pkg/metrics/queue.go
+++ b/pkg/metrics/queue.go
@@ -108,6 +108,10 @@ func (m *QueueMetrics) IncReleasedContainer() {
m.appMetrics.With(prometheus.Labels{"state": "released"}).Inc()
}
+func (m *QueueMetrics) AddReleasedContainers(value int) {
+ m.appMetrics.With(prometheus.Labels{"state": "released"}).Add(float64(value))
+}
+
func (m *QueueMetrics) SetQueueGuaranteedResourceMetrics(resourceName string, value float64) {
m.ResourceMetrics.With(prometheus.Labels{"state": "guaranteed", "resource": resourceName}).Set(value)
}
diff --git a/pkg/metrics/queue_test.go b/pkg/metrics/queue_test.go
index 7ede358e..ae17ed17 100644
--- a/pkg/metrics/queue_test.go
+++ b/pkg/metrics/queue_test.go
@@ -86,6 +86,14 @@ func TestReleasedContainers(t *testing.T) {
verifyAppMetrics(t, "released")
}
+func TestAddReleasedContainers(t *testing.T) {
+ cqm = getQueueMetrics()
+ defer unregisterQueueMetrics(t)
+
+ cqm.AddReleasedContainers(1)
+ verifyAppMetrics(t, "released")
+}
+
func TestQueueGuaranteedResourceMetrics(t *testing.T) {
cqm = getQueueMetrics()
defer unregisterQueueMetrics(t)
diff --git a/pkg/scheduler/partition.go b/pkg/scheduler/partition.go
index f4bde125..f08558d1 100644
--- a/pkg/scheduler/partition.go
+++ b/pkg/scheduler/partition.go
@@ -768,8 +768,6 @@ func (pc *PartitionContext) removeNodeAllocations(node *objects.Node) ([]*object
log.Log(log.SchedPartition).Warn("failed to release resources from queue",
zap.String("appID", alloc.GetApplicationID()),
zap.Error(err))
- } else {
- metrics.GetQueueMetrics(queue.GetQueuePath()).IncReleasedContainer()
}
// remove preempted resources
if alloc.IsPreempted() {
@@ -781,6 +779,7 @@ func (pc *PartitionContext) removeNodeAllocations(node *objects.Node) ([]*object
// the allocation is removed so add it to the list that we return
released = append(released, alloc)
+ metrics.GetQueueMetrics(queue.GetQueuePath()).IncReleasedContainer()
log.Log(log.SchedPartition).Info("allocation removed from node",
zap.String("nodeID", node.NodeID),
zap.String("allocationId", allocID))
@@ -1356,8 +1355,6 @@ func (pc *PartitionContext) removeAllocation(release *si.AllocationRelease) ([]*
zap.String("appID", appID),
zap.String("allocationId", uuid),
zap.Error(err))
- } else {
- metrics.GetQueueMetrics(queue.GetQueuePath()).IncReleasedContainer()
}
}
if resources.StrictlyGreaterThanZero(totalPreempting) {
@@ -1371,6 +1368,7 @@ func (pc *PartitionContext) removeAllocation(release *si.AllocationRelease) ([]*
}
// track the number of allocations, when we replace the result is no change
pc.updateAllocationCount(-len(released))
+ metrics.GetQueueMetrics(queue.GetQueuePath()).AddReleasedContainers(len(released))
// if the termination type is timeout, we don't notify the shim, because it's
// originated from that side
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]