This is an automated email from the ASF dual-hosted git repository.

mani pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/yunikorn-core.git


The following commit(s) were added to refs/heads/master by this push:
     new 2e63209b [YUNIKORN-1994] fix release containers metrics in queue (#665)
2e63209b is described below

commit 2e63209b5e6464cb15ec0e2ab96d053aedad6945
Author: PoAn Yang <[email protected]>
AuthorDate: Tue Oct 3 17:02:45 2023 +0530

    [YUNIKORN-1994] fix release containers metrics in queue (#665)
    
    Closes: #665
    
    Signed-off-by: Manikandan R <[email protected]>
---
 pkg/metrics/init.go        | 1 +
 pkg/metrics/queue.go       | 4 ++++
 pkg/metrics/queue_test.go  | 8 ++++++++
 pkg/scheduler/partition.go | 6 ++----
 4 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/pkg/metrics/init.go b/pkg/metrics/init.go
index 341bb230..e7cdeb56 100644
--- a/pkg/metrics/init.go
+++ b/pkg/metrics/init.go
@@ -54,6 +54,7 @@ type CoreQueueMetrics interface {
        IncQueueApplicationsCompleted()
        IncAllocatedContainer()
        IncReleasedContainer()
+       AddReleasedContainers(value int)
        SetQueueGuaranteedResourceMetrics(resourceName string, value float64)
        SetQueueMaxResourceMetrics(resourceName string, value float64)
        SetQueueAllocatedResourceMetrics(resourceName string, value float64)
diff --git a/pkg/metrics/queue.go b/pkg/metrics/queue.go
index 3d55eb0b..a4030d81 100644
--- a/pkg/metrics/queue.go
+++ b/pkg/metrics/queue.go
@@ -108,6 +108,10 @@ func (m *QueueMetrics) IncReleasedContainer() {
        m.appMetrics.With(prometheus.Labels{"state": "released"}).Inc()
 }
 
+func (m *QueueMetrics) AddReleasedContainers(value int) {
+       m.appMetrics.With(prometheus.Labels{"state": "released"}).Add(float64(value))
+}
+
 func (m *QueueMetrics) SetQueueGuaranteedResourceMetrics(resourceName string, value float64) {
        m.ResourceMetrics.With(prometheus.Labels{"state": "guaranteed", "resource": resourceName}).Set(value)
 }
diff --git a/pkg/metrics/queue_test.go b/pkg/metrics/queue_test.go
index 7ede358e..ae17ed17 100644
--- a/pkg/metrics/queue_test.go
+++ b/pkg/metrics/queue_test.go
@@ -86,6 +86,14 @@ func TestReleasedContainers(t *testing.T) {
        verifyAppMetrics(t, "released")
 }
 
+func TestAddReleasedContainers(t *testing.T) {
+       cqm = getQueueMetrics()
+       defer unregisterQueueMetrics(t)
+
+       cqm.AddReleasedContainers(1)
+       verifyAppMetrics(t, "released")
+}
+
 func TestQueueGuaranteedResourceMetrics(t *testing.T) {
        cqm = getQueueMetrics()
        defer unregisterQueueMetrics(t)
diff --git a/pkg/scheduler/partition.go b/pkg/scheduler/partition.go
index f4bde125..f08558d1 100644
--- a/pkg/scheduler/partition.go
+++ b/pkg/scheduler/partition.go
@@ -768,8 +768,6 @@ func (pc *PartitionContext) removeNodeAllocations(node *objects.Node) ([]*object
                        log.Log(log.SchedPartition).Warn("failed to release resources from queue",
                                zap.String("appID", alloc.GetApplicationID()),
                                zap.Error(err))
-               } else {
-                       metrics.GetQueueMetrics(queue.GetQueuePath()).IncReleasedContainer()
                }
                // remove preempted resources
                if alloc.IsPreempted() {
@@ -781,6 +779,7 @@ func (pc *PartitionContext) removeNodeAllocations(node *objects.Node) ([]*object
 
                // the allocation is removed so add it to the list that we return
                released = append(released, alloc)
+               metrics.GetQueueMetrics(queue.GetQueuePath()).IncReleasedContainer()
                log.Log(log.SchedPartition).Info("allocation removed from node",
                        zap.String("nodeID", node.NodeID),
                        zap.String("allocationId", allocID))
@@ -1356,8 +1355,6 @@ func (pc *PartitionContext) removeAllocation(release *si.AllocationRelease) ([]*
                                zap.String("appID", appID),
                                zap.String("allocationId", uuid),
                                zap.Error(err))
-               } else {
-                       metrics.GetQueueMetrics(queue.GetQueuePath()).IncReleasedContainer()
                }
        }
        if resources.StrictlyGreaterThanZero(totalPreempting) {
@@ -1371,6 +1368,7 @@ func (pc *PartitionContext) removeAllocation(release *si.AllocationRelease) ([]*
        }
        // track the number of allocations, when we replace the result is no change
        pc.updateAllocationCount(-len(released))
+       metrics.GetQueueMetrics(queue.GetQueuePath()).AddReleasedContainers(len(released))
 
        // if the termination type is timeout, we don't notify the shim, because it's
        // originated from that side


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to