This is an automated email from the ASF dual-hosted git repository.

ethanfeng pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/celeborn.git


The following commit(s) were added to refs/heads/main by this push:
     new c3d33daab [CELEBORN-1627] Introduce `instance` variable for celeborn 
dashboard to filter metrics
c3d33daab is described below

commit c3d33daabc6360f9ac7a8c397ae57d71fab094e4
Author: Wang, Fei <[email protected]>
AuthorDate: Wed Oct 9 14:47:03 2024 +0800

    [CELEBORN-1627] Introduce `instance` variable for celeborn dashboard to 
filter metrics
    
    ### What changes were proposed in this pull request?
    
    1. add `instanceLabel` in metrics source, preferring `FQDN:port` over 
`ip:port`; previously `ip:port` was used even with 
`celeborn.network.bind.preferIpAddress=false`
    2. add variable `instance` with `label_values(metrics_JVMCPUTime_Value, 
instance)`, same as `celeborn-jvm-dashboard.json`
    3. add filter `instance=~"${instance}"` for every metrics
    4. add missing `legendFormat` for memory file storage metrics expressions
    
    ### Why are the changes needed?
    
    There can be many celeborn instances in production use cases, so it is 
better to be able to filter metrics by instance.
    
    ### Does this PR introduce _any_ user-facing change?
    Yes, it introduces a new dashboard variable `instance`.
    
    However, the default value of `instance` is `ALL`, so the behavior is the same as before.
    
    ### How was this patch tested?
    
    Config: `celeborn.network.bind.preferIpAddress=false`
    <img width="1141" alt="image" 
src="https://github.com/user-attachments/assets/c3161069-790a-4cb2-8654-6d52cf8e5fb0">
    <img width="944" alt="image" 
src="https://github.com/user-attachments/assets/293b8bd4-252a-459c-aa86-5f4aa75eb594">
    
    <img width="939" alt="image" 
src="https://github.com/user-attachments/assets/1e1b28af-dd71-4c5b-8285-57473a6c9650">
    
    For JVM metrics, before it was ip:port, and now it is FQDN:port.
    <img width="947" alt="image" 
src="https://github.com/user-attachments/assets/fe00762f-605d-4b5e-b0a4-c586bdc0ec1a">
    
    Closes #2777 from turboFei/legend_base.
    
    Authored-by: Wang, Fei <[email protected]>
    Signed-off-by: mingji <[email protected]>
---
 assets/grafana/celeborn-dashboard.json             | 241 ++++++++++++---------
 .../common/metrics/source/AbstractSource.scala     |   9 +-
 .../celeborn/common/metrics/source/Role.scala      |  23 ++
 .../metrics/source/CelebornSourceSuite.scala       |  27 ++-
 .../celeborn/service/deploy/master/Master.scala    |  12 +-
 .../service/deploy/master/MasterSource.scala       |   5 +-
 .../celeborn/common/metrics/MetricsSystem.scala    |   3 -
 .../celeborn/service/deploy/worker/Worker.scala    |  12 +-
 .../service/deploy/worker/WorkerSource.scala       |   5 +-
 9 files changed, 200 insertions(+), 137 deletions(-)

diff --git a/assets/grafana/celeborn-dashboard.json 
b/assets/grafana/celeborn-dashboard.json
index 553bd44b0..1592ec2de 100644
--- a/assets/grafana/celeborn-dashboard.json
+++ b/assets/grafana/celeborn-dashboard.json
@@ -155,7 +155,7 @@
                 "type": "prometheus",
                 "uid": "${DS_PROMETHEUS}"
               },
-              "expr": "metrics_RegisteredShuffleCount_Value",
+              "expr": 
"metrics_RegisteredShuffleCount_Value{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "refId": "A"
             }
@@ -249,7 +249,7 @@
                 "type": "prometheus",
                 "uid": "${DS_PROMETHEUS}"
               },
-              "expr": "metrics_WorkerCount_Value",
+              "expr": "metrics_WorkerCount_Value{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "refId": "A"
             }
@@ -344,7 +344,7 @@
                 "type": "prometheus",
                 "uid": "${DS_PROMETHEUS}"
               },
-              "expr": 
"metrics_DeviceCelebornTotalBytes_Value{role=\"Master\"}",
+              "expr": "metrics_DeviceCelebornTotalBytes_Value{role=\"Master\", 
instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "refId": "A"
             }
@@ -439,7 +439,7 @@
                 "type": "prometheus",
                 "uid": "${DS_PROMETHEUS}"
               },
-              "expr": "metrics_DeviceCelebornFreeBytes_Value{role=\"Master\"}",
+              "expr": "metrics_DeviceCelebornFreeBytes_Value{role=\"Master\", 
instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "refId": "A"
             }
@@ -533,7 +533,7 @@
                 "type": "prometheus",
                 "uid": "${DS_PROMETHEUS}"
               },
-              "expr": "metrics_RunningApplicationCount_Value",
+              "expr": 
"metrics_RunningApplicationCount_Value{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "refId": "A"
             }
@@ -643,7 +643,7 @@
                 "uid": "${DS_PROMETHEUS}"
               },
               "editorMode": "code",
-              "expr": "metrics_IsActiveMaster_Value",
+              "expr": 
"metrics_IsActiveMaster_Value{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "range": true,
               "refId": "A"
@@ -740,7 +740,7 @@
                 "uid": "${DS_PROMETHEUS}"
               },
               "editorMode": "code",
-              "expr": "metrics_PartitionSize_Value",
+              "expr": "metrics_PartitionSize_Value{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "range": true,
               "refId": "A"
@@ -835,7 +835,7 @@
                 "type": "prometheus",
                 "uid": "${DS_PROMETHEUS}"
               },
-              "expr": "metrics_ShutdownWorkerCount_Value",
+              "expr": 
"metrics_ShutdownWorkerCount_Value{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "range": true,
               "refId": "A"
@@ -931,7 +931,7 @@
                 "uid": "${DS_PROMETHEUS}"
               },
               "editorMode": "code",
-              "expr": "metrics_ActiveShuffleFileCount_Value{role=\"Master\"}",
+              "expr": "metrics_ActiveShuffleFileCount_Value{role=\"Master\", 
instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "range": true,
               "refId": "A"
@@ -1028,7 +1028,7 @@
                 "uid": "${DS_PROMETHEUS}"
               },
               "editorMode": "code",
-              "expr": "metrics_ActiveShuffleSize_Value{role=\"Master\"}",
+              "expr": "metrics_ActiveShuffleSize_Value{role=\"Master\", 
instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "range": true,
               "refId": "A"
@@ -1123,7 +1123,7 @@
                 "type": "prometheus",
                 "uid": "${DS_PROMETHEUS}"
               },
-              "expr": "metrics_OfferSlotsTime_Max",
+              "expr": "metrics_OfferSlotsTime_Max{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "refId": "A"
             }
@@ -1216,7 +1216,7 @@
                 "type": "prometheus",
                 "uid": "${DS_PROMETHEUS}"
               },
-              "expr": "metrics_OfferSlotsTime_Mean",
+              "expr": "metrics_OfferSlotsTime_Mean{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "refId": "A"
             }
@@ -1310,7 +1310,7 @@
                 "uid": "${DS_PROMETHEUS}"
               },
               "editorMode": "code",
-              "expr": "metrics_ExcludedWorkerCount_Value",
+              "expr": 
"metrics_ExcludedWorkerCount_Value{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "range": true,
               "refId": "A"
@@ -1405,7 +1405,7 @@
                 "uid": "${DS_PROMETHEUS}"
               },
               "editorMode": "code",
-              "expr": "metrics_AvailableWorkerCount_Value",
+              "expr": 
"metrics_AvailableWorkerCount_Value{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "range": true,
               "refId": "A"
@@ -1499,7 +1499,7 @@
                 "type": "prometheus",
                 "uid": "${DS_PROMETHEUS}"
               },
-              "expr": "metrics_LostWorkerCount_Value",
+              "expr": 
"metrics_LostWorkerCount_Value{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "range": true,
               "refId": "A"
@@ -1606,7 +1606,7 @@
                 "type": "prometheus",
                 "uid": "${DS_PROMETHEUS}"
               },
-              "expr": "increase(metrics_SlotsAllocated_Count[1h])",
+              "expr": 
"increase(metrics_SlotsAllocated_Count{instance=~\"${instance}\"}[1h])",
               "legendFormat": "${baseLegend}",
               "refId": "A"
             }
@@ -1699,7 +1699,7 @@
                 "type": "prometheus",
                 "uid": "${DS_PROMETHEUS}"
               },
-              "expr": "metrics_ReserveSlotsTime_Mean",
+              "expr": 
"metrics_ReserveSlotsTime_Mean{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "refId": "A"
             }
@@ -1793,7 +1793,7 @@
                 "uid": "${DS_PROMETHEUS}"
               },
               "editorMode": "code",
-              "expr": "metrics_ReserveSlotsTime_Max",
+              "expr": 
"metrics_ReserveSlotsTime_Max{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "range": true,
               "refId": "A"
@@ -1886,7 +1886,7 @@
                 "type": "prometheus",
                 "uid": "${DS_PROMETHEUS}"
               },
-              "expr": "metrics_PausePushData_Value",
+              "expr": "metrics_PausePushData_Value{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "range": true,
               "refId": "A"
@@ -1979,7 +1979,7 @@
                 "type": "prometheus",
                 "uid": "${DS_PROMETHEUS}"
               },
-              "expr": "metrics_PausePushDataAndReplicate_Value",
+              "expr": 
"metrics_PausePushDataAndReplicate_Value{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "range": true,
               "refId": "A"
@@ -2073,7 +2073,7 @@
                 "type": "prometheus",
                 "uid": "${DS_PROMETHEUS}"
               },
-              "expr": "metrics_PausePushDataTime_Value",
+              "expr": 
"metrics_PausePushDataTime_Value{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "range": true,
               "refId": "A"
@@ -2167,7 +2167,7 @@
                 "type": "prometheus",
                 "uid": "${DS_PROMETHEUS}"
               },
-              "expr": "metrics_PausePushDataAndReplicateTime_Value",
+              "expr": 
"metrics_PausePushDataAndReplicateTime_Value{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "range": true,
               "refId": "A"
@@ -2263,7 +2263,7 @@
                 "uid": "${DS_PROMETHEUS}"
               },
               "editorMode": "builder",
-              "expr": "metrics_ActiveShuffleSize_Value{role=\"Worker\"}",
+              "expr": "metrics_ActiveShuffleSize_Value{role=\"Worker\", 
instance=~\"${instance}\"}",
               "instant": false,
               "legendFormat": "${baseLegend}",
               "range": true,
@@ -2359,7 +2359,7 @@
                 "uid": "${DS_PROMETHEUS}"
               },
               "editorMode": "builder",
-              "expr": "metrics_ActiveShuffleFileCount_Value{role=\"Worker\"}",
+              "expr": "metrics_ActiveShuffleFileCount_Value{role=\"Worker\", 
instance=~\"${instance}\"}",
               "instant": false,
               "legendFormat": "${baseLegend}",
               "range": true,
@@ -2453,7 +2453,7 @@
                 "type": "prometheus",
                 "uid": "${DS_PROMETHEUS}"
               },
-              "expr": "metrics_ActiveConnectionCount_Count",
+              "expr": 
"metrics_ActiveConnectionCount_Count{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "range": true,
               "refId": "A"
@@ -2546,7 +2546,7 @@
                 "type": "prometheus",
                 "uid": "${DS_PROMETHEUS}"
               },
-              "expr": "metrics_ActiveSlotsCount_Value",
+              "expr": 
"metrics_ActiveSlotsCount_Value{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "range": true,
               "refId": "A"
@@ -2641,7 +2641,7 @@
               },
               "disableTextWrap": false,
               "editorMode": "builder",
-              "expr": "metrics_FlushWorkingQueueSize_Value",
+              "expr": 
"metrics_FlushWorkingQueueSize_Value{instance=~\"${instance}\"}",
               "fullMetaSearch": false,
               "includeNullMetadata": true,
               "instant": false,
@@ -2841,7 +2841,7 @@
                 "type": "prometheus",
                 "uid": "${DS_PROMETHEUS}"
               },
-              "expr": "metrics_PrimaryPushDataTime_Mean",
+              "expr": 
"metrics_PrimaryPushDataTime_Mean{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "refId": "A"
             }
@@ -2931,7 +2931,7 @@
                 "type": "prometheus",
                 "uid": "${DS_PROMETHEUS}"
               },
-              "expr": "metrics_PrimaryPushDataTime_Max",
+              "expr": 
"metrics_PrimaryPushDataTime_Max{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "refId": "A"
             }
@@ -3021,7 +3021,7 @@
                 "type": "prometheus",
                 "uid": "${DS_PROMETHEUS}"
               },
-              "expr": "metrics_ReplicaPushDataTime_Mean",
+              "expr": 
"metrics_ReplicaPushDataTime_Mean{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "refId": "A"
             }
@@ -3111,7 +3111,7 @@
                 "type": "prometheus",
                 "uid": "${DS_PROMETHEUS}"
               },
-              "expr": "metrics_ReplicaPushDataTime_Max",
+              "expr": 
"metrics_ReplicaPushDataTime_Max{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "refId": "A"
             }
@@ -3201,7 +3201,7 @@
                 "uid": "${DS_PROMETHEUS}"
               },
               "editorMode": "code",
-              "expr": "metrics_WriteDataSuccessCount_Count",
+              "expr": 
"metrics_WriteDataSuccessCount_Count{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "range": true,
               "refId": "A"
@@ -3292,7 +3292,7 @@
                 "uid": "${DS_PROMETHEUS}"
               },
               "editorMode": "code",
-              "expr": "metrics_WriteDataFailCount_Count",
+              "expr": 
"metrics_WriteDataFailCount_Count{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "range": true,
               "refId": "A"
@@ -3383,7 +3383,7 @@
                 "uid": "${DS_PROMETHEUS}"
               },
               "editorMode": "code",
-              "expr": "metrics_ReplicateDataFailCount_Count",
+              "expr": 
"metrics_ReplicateDataFailCount_Count{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "range": true,
               "refId": "A"
@@ -3474,7 +3474,7 @@
                 "uid": "${DS_PROMETHEUS}"
               },
               "editorMode": "code",
-              "expr": "metrics_ReplicateDataWriteFailCount_Count",
+              "expr": 
"metrics_ReplicateDataWriteFailCount_Count{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "range": true,
               "refId": "A"
@@ -3565,7 +3565,7 @@
                 "uid": "${DS_PROMETHEUS}"
               },
               "editorMode": "code",
-              "expr": "metrics_ReplicateDataCreateConnectionFailCount_Count",
+              "expr": 
"metrics_ReplicateDataCreateConnectionFailCount_Count{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "range": true,
               "refId": "A"
@@ -3656,7 +3656,7 @@
                 "uid": "${DS_PROMETHEUS}"
               },
               "editorMode": "code",
-              "expr": "metrics_ReplicateDataConnectionExceptionCount_Count",
+              "expr": 
"metrics_ReplicateDataConnectionExceptionCount_Count{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "range": true,
               "refId": "A"
@@ -3747,7 +3747,7 @@
                 "uid": "${DS_PROMETHEUS}"
               },
               "editorMode": "code",
-              "expr": "metrics_ReplicateDataTimeoutCount_Count",
+              "expr": 
"metrics_ReplicateDataTimeoutCount_Count{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "range": true,
               "refId": "A"
@@ -3838,7 +3838,7 @@
                 "uid": "${DS_PROMETHEUS}"
               },
               "editorMode": "code",
-              "expr": "metrics_ReplicateDataFailNonCriticalCauseCount_Count",
+              "expr": 
"metrics_ReplicateDataFailNonCriticalCauseCount_Count{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "range": true,
               "refId": "A"
@@ -3929,7 +3929,7 @@
                 "uid": "${DS_PROMETHEUS}"
               },
               "editorMode": "code",
-              "expr": "metrics_WriteDataHardSplitCount_Count",
+              "expr": 
"metrics_WriteDataHardSplitCount_Count{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "range": true,
               "refId": "A"
@@ -4034,7 +4034,7 @@
                 "type": "prometheus",
                 "uid": "${DS_PROMETHEUS}"
               },
-              "expr": "metrics_OpenStreamTime_Mean",
+              "expr": "metrics_OpenStreamTime_Mean{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "refId": "A"
             }
@@ -4124,7 +4124,7 @@
                 "type": "prometheus",
                 "uid": "${DS_PROMETHEUS}"
               },
-              "expr": "metrics_OpenStreamTime_Max",
+              "expr": "metrics_OpenStreamTime_Max{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "refId": "A"
             }
@@ -4214,7 +4214,7 @@
                 "type": "prometheus",
                 "uid": "${DS_PROMETHEUS}"
               },
-              "expr": "metrics_FetchChunkTime_Mean",
+              "expr": "metrics_FetchChunkTime_Mean{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "refId": "A"
             }
@@ -4304,7 +4304,7 @@
                 "type": "prometheus",
                 "uid": "${DS_PROMETHEUS}"
               },
-              "expr": "metrics_FetchChunkTime_Max",
+              "expr": "metrics_FetchChunkTime_Max{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "refId": "A"
             }
@@ -4394,7 +4394,7 @@
                 "uid": "${DS_PROMETHEUS}"
               },
               "editorMode": "code",
-              "expr": "metrics_OpenStreamSuccessCount_Count",
+              "expr": 
"metrics_OpenStreamSuccessCount_Count{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "range": true,
               "refId": "A"
@@ -4485,7 +4485,7 @@
                 "uid": "${DS_PROMETHEUS}"
               },
               "editorMode": "code",
-              "expr": "metrics_OpenStreamFailCount_Count",
+              "expr": 
"metrics_OpenStreamFailCount_Count{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "range": true,
               "refId": "A"
@@ -4576,7 +4576,7 @@
                 "uid": "${DS_PROMETHEUS}"
               },
               "editorMode": "code",
-              "expr": "metrics_FetchChunkSuccessCount_Count",
+              "expr": 
"metrics_FetchChunkSuccessCount_Count{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "range": true,
               "refId": "A"
@@ -4667,7 +4667,7 @@
                 "uid": "${DS_PROMETHEUS}"
               },
               "editorMode": "code",
-              "expr": "metrics_FetchChunkFailCount_Count",
+              "expr": 
"metrics_FetchChunkFailCount_Count{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "range": true,
               "refId": "A"
@@ -4758,7 +4758,7 @@
                 "uid": "${DS_PROMETHEUS}"
               },
               "editorMode": "code",
-              "expr": "metrics_ActiveChunkStreamCount_Value",
+              "expr": 
"metrics_ActiveChunkStreamCount_Value{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "range": true,
               "refId": "A"
@@ -4863,7 +4863,7 @@
                 "type": "prometheus",
                 "uid": "${DS_PROMETHEUS}"
               },
-              "expr": "metrics_TakeBufferTime_Mean",
+              "expr": "metrics_TakeBufferTime_Mean{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "refId": "A"
             }
@@ -4953,7 +4953,7 @@
                 "type": "prometheus",
                 "uid": "${DS_PROMETHEUS}"
               },
-              "expr": "metrics_TakeBufferTime_Max",
+              "expr": "metrics_TakeBufferTime_Max{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "refId": "A"
             }
@@ -5043,7 +5043,7 @@
                 "type": "prometheus",
                 "uid": "${DS_PROMETHEUS}"
               },
-              "expr": "metrics_FlushDataTime_Mean",
+              "expr": "metrics_FlushDataTime_Mean{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "refId": "A"
             }
@@ -5133,7 +5133,7 @@
                 "type": "prometheus",
                 "uid": "${DS_PROMETHEUS}"
               },
-              "expr": "metrics_FlushDataTime_Max",
+              "expr": "metrics_FlushDataTime_Max{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "refId": "A"
             }
@@ -5223,7 +5223,7 @@
                 "type": "prometheus",
                 "uid": "${DS_PROMETHEUS}"
               },
-              "expr": "metrics_CommitFilesTime_Mean",
+              "expr": 
"metrics_CommitFilesTime_Mean{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "refId": "A"
             }
@@ -5313,7 +5313,7 @@
                 "type": "prometheus",
                 "uid": "${DS_PROMETHEUS}"
               },
-              "expr": "metrics_CommitFilesTime_Max",
+              "expr": "metrics_CommitFilesTime_Max{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "refId": "A"
             }
@@ -5421,7 +5421,7 @@
                 "uid": "${DS_PROMETHEUS}"
               },
               "editorMode": "code",
-              "expr": "metrics_NettyMemory_Value",
+              "expr": "metrics_NettyMemory_Value{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "range": true,
               "refId": "A"
@@ -5516,7 +5516,8 @@
                 "uid": "${DS_PROMETHEUS}"
               },
               "editorMode": "builder",
-              "expr": "metrics_DirectMemoryUsageRatio_Value",
+              "expr": 
"metrics_DirectMemoryUsageRatio_Value{instance=~\"${instance}\"}",
+              "legendFormat": "${baseLegend}",
               "instant": false,
               "range": true,
               "refId": "A"
@@ -5611,7 +5612,8 @@
                 "uid": "${DS_PROMETHEUS}"
               },
               "editorMode": "builder",
-              "expr": "metrics_MemoryFileStorageSize_Value",
+              "expr": 
"metrics_MemoryFileStorageSize_Value{instance=~\"${instance}\"}",
+              "legendFormat": "${baseLegend}",
               "instant": false,
               "range": true,
               "refId": "A"
@@ -5705,7 +5707,8 @@
                 "uid": "${DS_PROMETHEUS}"
               },
               "editorMode": "builder",
-              "expr": "metrics_MemoryStorageFileCount_Value",
+              "expr": 
"metrics_MemoryStorageFileCount_Value{instance=~\"${instance}\"}",
+              "legendFormat": "${baseLegend}",
               "instant": false,
               "range": true,
               "refId": "A"
@@ -5796,7 +5799,7 @@
                 "type": "prometheus",
                 "uid": "${DS_PROMETHEUS}"
               },
-              "expr": "metrics_DiskBuffer_Value",
+              "expr": "metrics_DiskBuffer_Value{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "range": true,
               "refId": "A"
@@ -5887,7 +5890,8 @@
                 "uid": "${DS_PROMETHEUS}"
               },
               "editorMode": "builder",
-              "expr": "metrics_EvictedFileCount_Value",
+              "expr": 
"metrics_EvictedFileCount_Value{instance=~\"${instance}\"}",
+              "legendFormat": "${baseLegend}",
               "instant": false,
               "range": true,
               "refId": "A"
@@ -5979,7 +5983,7 @@
                 "uid": "${DS_PROMETHEUS}"
               },
               "editorMode": "code",
-              "expr": "metrics_push_usedHeapMemory_Value",
+              "expr": 
"metrics_push_usedHeapMemory_Value{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "range": true,
               "refId": "A"
@@ -6071,7 +6075,7 @@
                 "uid": "${DS_PROMETHEUS}"
               },
               "editorMode": "code",
-              "expr": "metrics_push_usedDirectMemory_Value",
+              "expr": 
"metrics_push_usedDirectMemory_Value{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "range": true,
               "refId": "A"
@@ -6163,7 +6167,7 @@
                 "uid": "${DS_PROMETHEUS}"
               },
               "editorMode": "code",
-              "expr": "metrics_fetch_usedHeapMemory_Value",
+              "expr": 
"metrics_fetch_usedHeapMemory_Value{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "range": true,
               "refId": "A"
@@ -6255,7 +6259,7 @@
                 "uid": "${DS_PROMETHEUS}"
               },
               "editorMode": "code",
-              "expr": "metrics_fetch_usedDirectMemory_Value",
+              "expr": 
"metrics_fetch_usedDirectMemory_Value{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "range": true,
               "refId": "A"
@@ -6347,7 +6351,7 @@
                 "uid": "${DS_PROMETHEUS}"
               },
               "editorMode": "code",
-              "expr": "metrics_replicate_usedHeapMemory_Value",
+              "expr": 
"metrics_replicate_usedHeapMemory_Value{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "range": true,
               "refId": "A"
@@ -6439,7 +6443,7 @@
                 "uid": "${DS_PROMETHEUS}"
               },
               "editorMode": "code",
-              "expr": "metrics_replicate_usedDirectMemory_Value",
+              "expr": 
"metrics_replicate_usedDirectMemory_Value{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "range": true,
               "refId": "A"
@@ -6530,7 +6534,7 @@
                 "uid": "${DS_PROMETHEUS}"
               },
               "editorMode": "builder",
-              "expr": "metrics_ReadBufferAllocatedCount_Value",
+              "expr": 
"metrics_ReadBufferAllocatedCount_Value{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "range": true,
               "refId": "A"
@@ -6622,7 +6626,7 @@
                 "uid": "${DS_PROMETHEUS}"
               },
               "editorMode": "builder",
-              "expr": "metrics_BufferStreamReadBuffer_Value",
+              "expr": 
"metrics_BufferStreamReadBuffer_Value{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "range": true,
               "refId": "A"
@@ -6713,7 +6717,7 @@
                 "uid": "${DS_PROMETHEUS}"
               },
               "editorMode": "builder",
-              "expr": "metrics_ReadBufferDispatcherRequestsLength_Value",
+              "expr": 
"metrics_ReadBufferDispatcherRequestsLength_Value{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "range": true,
               "refId": "A"
@@ -6818,7 +6822,7 @@
                 "type": "prometheus",
                 "uid": "${DS_PROMETHEUS}"
               },
-              "expr": "metrics_SortTime_Mean",
+              "expr": "metrics_SortTime_Mean{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "refId": "A"
             }
@@ -6908,7 +6912,7 @@
                 "type": "prometheus",
                 "uid": "${DS_PROMETHEUS}"
               },
-              "expr": "metrics_SortTime_Max",
+              "expr": "metrics_SortTime_Max{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "refId": "A"
             }
@@ -6997,7 +7001,7 @@
                 "type": "prometheus",
                 "uid": "${DS_PROMETHEUS}"
               },
-              "expr": "metrics_SortingFiles_Value",
+              "expr": "metrics_SortingFiles_Value{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "range": true,
               "refId": "A"
@@ -7087,7 +7091,7 @@
                 "type": "prometheus",
                 "uid": "${DS_PROMETHEUS}"
               },
-              "expr": "metrics_SortedFiles_Value",
+              "expr": "metrics_SortedFiles_Value{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "range": true,
               "refId": "A"
@@ -7178,7 +7182,7 @@
                 "type": "prometheus",
                 "uid": "${DS_PROMETHEUS}"
               },
-              "expr": "metrics_SortMemory_Value",
+              "expr": "metrics_SortMemory_Value{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "range": true,
               "refId": "A"
@@ -7269,7 +7273,7 @@
                 "uid": "${DS_PROMETHEUS}"
               },
               "editorMode": "code",
-              "expr": "metrics_SortedFileSize_Value",
+              "expr": 
"metrics_SortedFileSize_Value{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "range": true,
               "refId": "A"
@@ -7376,7 +7380,7 @@
                 "uid": "${DS_PROMETHEUS}"
               },
               "editorMode": "code",
-              "expr": "metrics_PotentialConsumeSpeed_Value",
+              "expr": 
"metrics_PotentialConsumeSpeed_Value{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "range": true,
               "refId": "A"
@@ -7469,7 +7473,7 @@
                 "uid": "${DS_PROMETHEUS}"
               },
               "editorMode": "code",
-              "expr": "metrics_WorkerConsumeSpeed_Value",
+              "expr": "metrics_WorkerConsumeSpeed_Value{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "range": true,
               "refId": "A"
@@ -7562,7 +7566,7 @@
                 "uid": "${DS_PROMETHEUS}"
               },
               "editorMode": "code",
-              "expr": "metrics_UserProduceSpeed_Value",
+              "expr": "metrics_UserProduceSpeed_Value{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "range": true,
               "refId": "A"
@@ -7668,7 +7672,7 @@
                 "uid": "${DS_PROMETHEUS}"
               },
               "editorMode": "code",
-              "expr": "metrics_PrimaryPushDataHandshakeTime_Mean",
+              "expr": "metrics_PrimaryPushDataHandshakeTime_Mean{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "range": true,
               "refId": "A"
@@ -7760,7 +7764,7 @@
                 "uid": "${DS_PROMETHEUS}"
               },
               "editorMode": "code",
-              "expr": "metrics_PrimaryPushDataHandshakeTime_Max",
+              "expr": "metrics_PrimaryPushDataHandshakeTime_Max{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "range": true,
               "refId": "A"
@@ -7852,7 +7856,7 @@
                 "uid": "${DS_PROMETHEUS}"
               },
               "editorMode": "code",
-              "expr": "metrics_ReplicaPushDataHandshakeTime_Mean",
+              "expr": "metrics_ReplicaPushDataHandshakeTime_Mean{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "range": true,
               "refId": "A"
@@ -7944,7 +7948,7 @@
                 "uid": "${DS_PROMETHEUS}"
               },
               "editorMode": "code",
-              "expr": "metrics_ReplicaPushDataHandshakeTime_Max",
+              "expr": "metrics_ReplicaPushDataHandshakeTime_Max{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "range": true,
               "refId": "A"
@@ -8036,7 +8040,7 @@
                 "uid": "${DS_PROMETHEUS}"
               },
               "editorMode": "code",
-              "expr": "metrics_PrimaryRegionStartTime_Mean",
+              "expr": "metrics_PrimaryRegionStartTime_Mean{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "range": true,
               "refId": "A"
@@ -8128,7 +8132,7 @@
                 "uid": "${DS_PROMETHEUS}"
               },
               "editorMode": "code",
-              "expr": "metrics_PrimaryRegionStartTime_Max",
+              "expr": "metrics_PrimaryRegionStartTime_Max{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "range": true,
               "refId": "A"
@@ -8220,7 +8224,7 @@
                 "uid": "${DS_PROMETHEUS}"
               },
               "editorMode": "code",
-              "expr": "metrics_ReplicaRegionStartTime_Mean",
+              "expr": "metrics_ReplicaRegionStartTime_Mean{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "range": true,
               "refId": "A"
@@ -8312,7 +8316,7 @@
                 "uid": "${DS_PROMETHEUS}"
               },
               "editorMode": "code",
-              "expr": "metrics_ReplicaRegionStartTime_Max",
+              "expr": "metrics_ReplicaRegionStartTime_Max{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "range": true,
               "refId": "A"
@@ -8404,7 +8408,7 @@
                 "uid": "${DS_PROMETHEUS}"
               },
               "editorMode": "code",
-              "expr": "metrics_PrimaryRegionFinishTime_Mean",
+              "expr": "metrics_PrimaryRegionFinishTime_Mean{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "range": true,
               "refId": "A"
@@ -8496,7 +8500,7 @@
                 "uid": "${DS_PROMETHEUS}"
               },
               "editorMode": "code",
-              "expr": "metrics_PrimaryRegionFinishTime_Max",
+              "expr": "metrics_PrimaryRegionFinishTime_Max{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "range": true,
               "refId": "A"
@@ -8588,7 +8592,7 @@
                 "uid": "${DS_PROMETHEUS}"
               },
               "editorMode": "code",
-              "expr": "metrics_ReplicaRegionFinishTime_Mean",
+              "expr": "metrics_ReplicaRegionFinishTime_Mean{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "range": true,
               "refId": "A"
@@ -8680,7 +8684,7 @@
                 "uid": "${DS_PROMETHEUS}"
               },
               "editorMode": "code",
-              "expr": "metrics_ReplicaRegionFinishTime_Max",
+              "expr": "metrics_ReplicaRegionFinishTime_Max{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "range": true,
               "refId": "A"
@@ -8771,7 +8775,7 @@
                 "uid": "${DS_PROMETHEUS}"
               },
               "editorMode": "code",
-              "expr": "metrics_PushDataHandshakeFailCount_Count",
+              "expr": "metrics_PushDataHandshakeFailCount_Count{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "range": true,
               "refId": "A"
@@ -8862,7 +8866,7 @@
                 "uid": "${DS_PROMETHEUS}"
               },
               "editorMode": "code",
-              "expr": "metrics_RegionStartFailCount_Count",
+              "expr": "metrics_RegionStartFailCount_Count{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "range": true,
               "refId": "A"
@@ -8953,7 +8957,7 @@
                 "uid": "${DS_PROMETHEUS}"
               },
               "editorMode": "code",
-              "expr": "metrics_RegionStartFailCount_Count",
+              "expr": "metrics_RegionStartFailCount_Count{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "range": true,
               "refId": "A"
@@ -9517,7 +9521,7 @@
                 "uid": "${DS_PROMETHEUS}"
               },
               "editorMode": "builder",
-              "expr": "metrics_ActiveCreditStreamCount_Value",
+              "expr": "metrics_ActiveCreditStreamCount_Value{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "range": true,
               "refId": "A"
@@ -9608,7 +9612,7 @@
                 "uid": "${DS_PROMETHEUS}"
               },
               "editorMode": "builder",
-              "expr": "metrics_ActiveMapPartitionCount_Value",
+              "expr": "metrics_ActiveMapPartitionCount_Value{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "range": true,
               "refId": "A"
@@ -9714,7 +9718,7 @@
                 "uid": "${DS_PROMETHEUS}"
               },
               "editorMode": "code",
-              "expr": "metrics_JVMCPUTime_Value",
+              "expr": "metrics_JVMCPUTime_Value{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "range": true,
               "refId": "A"
@@ -9807,7 +9811,7 @@
                 "uid": "${DS_PROMETHEUS}"
               },
               "editorMode": "code",
-              "expr": "metrics_LastMinuteSystemLoad_Value",
+              "expr": "metrics_LastMinuteSystemLoad_Value{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "range": true,
               "refId": "A"
@@ -9900,7 +9904,7 @@
                 "uid": "${DS_PROMETHEUS}"
               },
               "editorMode": "code",
-              "expr": "metrics_DeviceOSFreeBytes_Value",
+              "expr": "metrics_DeviceOSFreeBytes_Value{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "range": true,
               "refId": "A"
@@ -9993,7 +9997,7 @@
                 "uid": "${DS_PROMETHEUS}"
               },
               "editorMode": "code",
-              "expr": "metrics_DeviceCelebornFreeBytes_Value",
+              "expr": "metrics_DeviceCelebornFreeBytes_Value{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "range": true,
               "refId": "A"
@@ -10086,7 +10090,7 @@
                 "uid": "${DS_PROMETHEUS}"
               },
               "editorMode": "code",
-              "expr": "metrics_AvailableProcessors_Value",
+              "expr": "metrics_AvailableProcessors_Value{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "range": true,
               "refId": "A"
@@ -10192,7 +10196,7 @@
                 "uid": "${DS_PROMETHEUS}"
               },
               "editorMode": "code",
-              "expr": "metrics_diskFileCount_Value",
+              "expr": "metrics_diskFileCount_Value{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "range": true,
               "refId": "A"
@@ -10285,7 +10289,7 @@
                 "uid": "${DS_PROMETHEUS}"
               },
               "editorMode": "code",
-              "expr": "metrics_diskBytesWritten_Value",
+              "expr": "metrics_diskBytesWritten_Value{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "range": true,
               "refId": "A"
@@ -10377,7 +10381,7 @@
                 "uid": "${DS_PROMETHEUS}"
               },
               "editorMode": "code",
-              "expr": "metrics_hdfsFileCount_Value",
+              "expr": "metrics_hdfsFileCount_Value{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "range": true,
               "refId": "A"
@@ -10470,7 +10474,7 @@
                 "uid": "${DS_PROMETHEUS}"
               },
               "editorMode": "code",
-              "expr": "metrics_hdfsBytesWritten_Value",
+              "expr": "metrics_hdfsBytesWritten_Value{instance=~\"${instance}\"}",
               "legendFormat": "${baseLegend}",
               "range": true,
               "refId": "A"
@@ -10490,6 +10494,31 @@
   "tags": [],
   "templating": {
     "list": [
+      {
+        "current": {},
+        "datasource": {
+          "type": "prometheus",
+          "uid": "${DS_PROMETHEUS}"
+        },
+        "definition": "label_values(metrics_JVMCPUTime_Value, instance)",
+        "hide": 0,
+        "includeAll": true,
+        "label": "instance",
+        "mapping": "",
+        "mappingOnLegend": true,
+        "multi": true,
+        "name": "instance",
+        "options": [],
+        "query": {
+          "query": "label_values(metrics_JVMCPUTime_Value, instance)",
+          "refId": "StandardVariableQuery"
+        },
+        "refresh": 1,
+        "regex": "",
+        "skipUrlSync": false,
+        "sort": 1,
+        "type": "query"
+      },
       {
         "current": {
           "selected": false,
@@ -10527,4 +10556,4 @@
   "uid": "U_qgru_7z",
   "version": 2,
   "weekStart": ""
-}
\ No newline at end of file
+}
diff --git a/common/src/main/scala/org/apache/celeborn/common/metrics/source/AbstractSource.scala b/common/src/main/scala/org/apache/celeborn/common/metrics/source/AbstractSource.scala
index 58aa71bee..95562c911 100644
--- a/common/src/main/scala/org/apache/celeborn/common/metrics/source/AbstractSource.scala
+++ b/common/src/main/scala/org/apache/celeborn/common/metrics/source/AbstractSource.scala
@@ -64,7 +64,14 @@ abstract class AbstractSource(conf: CelebornConf, role: String)
     ThreadUtils.newDaemonSingleThreadScheduledExecutor("worker-metrics-cleaner")
 
   val roleLabel: (String, String) = "role" -> role
-  val staticLabels: Map[String, String] = conf.metricsExtraLabels + roleLabel
+  val instanceLabel: Map[String, String] = role match {
+    case Role.MASTER =>
+      Map("instance" -> s"${Utils.localHostName(conf)}:${conf.masterHttpPort}")
+    case Role.WORKER =>
+      Map("instance" -> s"${Utils.localHostName(conf)}:${conf.workerHttpPort}")
+    case _ => Map.empty
+  }
+  val staticLabels: Map[String, String] = conf.metricsExtraLabels + roleLabel ++ instanceLabel
   val staticLabelsString: String = MetricLabels.labelString(staticLabels)
 
   val applicationLabel = "applicationId"
diff --git a/common/src/main/scala/org/apache/celeborn/common/metrics/source/Role.scala b/common/src/main/scala/org/apache/celeborn/common/metrics/source/Role.scala
new file mode 100644
index 000000000..50b509643
--- /dev/null
+++ b/common/src/main/scala/org/apache/celeborn/common/metrics/source/Role.scala
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.celeborn.common.metrics.source
+
+object Role {
+  val MASTER = "master"
+  val WORKER = "worker"
+}
diff --git a/common/src/test/scala/org/apache/celeborn/common/metrics/source/CelebornSourceSuite.scala b/common/src/test/scala/org/apache/celeborn/common/metrics/source/CelebornSourceSuite.scala
index 89330bee9..d6eeb2358 100644
--- a/common/src/test/scala/org/apache/celeborn/common/metrics/source/CelebornSourceSuite.scala
+++ b/common/src/test/scala/org/apache/celeborn/common/metrics/source/CelebornSourceSuite.scala
@@ -24,11 +24,15 @@ class CelebornSourceSuite extends CelebornFunSuite {
 
   test("test getMetrics with customized label") {
     val conf = new CelebornConf()
-    createAbstractSourceAndCheck(conf, "")
+    createAbstractSourceAndCheck(conf, "", Role.MASTER)
+    createAbstractSourceAndCheck(conf, "", Role.WORKER)
   }
 
-  def createAbstractSourceAndCheck(conf: CelebornConf, extraLabels: String): Unit = {
-    val mockSource = new AbstractSource(conf, "mock") {
+  def createAbstractSourceAndCheck(
+      conf: CelebornConf,
+      extraLabels: String,
+      role: String = "mock"): Unit = {
+    val mockSource = new AbstractSource(conf, role) {
       override def sourceName: String = "mockSource"
     }
     val user1 = Map("user" -> "user1")
@@ -55,12 +59,17 @@ class CelebornSourceSuite extends CelebornFunSuite {
     if (extraLabels.nonEmpty) {
       extraLabelsStr = extraLabels + ","
     }
-    val exp1 = s"""metrics_Gauge1_Value{${extraLabelsStr}role="mock"} 1000"""
-    val exp2 = s"""metrics_Gauge2_Value{${extraLabelsStr}role="mock",user="user1"} 2000"""
-    val exp3 = s"""metrics_Counter1_Count{${extraLabelsStr}role="mock"} 3000"""
-    val exp4 = s"""metrics_Counter2_Count{${extraLabelsStr}role="mock",user="user2"} 4000"""
-    val exp5 = s"""metrics_Timer1_Count{${extraLabelsStr}role="mock"} 1"""
-    val exp6 = s"""metrics_Timer2_Count{${extraLabelsStr}role="mock",user="user3"} 1"""
+    val instanceLabelStr =
+      mockSource.instanceLabel.map(kv => s"""${kv._1}="${kv._2}",""").mkString(",")
+    val exp1 = s"""metrics_Gauge1_Value{${extraLabelsStr}${instanceLabelStr}role="$role"} 1000"""
+    val exp2 =
+      s"""metrics_Gauge2_Value{${extraLabelsStr}${instanceLabelStr}role="$role",user="user1"} 2000"""
+    val exp3 = s"""metrics_Counter1_Count{${extraLabelsStr}${instanceLabelStr}role="$role"} 3000"""
+    val exp4 =
+      s"""metrics_Counter2_Count{${extraLabelsStr}${instanceLabelStr}role="$role",user="user2"} 4000"""
+    val exp5 = s"""metrics_Timer1_Count{${extraLabelsStr}${instanceLabelStr}role="$role"} 1"""
+    val exp6 =
+      s"""metrics_Timer2_Count{${extraLabelsStr}${instanceLabelStr}role="$role",user="user3"} 1"""
 
     assert(res.contains(exp1))
     assert(res.contains(exp2))
diff --git a/master/src/main/scala/org/apache/celeborn/service/deploy/master/Master.scala b/master/src/main/scala/org/apache/celeborn/service/deploy/master/Master.scala
index 544c9656b..d0623a20b 100644
--- a/master/src/main/scala/org/apache/celeborn/service/deploy/master/Master.scala
+++ b/master/src/main/scala/org/apache/celeborn/service/deploy/master/Master.scala
@@ -39,7 +39,7 @@ import org.apache.celeborn.common.identity.UserIdentifier
 import org.apache.celeborn.common.internal.Logging
 import org.apache.celeborn.common.meta.{DiskInfo, WorkerInfo, WorkerStatus}
 import org.apache.celeborn.common.metrics.MetricsSystem
-import org.apache.celeborn.common.metrics.source.{JVMCPUSource, JVMSource, ResourceConsumptionSource, SystemMiscSource, ThreadPoolSource}
+import org.apache.celeborn.common.metrics.source.{JVMCPUSource, JVMSource, ResourceConsumptionSource, Role, SystemMiscSource, ThreadPoolSource}
 import org.apache.celeborn.common.network.CelebornRackResolver
 import org.apache.celeborn.common.network.protocol.TransportMessage
 import org.apache.celeborn.common.protocol._
@@ -67,15 +67,15 @@ private[celeborn] class Master(
     MetricsSystem.createMetricsSystem(serviceName, conf)
   // init and register master metrics
   private val resourceConsumptionSource =
-    new ResourceConsumptionSource(conf, MetricsSystem.ROLE_MASTER)
-  private val threadPoolSource = ThreadPoolSource(conf, MetricsSystem.ROLE_MASTER)
+    new ResourceConsumptionSource(conf, Role.MASTER)
+  private val threadPoolSource = ThreadPoolSource(conf, Role.MASTER)
   private val masterSource = new MasterSource(conf)
   metricsSystem.registerSource(resourceConsumptionSource)
   metricsSystem.registerSource(masterSource)
   metricsSystem.registerSource(threadPoolSource)
-  metricsSystem.registerSource(new JVMSource(conf, MetricsSystem.ROLE_MASTER))
-  metricsSystem.registerSource(new JVMCPUSource(conf, MetricsSystem.ROLE_MASTER))
-  metricsSystem.registerSource(new SystemMiscSource(conf, MetricsSystem.ROLE_MASTER))
+  metricsSystem.registerSource(new JVMSource(conf, Role.MASTER))
+  metricsSystem.registerSource(new JVMCPUSource(conf, Role.MASTER))
+  metricsSystem.registerSource(new SystemMiscSource(conf, Role.MASTER))
 
   private val bindPreferIP: Boolean = conf.bindPreferIP
   private val authEnabled = conf.authEnabled
diff --git a/master/src/main/scala/org/apache/celeborn/service/deploy/master/MasterSource.scala b/master/src/main/scala/org/apache/celeborn/service/deploy/master/MasterSource.scala
index 970fa4f5b..b2e725244 100644
--- a/master/src/main/scala/org/apache/celeborn/service/deploy/master/MasterSource.scala
+++ b/master/src/main/scala/org/apache/celeborn/service/deploy/master/MasterSource.scala
@@ -18,10 +18,9 @@
 package org.apache.celeborn.service.deploy.master
 
 import org.apache.celeborn.common.CelebornConf
-import org.apache.celeborn.common.metrics.MetricsSystem
-import org.apache.celeborn.common.metrics.source.AbstractSource
+import org.apache.celeborn.common.metrics.source.{AbstractSource, Role}
 
-class MasterSource(conf: CelebornConf) extends AbstractSource(conf, MetricsSystem.ROLE_MASTER) {
+class MasterSource(conf: CelebornConf) extends AbstractSource(conf, Role.MASTER) {
   override val sourceName = "master"
 
   import MasterSource._
diff --git a/service/src/main/scala/org/apache/celeborn/common/metrics/MetricsSystem.scala b/service/src/main/scala/org/apache/celeborn/common/metrics/MetricsSystem.scala
index ab919bb14..3baab6ebf 100644
--- a/service/src/main/scala/org/apache/celeborn/common/metrics/MetricsSystem.scala
+++ b/service/src/main/scala/org/apache/celeborn/common/metrics/MetricsSystem.scala
@@ -177,9 +177,6 @@ object MetricsSystem {
   val SINK_REGEX: Regex = "^sink\\.(.+)\\.(.+)".r
   val SOURCE_REGEX: Regex = "^org.apache.celeborn.common.metrics.source\\.(.+)\\.(.+)".r
 
-  val ROLE_WORKER = "Worker"
-  val ROLE_MASTER = "Master"
-
   private[this] val MINIMAL_POLL_UNIT = TimeUnit.SECONDS
   private[this] val MINIMAL_POLL_PERIOD = 1
 
diff --git a/worker/src/main/scala/org/apache/celeborn/service/deploy/worker/Worker.scala b/worker/src/main/scala/org/apache/celeborn/service/deploy/worker/Worker.scala
index e3cbd8642..d0371e2bd 100644
--- a/worker/src/main/scala/org/apache/celeborn/service/deploy/worker/Worker.scala
+++ b/worker/src/main/scala/org/apache/celeborn/service/deploy/worker/Worker.scala
@@ -37,7 +37,7 @@ import org.apache.celeborn.common.identity.UserIdentifier
 import org.apache.celeborn.common.internal.Logging
 import org.apache.celeborn.common.meta.{DiskInfo, WorkerInfo, WorkerPartitionLocationInfo}
 import org.apache.celeborn.common.metrics.MetricsSystem
-import org.apache.celeborn.common.metrics.source.{JVMCPUSource, JVMSource, ResourceConsumptionSource, SystemMiscSource, ThreadPoolSource}
+import org.apache.celeborn.common.metrics.source.{JVMCPUSource, JVMSource, ResourceConsumptionSource, Role, SystemMiscSource, ThreadPoolSource}
 import org.apache.celeborn.common.network.{CelebornRackResolver, TransportContext}
 import org.apache.celeborn.common.network.sasl.SaslServerBootstrap
 import org.apache.celeborn.common.network.server.TransportServerBootstrap
@@ -73,14 +73,14 @@ private[celeborn] class Worker(
     MetricsSystem.createMetricsSystem(serviceName, conf)
   val workerSource = new WorkerSource(conf)
   private val resourceConsumptionSource =
-    new ResourceConsumptionSource(conf, MetricsSystem.ROLE_WORKER)
-  private val threadPoolSource = ThreadPoolSource(conf, MetricsSystem.ROLE_WORKER)
+    new ResourceConsumptionSource(conf, Role.WORKER)
+  private val threadPoolSource = ThreadPoolSource(conf, Role.WORKER)
   metricsSystem.registerSource(workerSource)
   metricsSystem.registerSource(threadPoolSource)
   metricsSystem.registerSource(resourceConsumptionSource)
-  metricsSystem.registerSource(new JVMSource(conf, MetricsSystem.ROLE_WORKER))
-  metricsSystem.registerSource(new JVMCPUSource(conf, MetricsSystem.ROLE_WORKER))
-  metricsSystem.registerSource(new SystemMiscSource(conf, MetricsSystem.ROLE_WORKER))
+  metricsSystem.registerSource(new JVMSource(conf, Role.WORKER))
+  metricsSystem.registerSource(new JVMCPUSource(conf, Role.WORKER))
+  metricsSystem.registerSource(new SystemMiscSource(conf, Role.WORKER))
 
   private val topResourceConsumptionCount = conf.metricsWorkerAppTopResourceConsumptionCount
   private val topApplicationUserIdentifiers =
diff --git a/worker/src/main/scala/org/apache/celeborn/service/deploy/worker/WorkerSource.scala b/worker/src/main/scala/org/apache/celeborn/service/deploy/worker/WorkerSource.scala
index 15096fadc..26532a6bf 100644
--- a/worker/src/main/scala/org/apache/celeborn/service/deploy/worker/WorkerSource.scala
+++ b/worker/src/main/scala/org/apache/celeborn/service/deploy/worker/WorkerSource.scala
@@ -25,12 +25,11 @@ import scala.collection.JavaConverters._
 import com.google.common.collect.Sets
 
 import org.apache.celeborn.common.CelebornConf
-import org.apache.celeborn.common.metrics.MetricsSystem
-import org.apache.celeborn.common.metrics.source.AbstractSource
+import org.apache.celeborn.common.metrics.source.{AbstractSource, Role}
 import org.apache.celeborn.common.network.client.TransportClient
 import org.apache.celeborn.common.util.{JavaUtils, Utils}
 
-class WorkerSource(conf: CelebornConf) extends AbstractSource(conf, MetricsSystem.ROLE_WORKER) {
+class WorkerSource(conf: CelebornConf) extends AbstractSource(conf, Role.WORKER) {
   override val sourceName = "worker"
 
   val appActiveConnections: ConcurrentHashMap[String, util.Set[String]] =

Reply via email to