This is an automated email from the ASF dual-hosted git repository.
rexxiong pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/celeborn.git
The following commit(s) were added to refs/heads/main by this push:
new c40f69b94 [CELEBORN-1766] Add detail metrics about fetch chunk
c40f69b94 is described below
commit c40f69b941af65f192e5db6d48c64fd907ee7783
Author: mingji <[email protected]>
AuthorDate: Mon Dec 16 16:17:14 2024 +0800
[CELEBORN-1766] Add detail metrics about fetch chunk
### What changes were proposed in this pull request?
1. Add histogram
2. Collect critical metrics about fetch chunk
### Why are the changes needed?
1. To find out the I/O pattern of fetch chunk requests
2. To have detailed metrics about fetch chunk time
### Does this PR introduce _any_ user-facing change?
NO.
### How was this patch tested?
Tested with GA and on a cluster.
<img width="940" alt="截屏2024-12-09 15 42 50"
src="https://github.com/user-attachments/assets/9f526103-c162-4607-a031-ba90f42ae83e">
<img width="962" alt="截屏2024-12-09 15 42 56"
src="https://github.com/user-attachments/assets/c17822da-0433-4701-b0cc-0887ac970353">
Closes #2983 from FMX/b1766.
Authored-by: mingji <[email protected]>
Signed-off-by: Shuang <[email protected]>
---
assets/grafana/celeborn-dashboard.json | 1086 +++++++++++++++-----
.../celeborn/common/network/TransportContext.java | 13 +
.../common/network/protocol/MessageEncoder.java | 10 +-
.../common/network/protocol/MessageWithHeader.java | 23 +-
.../common/metrics/source/AbstractSource.scala | 63 +-
.../metrics/source/CelebornSourceSuite.scala | 16 +
.../celeborn/common/metrics/sink/JsonServlet.scala | 4 +-
.../celeborn/service/deploy/worker/Worker.scala | 3 +-
.../service/deploy/worker/WorkerSource.scala | 5 +
9 files changed, 973 insertions(+), 250 deletions(-)
diff --git a/assets/grafana/celeborn-dashboard.json
b/assets/grafana/celeborn-dashboard.json
index a60d3b41d..2fa98c1ec 100644
--- a/assets/grafana/celeborn-dashboard.json
+++ b/assets/grafana/celeborn-dashboard.json
@@ -15,7 +15,7 @@
"type": "grafana",
"id": "grafana",
"name": "Grafana",
- "version": "11.0.0"
+ "version": "11.1.4"
},
{
"type": "datasource",
@@ -130,7 +130,7 @@
"h": 9,
"w": 12,
"x": 0,
- "y": 1
+ "y": 49
},
"id": 94,
"options": {
@@ -221,7 +221,7 @@
"h": 9,
"w": 12,
"x": 12,
- "y": 1
+ "y": 49
},
"id": 2,
"options": {
@@ -313,7 +313,7 @@
"h": 9,
"w": 12,
"x": 0,
- "y": 10
+ "y": 58
},
"id": 185,
"options": {
@@ -405,7 +405,7 @@
"h": 9,
"w": 12,
"x": 12,
- "y": 10
+ "y": 58
},
"id": 186,
"options": {
@@ -496,7 +496,7 @@
"h": 9,
"w": 12,
"x": 0,
- "y": 19
+ "y": 67
},
"id": 95,
"options": {
@@ -602,7 +602,7 @@
"h": 8,
"w": 12,
"x": 0,
- "y": 2
+ "y": 50
},
"id": 121,
"options": {
@@ -696,7 +696,7 @@
"h": 8,
"w": 12,
"x": 12,
- "y": 2
+ "y": 50
},
"id": 120,
"options": {
@@ -789,7 +789,7 @@
"h": 8,
"w": 12,
"x": 0,
- "y": 10
+ "y": 58
},
"id": 189,
"options": {
@@ -881,7 +881,7 @@
"h": 8,
"w": 12,
"x": 12,
- "y": 10
+ "y": 58
},
"id": 124,
"options": {
@@ -975,7 +975,7 @@
"h": 8,
"w": 12,
"x": 0,
- "y": 18
+ "y": 66
},
"id": 122,
"options": {
@@ -1068,7 +1068,7 @@
"h": 8,
"w": 12,
"x": 12,
- "y": 18
+ "y": 66
},
"id": 102,
"options": {
@@ -1159,7 +1159,7 @@
"h": 8,
"w": 12,
"x": 0,
- "y": 26
+ "y": 74
},
"id": 100,
"options": {
@@ -1250,7 +1250,7 @@
"h": 8,
"w": 12,
"x": 12,
- "y": 26
+ "y": 74
},
"id": 117,
"options": {
@@ -1343,7 +1343,7 @@
"h": 8,
"w": 12,
"x": 0,
- "y": 34
+ "y": 82
},
"id": 194,
"options": {
@@ -1436,7 +1436,7 @@
"h": 8,
"w": 12,
"x": 12,
- "y": 34
+ "y": 82
},
"id": 36,
"options": {
@@ -1528,7 +1528,7 @@
"h": 8,
"w": 12,
"x": 0,
- "y": 34
+ "y": 82
},
"id": 218,
"options": {
@@ -1616,7 +1616,7 @@
"h": 8,
"w": 12,
"x": 12,
- "y": 34
+ "y": 82
},
"id": 219,
"options": {
@@ -1721,7 +1721,7 @@
"h": 10,
"w": 24,
"x": 0,
- "y": 3
+ "y": 51
},
"id": 84,
"options": {
@@ -1812,7 +1812,7 @@
"h": 9,
"w": 12,
"x": 0,
- "y": 13
+ "y": 61
},
"id": 60,
"options": {
@@ -1903,7 +1903,7 @@
"h": 9,
"w": 12,
"x": 12,
- "y": 13
+ "y": 61
},
"id": 62,
"options": {
@@ -1995,7 +1995,7 @@
"h": 8,
"w": 12,
"x": 0,
- "y": 22
+ "y": 70
},
"id": 90,
"options": {
@@ -2086,7 +2086,7 @@
"h": 8,
"w": 12,
"x": 12,
- "y": 22
+ "y": 70
},
"id": 92,
"options": {
@@ -2178,7 +2178,7 @@
"h": 8,
"w": 12,
"x": 0,
- "y": 30
+ "y": 78
},
"id": 182,
"options": {
@@ -2270,7 +2270,7 @@
"h": 8,
"w": 12,
"x": 12,
- "y": 30
+ "y": 78
},
"id": 184,
"options": {
@@ -2363,7 +2363,7 @@
"h": 8,
"w": 12,
"x": 0,
- "y": 38
+ "y": 86
},
"id": 181,
"options": {
@@ -2457,7 +2457,7 @@
"h": 8,
"w": 12,
"x": 12,
- "y": 38
+ "y": 86
},
"id": 183,
"options": {
@@ -2550,7 +2550,7 @@
"h": 8,
"w": 12,
"x": 0,
- "y": 46
+ "y": 94
},
"id": 179,
"options": {
@@ -2641,7 +2641,7 @@
"h": 8,
"w": 12,
"x": 12,
- "y": 46
+ "y": 94
},
"id": 48,
"options": {
@@ -2732,7 +2732,7 @@
"h": 8,
"w": 12,
"x": 0,
- "y": 54
+ "y": 102
},
"id": 193,
"options": {
@@ -2829,7 +2829,7 @@
"h": 8,
"w": 12,
"x": 12,
- "y": 54
+ "y": 102
},
"id": 195,
"options": {
@@ -2862,7 +2862,6 @@
},
{
"datasource": {
- "default": false,
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
@@ -2906,8 +2905,7 @@
"mode": "absolute",
"steps": [
{
- "color": "green",
- "value": null
+ "color": "green"
},
{
"color": "red",
@@ -2922,7 +2920,7 @@
"h": 8,
"w": 12,
"x": 0,
- "y": 62
+ "y": 110
},
"id": 217,
"options": {
@@ -2945,8 +2943,8 @@
},
"editorMode": "builder",
"expr":
"metrics_IsDecommissioningWorker_Value{instance=~\"${instance}\"}",
- "legendFormat": "${baseLegend}",
"instant": false,
+ "legendFormat": "${baseLegend}",
"range": true,
"refId": "A"
}
@@ -3029,7 +3027,7 @@
"h": 9,
"w": 12,
"x": 0,
- "y": 4
+ "y": 52
},
"id": 68,
"options": {
@@ -3119,7 +3117,7 @@
"h": 9,
"w": 12,
"x": 12,
- "y": 4
+ "y": 52
},
"id": 70,
"options": {
@@ -3209,7 +3207,7 @@
"h": 9,
"w": 12,
"x": 0,
- "y": 13
+ "y": 61
},
"id": 72,
"options": {
@@ -3299,7 +3297,7 @@
"h": 9,
"w": 12,
"x": 12,
- "y": 13
+ "y": 61
},
"id": 74,
"options": {
@@ -3388,7 +3386,7 @@
"h": 8,
"w": 12,
"x": 0,
- "y": 22
+ "y": 70
},
"id": 83,
"options": {
@@ -3479,7 +3477,7 @@
"h": 8,
"w": 12,
"x": 12,
- "y": 22
+ "y": 70
},
"id": 76,
"options": {
@@ -3570,7 +3568,7 @@
"h": 8,
"w": 12,
"x": 0,
- "y": 30
+ "y": 78
},
"id": 128,
"options": {
@@ -3661,7 +3659,7 @@
"h": 8,
"w": 12,
"x": 12,
- "y": 30
+ "y": 78
},
"id": 129,
"options": {
@@ -3752,7 +3750,7 @@
"h": 8,
"w": 12,
"x": 0,
- "y": 38
+ "y": 86
},
"id": 130,
"options": {
@@ -3843,7 +3841,7 @@
"h": 8,
"w": 12,
"x": 12,
- "y": 38
+ "y": 86
},
"id": 132,
"options": {
@@ -3934,7 +3932,7 @@
"h": 8,
"w": 12,
"x": 0,
- "y": 46
+ "y": 94
},
"id": 131,
"options": {
@@ -4025,7 +4023,7 @@
"h": 8,
"w": 12,
"x": 12,
- "y": 46
+ "y": 94
},
"id": 133,
"options": {
@@ -4101,7 +4099,581 @@
"mode": "absolute",
"steps": [
{
- "color": "green"
+ "color": "green"
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 0,
+ "y": 102
+ },
+ "id": 79,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "editorMode": "code",
+ "expr":
"metrics_WriteDataHardSplitCount_Count{instance=~\"${instance}\"}",
+ "legendFormat": "${baseLegend}",
+ "range": true,
+ "refId": "A"
+ }
+ ],
+ "title": "metrics_WriteDataHardSplitCount_Count",
+ "type": "timeseries"
+ }
+ ],
+ "title": "PushRelatives",
+ "type": "row"
+ },
+ {
+ "collapsed": true,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 4
+ },
+ "id": 12,
+ "panels": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ },
+ "unit": "ms"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 0,
+ "y": 5
+ },
+ "id": 66,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "expr": "metrics_OpenStreamTime_Mean{instance=~\"${instance}\"}",
+ "legendFormat": "${baseLegend}",
+ "refId": "A"
+ }
+ ],
+ "title": "metrics_OpenStreamTime_Mean",
+ "type": "timeseries"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ },
+ "unit": "ms"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 12,
+ "y": 5
+ },
+ "id": 96,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "expr": "metrics_OpenStreamTime_Max{instance=~\"${instance}\"}",
+ "legendFormat": "${baseLegend}",
+ "refId": "A"
+ }
+ ],
+ "title": "metrics_OpenStreamTime_Max",
+ "type": "timeseries"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ },
+ "unit": "ms"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 0,
+ "y": 13
+ },
+ "id": 17,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "expr": "metrics_FetchChunkTime_Mean{instance=~\"${instance}\"}",
+ "legendFormat": "${baseLegend}",
+ "refId": "A"
+ }
+ ],
+ "title": "metrics_FetchChunkTime_Mean",
+ "type": "timeseries"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ },
+ "unit": "ms"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 12,
+ "y": 13
+ },
+ "id": 18,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "expr": "metrics_FetchChunkTime_Max{instance=~\"${instance}\"}",
+ "legendFormat": "${baseLegend}",
+ "refId": "A"
+ }
+ ],
+ "title": "metrics_FetchChunkTime_Max",
+ "type": "timeseries"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 0,
+ "y": 21
+ },
+ "id": 81,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "editorMode": "code",
+ "expr":
"metrics_OpenStreamSuccessCount_Count{instance=~\"${instance}\"}",
+ "legendFormat": "${baseLegend}",
+ "range": true,
+ "refId": "A"
+ }
+ ],
+ "title": "metrics_OpenStreamSuccessCount_Count",
+ "type": "timeseries"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": false,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
},
{
"color": "red",
@@ -4115,10 +4687,10 @@
"gridPos": {
"h": 8,
"w": 12,
- "x": 0,
- "y": 54
+ "x": 12,
+ "y": 21
},
- "id": 79,
+ "id": 77,
"options": {
"legend": {
"calcs": [],
@@ -4138,29 +4710,15 @@
"uid": "${DS_PROMETHEUS}"
},
"editorMode": "code",
- "expr":
"metrics_WriteDataHardSplitCount_Count{instance=~\"${instance}\"}",
+ "expr":
"metrics_OpenStreamFailCount_Count{instance=~\"${instance}\"}",
"legendFormat": "${baseLegend}",
"range": true,
"refId": "A"
}
],
- "title": "metrics_WriteDataHardSplitCount_Count",
+ "title": "metrics_OpenStreamFailCount_Count",
"type": "timeseries"
- }
- ],
- "title": "PushRelatives",
- "type": "row"
- },
- {
- "collapsed": true,
- "gridPos": {
- "h": 1,
- "w": 24,
- "x": 0,
- "y": 4
- },
- "id": 12,
- "panels": [
+ },
{
"datasource": {
"type": "prometheus",
@@ -4172,6 +4730,7 @@
"mode": "palette-classic"
},
"custom": {
+ "axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
@@ -4185,6 +4744,7 @@
"tooltip": false,
"viz": false
},
+ "insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
@@ -4206,15 +4766,15 @@
"mode": "absolute",
"steps": [
{
- "color": "green"
+ "color": "green",
+ "value": null
},
{
"color": "red",
"value": 80
}
]
- },
- "unit": "ms"
+ }
},
"overrides": []
},
@@ -4222,9 +4782,9 @@
"h": 8,
"w": 12,
"x": 0,
- "y": 5
+ "y": 29
},
- "id": 66,
+ "id": 82,
"options": {
"legend": {
"calcs": [],
@@ -4243,12 +4803,14 @@
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
- "expr": "metrics_OpenStreamTime_Mean{instance=~\"${instance}\"}",
+ "editorMode": "code",
+ "expr":
"metrics_FetchChunkSuccessCount_Count{instance=~\"${instance}\"}",
"legendFormat": "${baseLegend}",
+ "range": true,
"refId": "A"
}
],
- "title": "metrics_OpenStreamTime_Mean",
+ "title": "metrics_FetchChunkSuccessCount_Count",
"type": "timeseries"
},
{
@@ -4262,6 +4824,7 @@
"mode": "palette-classic"
},
"custom": {
+ "axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
@@ -4275,6 +4838,7 @@
"tooltip": false,
"viz": false
},
+ "insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
@@ -4296,15 +4860,15 @@
"mode": "absolute",
"steps": [
{
- "color": "green"
+ "color": "green",
+ "value": null
},
{
"color": "red",
"value": 80
}
]
- },
- "unit": "ms"
+ }
},
"overrides": []
},
@@ -4312,9 +4876,9 @@
"h": 8,
"w": 12,
"x": 12,
- "y": 5
+ "y": 29
},
- "id": 96,
+ "id": 75,
"options": {
"legend": {
"calcs": [],
@@ -4333,12 +4897,14 @@
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
- "expr": "metrics_OpenStreamTime_Max{instance=~\"${instance}\"}",
+ "editorMode": "code",
+ "expr":
"metrics_FetchChunkFailCount_Count{instance=~\"${instance}\"}",
"legendFormat": "${baseLegend}",
+ "range": true,
"refId": "A"
}
],
- "title": "metrics_OpenStreamTime_Max",
+ "title": "metrics_FetchChunkFailCount_Count",
"type": "timeseries"
},
{
@@ -4352,6 +4918,7 @@
"mode": "palette-classic"
},
"custom": {
+ "axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
@@ -4365,6 +4932,7 @@
"tooltip": false,
"viz": false
},
+ "insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
@@ -4386,15 +4954,15 @@
"mode": "absolute",
"steps": [
{
- "color": "green"
+ "color": "green",
+ "value": null
},
{
"color": "red",
"value": 80
}
]
- },
- "unit": "ms"
+ }
},
"overrides": []
},
@@ -4402,9 +4970,9 @@
"h": 8,
"w": 12,
"x": 0,
- "y": 13
+ "y": 37
},
- "id": 17,
+ "id": 73,
"options": {
"legend": {
"calcs": [],
@@ -4423,12 +4991,14 @@
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
- "expr": "metrics_FetchChunkTime_Mean{instance=~\"${instance}\"}",
+ "editorMode": "code",
+ "expr":
"metrics_ActiveChunkStreamCount_Value{instance=~\"${instance}\"}",
"legendFormat": "${baseLegend}",
+ "range": true,
"refId": "A"
}
],
- "title": "metrics_FetchChunkTime_Mean",
+ "title": "metrics_ActiveChunkStreamCount_Value",
"type": "timeseries"
},
{
@@ -4442,6 +5012,7 @@
"mode": "palette-classic"
},
"custom": {
+ "axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
@@ -4455,6 +5026,7 @@
"tooltip": false,
"viz": false
},
+ "insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
@@ -4476,15 +5048,15 @@
"mode": "absolute",
"steps": [
{
- "color": "green"
+ "color": "green",
+ "value": null
},
{
"color": "red",
"value": 80
}
]
- },
- "unit": "ms"
+ }
},
"overrides": []
},
@@ -4492,9 +5064,9 @@
"h": 8,
"w": 12,
"x": 12,
- "y": 13
+ "y": 37
},
- "id": 18,
+ "id": 220,
"options": {
"legend": {
"calcs": [],
@@ -4513,12 +5085,19 @@
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
- "expr": "metrics_FetchChunkTime_Max{instance=~\"${instance}\"}",
- "legendFormat": "${baseLegend}",
- "refId": "A"
+ "disableTextWrap": false,
+ "editorMode": "builder",
+ "expr": "metrics_FetchChunkTransferSize_Count",
+ "fullMetaSearch": false,
+ "includeNullMetadata": true,
+ "instant": false,
+ "legendFormat": "__auto",
+ "range": true,
+ "refId": "A",
+ "useBackend": false
}
],
- "title": "metrics_FetchChunkTime_Max",
+ "title": "FetchChunkTransferSizeCount",
"type": "timeseries"
},
{
@@ -4532,6 +5111,7 @@
"mode": "palette-classic"
},
"custom": {
+ "axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
@@ -4545,6 +5125,7 @@
"tooltip": false,
"viz": false
},
+ "insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
@@ -4566,14 +5147,16 @@
"mode": "absolute",
"steps": [
{
- "color": "green"
+ "color": "green",
+ "value": null
},
{
"color": "red",
"value": 80
}
]
- }
+ },
+ "unit": "bytes"
},
"overrides": []
},
@@ -4581,9 +5164,9 @@
"h": 8,
"w": 12,
"x": 0,
- "y": 21
+ "y": 45
},
- "id": 81,
+ "id": 222,
"options": {
"legend": {
"calcs": [],
@@ -4602,14 +5185,19 @@
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
- "editorMode": "code",
- "expr":
"metrics_OpenStreamSuccessCount_Count{instance=~\"${instance}\"}",
- "legendFormat": "${baseLegend}",
+ "disableTextWrap": false,
+ "editorMode": "builder",
+ "expr": "metrics_FetchChunkTransferSize_Max",
+ "fullMetaSearch": false,
+ "includeNullMetadata": true,
+ "instant": false,
+ "legendFormat": "__auto",
"range": true,
- "refId": "A"
+ "refId": "A",
+ "useBackend": false
}
],
- "title": "metrics_OpenStreamSuccessCount_Count",
+ "title": "FetchChunkTransferSizeMax",
"type": "timeseries"
},
{
@@ -4623,6 +5211,7 @@
"mode": "palette-classic"
},
"custom": {
+ "axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
@@ -4636,6 +5225,7 @@
"tooltip": false,
"viz": false
},
+ "insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
@@ -4657,14 +5247,16 @@
"mode": "absolute",
"steps": [
{
- "color": "green"
+ "color": "green",
+ "value": null
},
{
"color": "red",
"value": 80
}
]
- }
+ },
+ "unit": "bytes"
},
"overrides": []
},
@@ -4672,9 +5264,9 @@
"h": 8,
"w": 12,
"x": 12,
- "y": 21
+ "y": 45
},
- "id": 77,
+ "id": 221,
"options": {
"legend": {
"calcs": [],
@@ -4693,14 +5285,19 @@
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
- "editorMode": "code",
- "expr":
"metrics_OpenStreamFailCount_Count{instance=~\"${instance}\"}",
- "legendFormat": "${baseLegend}",
+ "disableTextWrap": false,
+ "editorMode": "builder",
+ "expr": "metrics_FetchChunkTransferSize_Mean",
+ "fullMetaSearch": false,
+ "includeNullMetadata": true,
+ "instant": false,
+ "legendFormat": "__auto",
"range": true,
- "refId": "A"
+ "refId": "A",
+ "useBackend": false
}
],
- "title": "metrics_OpenStreamFailCount_Count",
+ "title": "FetchChunkTransferSizeMean",
"type": "timeseries"
},
{
@@ -4714,6 +5311,7 @@
"mode": "palette-classic"
},
"custom": {
+ "axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
@@ -4727,6 +5325,7 @@
"tooltip": false,
"viz": false
},
+ "insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
@@ -4748,7 +5347,8 @@
"mode": "absolute",
"steps": [
{
- "color": "green"
+ "color": "green",
+ "value": null
},
{
"color": "red",
@@ -4763,9 +5363,9 @@
"h": 8,
"w": 12,
"x": 0,
- "y": 29
+ "y": 53
},
- "id": 82,
+ "id": 223,
"options": {
"legend": {
"calcs": [],
@@ -4784,14 +5384,19 @@
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
- "editorMode": "code",
- "expr":
"metrics_FetchChunkSuccessCount_Count{instance=~\"${instance}\"}",
- "legendFormat": "${baseLegend}",
+ "disableTextWrap": false,
+ "editorMode": "builder",
+ "expr": "metrics_FetchChunkTransferTime_Count",
+ "fullMetaSearch": false,
+ "includeNullMetadata": true,
+ "instant": false,
+ "legendFormat": "__auto",
"range": true,
- "refId": "A"
+ "refId": "A",
+ "useBackend": false
}
],
- "title": "metrics_FetchChunkSuccessCount_Count",
+ "title": "FetchChunkTransferTimeCount",
"type": "timeseries"
},
{
@@ -4805,6 +5410,7 @@
"mode": "palette-classic"
},
"custom": {
+ "axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
@@ -4818,6 +5424,7 @@
"tooltip": false,
"viz": false
},
+ "insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
@@ -4839,14 +5446,16 @@
"mode": "absolute",
"steps": [
{
- "color": "green"
+ "color": "green",
+ "value": null
},
{
"color": "red",
"value": 80
}
]
- }
+ },
+ "unit": "ms"
},
"overrides": []
},
@@ -4854,9 +5463,9 @@
"h": 8,
"w": 12,
"x": 12,
- "y": 29
+ "y": 53
},
- "id": 75,
+ "id": 224,
"options": {
"legend": {
"calcs": [],
@@ -4875,14 +5484,19 @@
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
- "editorMode": "code",
- "expr":
"metrics_FetchChunkFailCount_Count{instance=~\"${instance}\"}",
- "legendFormat": "${baseLegend}",
+ "disableTextWrap": false,
+ "editorMode": "builder",
+ "expr": "metrics_FetchChunkTransferTime_Mean",
+ "fullMetaSearch": false,
+ "includeNullMetadata": true,
+ "instant": false,
+ "legendFormat": "__auto",
"range": true,
- "refId": "A"
+ "refId": "A",
+ "useBackend": false
}
],
- "title": "metrics_FetchChunkFailCount_Count",
+ "title": "FetchChunkTransferTimeMean",
"type": "timeseries"
},
{
@@ -4896,6 +5510,7 @@
"mode": "palette-classic"
},
"custom": {
+ "axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
@@ -4909,6 +5524,7 @@
"tooltip": false,
"viz": false
},
+ "insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
@@ -4930,14 +5546,16 @@
"mode": "absolute",
"steps": [
{
- "color": "green"
+ "color": "green",
+ "value": null
},
{
"color": "red",
"value": 80
}
]
- }
+ },
+ "unit": "ms"
},
"overrides": []
},
@@ -4945,9 +5563,9 @@
"h": 8,
"w": 12,
"x": 0,
- "y": 37
+ "y": 61
},
- "id": 73,
+ "id": 225,
"options": {
"legend": {
"calcs": [],
@@ -4966,14 +5584,19 @@
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
- "editorMode": "code",
- "expr":
"metrics_ActiveChunkStreamCount_Value{instance=~\"${instance}\"}",
- "legendFormat": "${baseLegend}",
+ "disableTextWrap": false,
+ "editorMode": "builder",
+ "expr": "metrics_FetchChunkTransferTime_Max",
+ "fullMetaSearch": false,
+ "includeNullMetadata": true,
+ "instant": false,
+ "legendFormat": "__auto",
"range": true,
- "refId": "A"
+ "refId": "A",
+ "useBackend": false
}
],
- "title": "metrics_ActiveChunkStreamCount_Value",
+ "title": "FetchChunkTransferTimeMax",
"type": "timeseries"
}
],
@@ -5051,7 +5674,7 @@
"h": 8,
"w": 12,
"x": 0,
- "y": 6
+ "y": 54
},
"id": 78,
"options": {
@@ -5141,7 +5764,7 @@
"h": 8,
"w": 12,
"x": 12,
- "y": 6
+ "y": 54
},
"id": 80,
"options": {
@@ -5231,7 +5854,7 @@
"h": 9,
"w": 12,
"x": 0,
- "y": 14
+ "y": 62
},
"id": 4,
"options": {
@@ -5321,7 +5944,7 @@
"h": 9,
"w": 12,
"x": 12,
- "y": 14
+ "y": 62
},
"id": 6,
"options": {
@@ -5411,7 +6034,7 @@
"h": 8,
"w": 12,
"x": 0,
- "y": 23
+ "y": 71
},
"id": 56,
"options": {
@@ -5501,7 +6124,7 @@
"h": 8,
"w": 12,
"x": 12,
- "y": 23
+ "y": 71
},
"id": 58,
"options": {
@@ -5605,7 +6228,7 @@
"h": 8,
"w": 12,
"x": 0,
- "y": 7
+ "y": 55
},
"id": 19,
"options": {
@@ -5698,7 +6321,7 @@
"h": 8,
"w": 12,
"x": 12,
- "y": 7
+ "y": 55
},
"id": 190,
"options": {
@@ -5722,8 +6345,8 @@
},
"editorMode": "builder",
"expr":
"metrics_DirectMemoryUsageRatio_Value{instance=~\"${instance}\"}",
- "legendFormat": "${baseLegend}",
"instant": false,
+ "legendFormat": "${baseLegend}",
"range": true,
"refId": "A"
}
@@ -5792,7 +6415,7 @@
"h": 8,
"w": 12,
"x": 0,
- "y": 15
+ "y": 63
},
"id": 191,
"options": {
@@ -5816,8 +6439,8 @@
},
"editorMode": "builder",
"expr":
"metrics_MemoryFileStorageSize_Value{instance=~\"${instance}\"}",
- "legendFormat": "${baseLegend}",
"instant": false,
+ "legendFormat": "${baseLegend}",
"range": true,
"refId": "A"
}
@@ -5885,7 +6508,7 @@
"h": 8,
"w": 12,
"x": 12,
- "y": 15
+ "y": 63
},
"id": 188,
"options": {
@@ -5909,8 +6532,8 @@
},
"editorMode": "builder",
"expr":
"metrics_MemoryStorageFileCount_Value{instance=~\"${instance}\"}",
- "legendFormat": "${baseLegend}",
"instant": false,
+ "legendFormat": "${baseLegend}",
"range": true,
"refId": "A"
}
@@ -5979,7 +6602,7 @@
"h": 8,
"w": 12,
"x": 0,
- "y": 23
+ "y": 71
},
"id": 20,
"options": {
@@ -6069,7 +6692,7 @@
"h": 8,
"w": 12,
"x": 12,
- "y": 23
+ "y": 71
},
"id": 187,
"options": {
@@ -6092,8 +6715,8 @@
},
"editorMode": "builder",
"expr":
"metrics_EvictedFileCount_Value{instance=~\"${instance}\"}",
- "legendFormat": "${baseLegend}",
"instant": false,
+ "legendFormat": "${baseLegend}",
"range": true,
"refId": "A"
}
@@ -6162,7 +6785,7 @@
"h": 8,
"w": 12,
"x": 0,
- "y": 31
+ "y": 79
},
"id": 165,
"options": {
@@ -6254,7 +6877,7 @@
"h": 8,
"w": 12,
"x": 12,
- "y": 31
+ "y": 79
},
"id": 166,
"options": {
@@ -6341,7 +6964,7 @@
"h": 8,
"w": 12,
"x": 0,
- "y": 39
+ "y": 87
},
"id": 158,
"options": {
@@ -6428,7 +7051,7 @@
"h": 8,
"w": 12,
"x": 12,
- "y": 39
+ "y": 87
},
"id": 164,
"options": {
@@ -6515,7 +7138,7 @@
"h": 8,
"w": 12,
"x": 0,
- "y": 47
+ "y": 95
},
"id": 201,
"options": {
@@ -6602,7 +7225,7 @@
"h": 8,
"w": 12,
"x": 12,
- "y": 47
+ "y": 95
},
"id": 202,
"options": {
@@ -6689,7 +7312,7 @@
"h": 8,
"w": 12,
"x": 0,
- "y": 55
+ "y": 103
},
"id": 171,
"options": {
@@ -6776,7 +7399,7 @@
"h": 8,
"w": 12,
"x": 12,
- "y": 55
+ "y": 103
},
"id": 173,
"options": {
@@ -6863,7 +7486,7 @@
"h": 8,
"w": 12,
"x": 0,
- "y": 63
+ "y": 111
},
"id": 178,
"options": {
@@ -6955,7 +7578,7 @@
"h": 8,
"w": 12,
"x": 12,
- "y": 63
+ "y": 111
},
"id": 167,
"options": {
@@ -7047,7 +7670,7 @@
"h": 8,
"w": 12,
"x": 0,
- "y": 71
+ "y": 119
},
"id": 168,
"options": {
@@ -7134,7 +7757,7 @@
"h": 8,
"w": 12,
"x": 12,
- "y": 71
+ "y": 119
},
"id": 203,
"options": {
@@ -7221,7 +7844,7 @@
"h": 8,
"w": 12,
"x": 0,
- "y": 79
+ "y": 127
},
"id": 204,
"options": {
@@ -7308,7 +7931,7 @@
"h": 8,
"w": 12,
"x": 12,
- "y": 79
+ "y": 127
},
"id": 205,
"options": {
@@ -7395,7 +8018,7 @@
"h": 8,
"w": 12,
"x": 0,
- "y": 87
+ "y": 135
},
"id": 206,
"options": {
@@ -7482,7 +8105,7 @@
"h": 8,
"w": 12,
"x": 12,
- "y": 87
+ "y": 135
},
"id": 207,
"options": {
@@ -7569,7 +8192,7 @@
"h": 8,
"w": 12,
"x": 0,
- "y": 95
+ "y": 143
},
"id": 208,
"options": {
@@ -7656,7 +8279,7 @@
"h": 8,
"w": 12,
"x": 12,
- "y": 95
+ "y": 143
},
"id": 209,
"options": {
@@ -7748,7 +8371,7 @@
"h": 8,
"w": 12,
"x": 0,
- "y": 103
+ "y": 151
},
"id": 169,
"options": {
@@ -7840,7 +8463,7 @@
"h": 8,
"w": 12,
"x": 12,
- "y": 103
+ "y": 151
},
"id": 170,
"options": {
@@ -7927,7 +8550,7 @@
"h": 8,
"w": 12,
"x": 0,
- "y": 111
+ "y": 159
},
"id": 210,
"options": {
@@ -8014,7 +8637,7 @@
"h": 8,
"w": 12,
"x": 12,
- "y": 111
+ "y": 159
},
"id": 211,
"options": {
@@ -8101,7 +8724,7 @@
"h": 8,
"w": 12,
"x": 0,
- "y": 119
+ "y": 167
},
"id": 212,
"options": {
@@ -8188,7 +8811,7 @@
"h": 8,
"w": 12,
"x": 12,
- "y": 119
+ "y": 167
},
"id": 213,
"options": {
@@ -8275,7 +8898,7 @@
"h": 8,
"w": 12,
"x": 0,
- "y": 127
+ "y": 175
},
"id": 214,
"options": {
@@ -8362,7 +8985,7 @@
"h": 8,
"w": 12,
"x": 12,
- "y": 127
+ "y": 175
},
"id": 215,
"options": {
@@ -8449,7 +9072,7 @@
"h": 8,
"w": 12,
"x": 0,
- "y": 135
+ "y": 183
},
"id": 216,
"options": {
@@ -8540,7 +9163,7 @@
"h": 8,
"w": 12,
"x": 12,
- "y": 135
+ "y": 183
},
"id": 108,
"options": {
@@ -8632,7 +9255,7 @@
"h": 8,
"w": 12,
"x": 0,
- "y": 143
+ "y": 191
},
"id": 104,
"options": {
@@ -8723,7 +9346,7 @@
"h": 8,
"w": 12,
"x": 12,
- "y": 143
+ "y": 191
},
"id": 106,
"options": {
@@ -8829,7 +9452,7 @@
"h": 8,
"w": 12,
"x": 0,
- "y": 885
+ "y": 933
},
"id": 44,
"options": {
@@ -8919,7 +9542,7 @@
"h": 8,
"w": 12,
"x": 12,
- "y": 885
+ "y": 933
},
"id": 46,
"options": {
@@ -9008,7 +9631,7 @@
"h": 8,
"w": 12,
"x": 0,
- "y": 893
+ "y": 941
},
"id": 192,
"options": {
@@ -9098,7 +9721,7 @@
"h": 8,
"w": 12,
"x": 12,
- "y": 893
+ "y": 941
},
"id": 180,
"options": {
@@ -9189,7 +9812,7 @@
"h": 8,
"w": 12,
"x": 0,
- "y": 901
+ "y": 949
},
"id": 88,
"options": {
@@ -9279,7 +9902,7 @@
"h": 8,
"w": 12,
"x": 12,
- "y": 901
+ "y": 949
},
"id": 135,
"options": {
@@ -9386,7 +10009,7 @@
"h": 8,
"w": 12,
"x": 0,
- "y": 910
+ "y": 958
},
"id": 159,
"options": {
@@ -9479,7 +10102,7 @@
"h": 8,
"w": 12,
"x": 12,
- "y": 910
+ "y": 958
},
"id": 160,
"options": {
@@ -9572,7 +10195,7 @@
"h": 8,
"w": 12,
"x": 0,
- "y": 918
+ "y": 966
},
"id": 161,
"options": {
@@ -9678,7 +10301,7 @@
"h": 9,
"w": 12,
"x": 0,
- "y": 593
+ "y": 641
},
"id": 139,
"options": {
@@ -9770,7 +10393,7 @@
"h": 9,
"w": 12,
"x": 12,
- "y": 593
+ "y": 641
},
"id": 141,
"options": {
@@ -9862,7 +10485,7 @@
"h": 9,
"w": 12,
"x": 0,
- "y": 666
+ "y": 714
},
"id": 142,
"options": {
@@ -9954,7 +10577,7 @@
"h": 9,
"w": 12,
"x": 12,
- "y": 666
+ "y": 714
},
"id": 143,
"options": {
@@ -10046,7 +10669,7 @@
"h": 9,
"w": 12,
"x": 0,
- "y": 675
+ "y": 723
},
"id": 144,
"options": {
@@ -10138,7 +10761,7 @@
"h": 9,
"w": 12,
"x": 12,
- "y": 675
+ "y": 723
},
"id": 145,
"options": {
@@ -10230,7 +10853,7 @@
"h": 9,
"w": 12,
"x": 0,
- "y": 684
+ "y": 732
},
"id": 146,
"options": {
@@ -10322,7 +10945,7 @@
"h": 9,
"w": 12,
"x": 12,
- "y": 684
+ "y": 732
},
"id": 147,
"options": {
@@ -10414,7 +11037,7 @@
"h": 9,
"w": 12,
"x": 0,
- "y": 693
+ "y": 741
},
"id": 148,
"options": {
@@ -10506,7 +11129,7 @@
"h": 9,
"w": 12,
"x": 12,
- "y": 693
+ "y": 741
},
"id": 149,
"options": {
@@ -10598,7 +11221,7 @@
"h": 9,
"w": 12,
"x": 0,
- "y": 702
+ "y": 750
},
"id": 150,
"options": {
@@ -10690,7 +11313,7 @@
"h": 9,
"w": 12,
"x": 12,
- "y": 702
+ "y": 750
},
"id": 151,
"options": {
@@ -10781,7 +11404,7 @@
"h": 8,
"w": 12,
"x": 0,
- "y": 711
+ "y": 759
},
"id": 153,
"options": {
@@ -10872,7 +11495,7 @@
"h": 8,
"w": 12,
"x": 12,
- "y": 711
+ "y": 759
},
"id": 154,
"options": {
@@ -10963,7 +11586,7 @@
"h": 8,
"w": 12,
"x": 0,
- "y": 719
+ "y": 767
},
"id": 155,
"options": {
@@ -11054,7 +11677,7 @@
"h": 8,
"w": 12,
"x": 12,
- "y": 719
+ "y": 767
},
"id": 200,
"options": {
@@ -11146,7 +11769,7 @@
"h": 9,
"w": 12,
"x": 0,
- "y": 727
+ "y": 775
},
"id": 198,
"options": {
@@ -11238,7 +11861,7 @@
"h": 9,
"w": 12,
"x": 12,
- "y": 727
+ "y": 775
},
"id": 199,
"options": {
@@ -11330,7 +11953,7 @@
"h": 9,
"w": 12,
"x": 0,
- "y": 736
+ "y": 784
},
"id": 196,
"options": {
@@ -11422,7 +12045,7 @@
"h": 9,
"w": 12,
"x": 12,
- "y": 736
+ "y": 784
},
"id": 197,
"options": {
@@ -11527,7 +12150,7 @@
"h": 8,
"w": 12,
"x": 0,
- "y": 998
+ "y": 1046
},
"id": 112,
"options": {
@@ -11618,7 +12241,7 @@
"h": 8,
"w": 12,
"x": 12,
- "y": 998
+ "y": 1046
},
"id": 116,
"options": {
@@ -11724,7 +12347,7 @@
"h": 8,
"w": 12,
"x": 0,
- "y": 1007
+ "y": 1055
},
"id": 125,
"options": {
@@ -11817,7 +12440,7 @@
"h": 8,
"w": 12,
"x": 12,
- "y": 1007
+ "y": 1055
},
"id": 126,
"options": {
@@ -11910,7 +12533,7 @@
"h": 8,
"w": 12,
"x": 0,
- "y": 1015
+ "y": 1063
},
"id": 163,
"options": {
@@ -12003,7 +12626,7 @@
"h": 8,
"w": 12,
"x": 12,
- "y": 1015
+ "y": 1063
},
"id": 162,
"options": {
@@ -12096,7 +12719,7 @@
"h": 8,
"w": 12,
"x": 0,
- "y": 1023
+ "y": 1071
},
"id": 127,
"options": {
@@ -12202,7 +12825,7 @@
"h": 9,
"w": 12,
"x": 0,
- "y": 1032
+ "y": 1080
},
"id": 174,
"options": {
@@ -12295,7 +12918,7 @@
"h": 9,
"w": 12,
"x": 12,
- "y": 1032
+ "y": 1080
},
"id": 176,
"options": {
@@ -12387,7 +13010,7 @@
"h": 9,
"w": 12,
"x": 0,
- "y": 1041
+ "y": 1089
},
"id": 175,
"options": {
@@ -12480,7 +13103,7 @@
"h": 9,
"w": 12,
"x": 12,
- "y": 1041
+ "y": 1089
},
"id": 177,
"options": {
@@ -12577,11 +13200,10 @@
"from": "now-30m",
"to": "now"
},
- "timeRangeUpdatedDuringEditOrView": false,
"timepicker": {},
"timezone": "",
"title": "Celeborn",
"uid": "U_qgru_7z",
- "version": 2,
+ "version": 1,
"weekStart": ""
-}
+}
\ No newline at end of file
diff --git
a/common/src/main/java/org/apache/celeborn/common/network/TransportContext.java
b/common/src/main/java/org/apache/celeborn/common/network/TransportContext.java
index fe6cd5c77..166e5c343 100644
---
a/common/src/main/java/org/apache/celeborn/common/network/TransportContext.java
+++
b/common/src/main/java/org/apache/celeborn/common/network/TransportContext.java
@@ -111,6 +111,19 @@ public class TransportContext implements Closeable {
this(conf, msgHandler, closeIdleConnections, null, enableHeartbeat,
source);
}
+ public TransportContext(
+ TransportConf conf,
+ BaseMessageHandler msgHandler,
+ boolean closeIdleConnections,
+ boolean enableHeartbeat,
+ AbstractSource source,
+ boolean collectFetchChunkDetailMetrics) {
+ this(conf, msgHandler, closeIdleConnections, null, enableHeartbeat,
source);
+ if (collectFetchChunkDetailMetrics) {
+ ENCODER.setSource(source);
+ }
+ }
+
public TransportContext(
TransportConf conf, BaseMessageHandler msgHandler, boolean
closeIdleConnections) {
this(conf, msgHandler, closeIdleConnections, null, false, null);
diff --git
a/common/src/main/java/org/apache/celeborn/common/network/protocol/MessageEncoder.java
b/common/src/main/java/org/apache/celeborn/common/network/protocol/MessageEncoder.java
index 6fcf50a91..91c695c4f 100644
---
a/common/src/main/java/org/apache/celeborn/common/network/protocol/MessageEncoder.java
+++
b/common/src/main/java/org/apache/celeborn/common/network/protocol/MessageEncoder.java
@@ -26,6 +26,8 @@ import io.netty.handler.codec.MessageToMessageEncoder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import org.apache.celeborn.common.metrics.source.AbstractSource;
+
/**
* Encoder used by the server side to encode server-to-client responses. This
encoder is stateless
* so it is safe to be shared by multiple threads.
@@ -37,6 +39,8 @@ public final class MessageEncoder extends
MessageToMessageEncoder<Message> {
public static final MessageEncoder INSTANCE = new MessageEncoder();
+ public AbstractSource source;
+
private MessageEncoder() {}
/**
@@ -85,9 +89,13 @@ public final class MessageEncoder extends
MessageToMessageEncoder<Message> {
if (body != null) {
// We transfer ownership of the reference on in.body() to
MessageWithHeader.
// This reference will be freed when MessageWithHeader.deallocate() is
called.
- out.add(new MessageWithHeader(in.body(), header, body, bodyLength));
+ out.add(new MessageWithHeader(in.body(), header, body, bodyLength,
source));
} else {
out.add(header);
}
}
+
+ public void setSource(AbstractSource source) {
+ this.source = source;
+ }
}
diff --git
a/common/src/main/java/org/apache/celeborn/common/network/protocol/MessageWithHeader.java
b/common/src/main/java/org/apache/celeborn/common/network/protocol/MessageWithHeader.java
index c2de26128..4c1c58c8e 100644
---
a/common/src/main/java/org/apache/celeborn/common/network/protocol/MessageWithHeader.java
+++
b/common/src/main/java/org/apache/celeborn/common/network/protocol/MessageWithHeader.java
@@ -20,6 +20,7 @@ package org.apache.celeborn.common.network.protocol;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.WritableByteChannel;
+import java.util.UUID;
import javax.annotation.Nullable;
@@ -28,6 +29,7 @@ import io.netty.buffer.ByteBuf;
import io.netty.channel.FileRegion;
import io.netty.util.ReferenceCountUtil;
+import org.apache.celeborn.common.metrics.source.AbstractSource;
import org.apache.celeborn.common.network.buffer.ManagedBuffer;
import org.apache.celeborn.common.network.util.AbstractFileRegion;
@@ -44,7 +46,7 @@ class MessageWithHeader extends AbstractFileRegion {
private final Object body;
private final long bodyLength;
private long totalBytesTransferred;
-
+ private AbstractSource source;
/**
* When the write buffer size is larger than this limit, I/O will be done in
chunks of this size.
* The size should not be too large as it will waste underlying memory copy.
e.g. If network
@@ -78,6 +80,16 @@ class MessageWithHeader extends AbstractFileRegion {
this.bodyLength = bodyLength;
}
+ MessageWithHeader(
+ @Nullable ManagedBuffer managedBuffer,
+ ByteBuf header,
+ Object body,
+ long bodyLength,
+ AbstractSource source) {
+ this(managedBuffer, header, body, bodyLength);
+ this.source = source;
+ }
+
@Override
public long count() {
return headerLength + bodyLength;
@@ -116,7 +128,16 @@ class MessageWithHeader extends AbstractFileRegion {
// Bytes written for body in this call.
long writtenBody = 0;
if (body instanceof FileRegion) {
+ String key = "";
+ if (source != null) {
+ key = UUID.randomUUID().toString();
+ source.startTimer("FetchChunkTransferTime", key);
+ }
writtenBody = ((FileRegion) body).transferTo(target,
totalBytesTransferred - headerLength);
+ if (source != null) {
+ source.stopTimer("FetchChunkTransferTime", key);
+ source.updateHistogram("FetchChunkTransferSize", writtenBody);
+ }
} else if (body instanceof ByteBuf) {
writtenBody = copyByteBuf((ByteBuf) body, target);
}
diff --git
a/common/src/main/scala/org/apache/celeborn/common/metrics/source/AbstractSource.scala
b/common/src/main/scala/org/apache/celeborn/common/metrics/source/AbstractSource.scala
index e9c4cfa3b..7391d0529 100644
---
a/common/src/main/scala/org/apache/celeborn/common/metrics/source/AbstractSource.scala
+++
b/common/src/main/scala/org/apache/celeborn/common/metrics/source/AbstractSource.scala
@@ -62,6 +62,8 @@ abstract class AbstractSource(conf: CelebornConf, role:
String)
val timerSupplier = new TimerSupplier(metricsSlidingWindowSize)
+ val histogramSupplier = new HistogramSupplier(metricsSlidingWindowSize)
+
val metricsCleaner: ScheduledExecutorService =
ThreadUtils.newDaemonSingleThreadScheduledExecutor("worker-metrics-cleaner")
@@ -93,6 +95,9 @@ abstract class AbstractSource(conf: CelebornConf, role:
String)
protected val namedMeters: ConcurrentHashMap[String, NamedMeter] =
JavaUtils.newConcurrentHashMap[String, NamedMeter]()
+ protected val namedHistogram: ConcurrentHashMap[String, NamedHistogram] =
+ JavaUtils.newConcurrentHashMap[String, NamedHistogram]()
+
def addTimerMetrics(namedTimer: NamedTimer): Unit = {
val timerMetricsString = getTimerMetrics(namedTimer)
timerMetrics.add(timerMetricsString)
@@ -183,6 +188,20 @@ abstract class AbstractSource(conf: CelebornConf, role:
String)
NamedCounter(name, metricRegistry.counter(metricNameWithLabel), labels
++ staticLabels))
}
+ def addHistogram(name: String): Unit = {
+ addHistogram(name, Map.empty)
+ }
+
+ def addHistogram(name: String, labels: Map[String, String]): Unit = {
+ val metricNameWithLabel = metricNameWithCustomizedLabels(name, labels)
+ namedHistogram.putIfAbsent(
+ metricNameWithLabel,
+ NamedHistogram(
+ name,
+ metricRegistry.histogram(name, histogramSupplier),
+ labels ++ staticLabels))
+ }
+
def counters(): List[NamedCounter] = {
namedCounters.values().asScala.toList
}
@@ -196,7 +215,7 @@ abstract class AbstractSource(conf: CelebornConf, role:
String)
}
def histograms(): List[NamedHistogram] = {
- List.empty[NamedHistogram]
+ namedHistogram.values().asScala.toList
}
def timers(): List[NamedTimer] = {
@@ -341,6 +360,20 @@ abstract class AbstractSource(conf: CelebornConf, role:
String)
}
}
+ def updateHistogram(name: String, value: Long): Unit = {
+ updateHistogram(name, Map.empty, value)
+ }
+
+ def updateHistogram(name: String, labels: Map[String, String], value: Long):
Unit = {
+ val metricNameWithLabel = metricNameWithCustomizedLabels(name, labels)
+ val histogram = namedHistogram.get(metricNameWithLabel)
+ if (histogram != null) {
+ histogram.histogram.update(value)
+ } else {
+ logWarning(s"Metric $metricNameWithLabel not found!")
+ }
+ }
+
private def clearOldValues(map: ConcurrentHashMap[String, Long]): Unit = {
if (map.size > 5000) {
// remove values has existed more than 15 min
@@ -402,21 +435,21 @@ abstract class AbstractSource(conf: CelebornConf, role:
String)
val prefix = normalizeKey(nh.name)
val label = nh.labelString
sb.append(s"${prefix}Count$label ${nh.histogram.getCount} $timestamp\n")
- sb.append(s"${prefix}Max$label ${reportNanosAsMills(snapshot.getMax)}
$timestamp\n")
- sb.append(s"${prefix}Mean$label ${reportNanosAsMills(snapshot.getMean)}
$timestamp\n")
- sb.append(s"${prefix}Min$label ${reportNanosAsMills(snapshot.getMin)}
$timestamp\n")
+ sb.append(s"${prefix}Max$label ${(snapshot.getMax)} $timestamp\n")
+ sb.append(s"${prefix}Mean$label ${(snapshot.getMean)} $timestamp\n")
+ sb.append(s"${prefix}Min$label ${(snapshot.getMin)} $timestamp\n")
sb.append(s"${prefix}50thPercentile$label" +
- s" ${reportNanosAsMills(snapshot.getMedian)} $timestamp\n")
+ s" ${snapshot.getMedian} $timestamp\n")
sb.append(s"${prefix}75thPercentile$label" +
- s" ${reportNanosAsMills(snapshot.get75thPercentile)} $timestamp\n")
+ s" ${snapshot.get75thPercentile} $timestamp\n")
sb.append(s"${prefix}95thPercentile$label" +
- s" ${reportNanosAsMills(snapshot.get95thPercentile)} $timestamp\n")
+ s" ${snapshot.get95thPercentile} $timestamp\n")
sb.append(s"${prefix}98thPercentile$label" +
- s" ${reportNanosAsMills(snapshot.get98thPercentile)} $timestamp\n")
+ s" ${snapshot.get98thPercentile} $timestamp\n")
sb.append(s"${prefix}99thPercentile$label" +
- s" ${reportNanosAsMills(snapshot.get99thPercentile)} $timestamp\n")
+ s" ${snapshot.get99thPercentile} $timestamp\n")
sb.append(s"${prefix}999thPercentile$label" +
- s" ${reportNanosAsMills(snapshot.get999thPercentile)} $timestamp\n")
+ s" ${snapshot.get999thPercentile} $timestamp\n")
sb.toString()
}
@@ -487,8 +520,6 @@ abstract class AbstractSource(conf: CelebornConf, role:
String)
sb.append(getMeterMetrics(m))
case h: NamedHistogram =>
sb.append(getHistogramMetrics(h))
- h.asInstanceOf[CelebornHistogram].reservoir
- .asInstanceOf[ResettableSlidingWindowReservoir].reset()
case t: NamedTimer =>
sb.append(getTimerMetrics(t))
t.timer.asInstanceOf[CelebornTimer].reservoir
@@ -506,6 +537,7 @@ abstract class AbstractSource(conf: CelebornConf, role:
String)
namedMeters.clear()
namedTimers.clear()
timerMetrics.clear()
+ namedHistogram.clear()
metricRegistry.removeMatching(new MetricFilter {
override def matches(s: String, metric: Metric): Boolean = true
})
@@ -544,3 +576,10 @@ class GaugeSupplier[T](f: () => T) extends
MetricRegistry.MetricSupplier[Gauge[_
class MeterSupplier(f: () => Long) extends
MetricRegistry.MetricSupplier[Meter] {
override def newMetric(): Meter = new Meter { override def getCount: Long =
f() }
}
+
+class HistogramSupplier(val slidingWindowSize: Int)
+ extends MetricRegistry.MetricSupplier[Histogram] {
+ override def newMetric(): Histogram = {
+ new CelebornHistogram(new
ResettableSlidingWindowReservoir(slidingWindowSize))
+ }
+}
diff --git
a/common/src/test/scala/org/apache/celeborn/common/metrics/source/CelebornSourceSuite.scala
b/common/src/test/scala/org/apache/celeborn/common/metrics/source/CelebornSourceSuite.scala
index d6eeb2358..1644e4e87 100644
---
a/common/src/test/scala/org/apache/celeborn/common/metrics/source/CelebornSourceSuite.scala
+++
b/common/src/test/scala/org/apache/celeborn/common/metrics/source/CelebornSourceSuite.scala
@@ -22,6 +22,22 @@ import org.apache.celeborn.common.CelebornConf
class CelebornSourceSuite extends CelebornFunSuite {
+ test("test histogram") {
+ val conf = new CelebornConf()
+
+ val mockSource = new AbstractSource(conf, Role.WORKER) {
+ override def sourceName: String = "mockSource"
+ }
+ val histogram = "abc"
+ mockSource.addHistogram(histogram)
+ for (i <- 1 to 100) {
+ mockSource.updateHistogram(histogram, 10)
+ }
+ val res = mockSource.getMetrics()
+
+ assert(res.contains("metrics_abc_Count"))
+ }
+
test("test getMetrics with customized label") {
val conf = new CelebornConf()
createAbstractSourceAndCheck(conf, "", Role.MASTER)
diff --git
a/service/src/main/scala/org/apache/celeborn/common/metrics/sink/JsonServlet.scala
b/service/src/main/scala/org/apache/celeborn/common/metrics/sink/JsonServlet.scala
index 4ed2c5a3a..5ba3aca7c 100644
---
a/service/src/main/scala/org/apache/celeborn/common/metrics/sink/JsonServlet.scala
+++
b/service/src/main/scala/org/apache/celeborn/common/metrics/sink/JsonServlet.scala
@@ -26,7 +26,7 @@ import com.fasterxml.jackson.databind.ObjectMapper
import com.fasterxml.jackson.module.scala.{ClassTagExtensions,
DefaultScalaModule}
import org.eclipse.jetty.servlet.ServletContextHandler
-import org.apache.celeborn.common.metrics.{CelebornHistogram, CelebornTimer,
ResettableSlidingWindowReservoir}
+import org.apache.celeborn.common.metrics.{CelebornTimer,
ResettableSlidingWindowReservoir}
import org.apache.celeborn.common.metrics.source._
import org.apache.celeborn.server.common.http.HttpUtils
import org.apache.celeborn.server.common.http.HttpUtils.ServletParams
@@ -89,8 +89,6 @@ class JsonServlet(
absSource.gauges().foreach(g => recordGauge(absSource, g, metricDatas))
absSource.histograms().foreach(h => {
recordHistogram(absSource, h, metricDatas)
- h.asInstanceOf[CelebornHistogram].reservoir
- .asInstanceOf[ResettableSlidingWindowReservoir].reset()
})
absSource.timers().foreach(t => {
recordTimer(absSource, t, metricDatas)
diff --git
a/worker/src/main/scala/org/apache/celeborn/service/deploy/worker/Worker.scala
b/worker/src/main/scala/org/apache/celeborn/service/deploy/worker/Worker.scala
index de7190404..eac9a5ef4 100644
---
a/worker/src/main/scala/org/apache/celeborn/service/deploy/worker/Worker.scala
+++
b/worker/src/main/scala/org/apache/celeborn/service/deploy/worker/Worker.scala
@@ -259,7 +259,8 @@ private[celeborn] class Worker(
fetchHandler,
closeIdleConnections,
conf.workerFetchHeartbeatEnabled,
- workerSource)
+ workerSource,
+ conf.metricsCollectCriticalEnabled)
(
transportContext,
transportContext.createServer(conf.workerFetchPort,
getServerBootstraps(transportConf)))
diff --git
a/worker/src/main/scala/org/apache/celeborn/service/deploy/worker/WorkerSource.scala
b/worker/src/main/scala/org/apache/celeborn/service/deploy/worker/WorkerSource.scala
index 26532a6bf..8275e3595 100644
---
a/worker/src/main/scala/org/apache/celeborn/service/deploy/worker/WorkerSource.scala
+++
b/worker/src/main/scala/org/apache/celeborn/service/deploy/worker/WorkerSource.scala
@@ -79,9 +79,12 @@ class WorkerSource(conf: CelebornConf) extends
AbstractSource(conf, Role.WORKER)
addTimer(OPEN_STREAM_TIME)
addTimer(TAKE_BUFFER_TIME)
addTimer(SORT_TIME)
+ addTimer(FETCH_CHUNK_TRANSFER_TIME)
addTimer(CLEAN_EXPIRED_SHUFFLE_KEYS_TIME)
+ addHistogram(FETCH_CHUNK_TRANSFER_SIZE)
+
def getCounterCount(metricsName: String): Long = {
val metricNameWithLabel = metricNameWithCustomizedLabels(metricsName,
Map.empty)
namedCounters.get(metricNameWithLabel).counter.getCount
@@ -137,6 +140,8 @@ object WorkerSource {
val OPEN_STREAM_FAIL_COUNT = "OpenStreamFailCount"
val FETCH_CHUNK_SUCCESS_COUNT = "FetchChunkSuccessCount"
val FETCH_CHUNK_FAIL_COUNT = "FetchChunkFailCount"
+ val FETCH_CHUNK_TRANSFER_SIZE = "FetchChunkTransferSize"
+ val FETCH_CHUNK_TRANSFER_TIME = "FetchChunkTransferTime"
// push data
val PRIMARY_PUSH_DATA_TIME = "PrimaryPushDataTime"