This is an automated email from the ASF dual-hosted git repository.
wusheng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/skywalking.git
The following commit(s) were added to refs/heads/master by this push:
new 8e51a87197 OAP Self observability: Add JVM heap and direct memory used metrics. (#12974)
8e51a87197 is described below
commit 8e51a8719798ad284b9de7c6cbb79f4fc1f9580f
Author: Wan Kai <[email protected]>
AuthorDate: Wed Jan 15 19:06:46 2025 +0800
OAP Self observability: Add JVM heap and direct memory used metrics. (#12974)
---
docs/en/changes/changes.md | 2 +
.../src/main/resources/otel-rules/oap.yaml | 4 +-
.../so11y_oap/so11y-instance.json | 59 ++++++++++++----------
.../expected/metrics-has-memory-value-label.yml | 51 +++++++++++++++++++
test/e2e-v2/cases/so11y/so11y-cases.yaml | 2 +-
5 files changed, 90 insertions(+), 28 deletions(-)
diff --git a/docs/en/changes/changes.md b/docs/en/changes/changes.md
index 266d3d7d43..93f328bff0 100644
--- a/docs/en/changes/changes.md
+++ b/docs/en/changes/changes.md
@@ -62,6 +62,7 @@
* Add Circuit Breaking mechanism.
* BanyanDB: Add support for compatibility checks based on the BanyanDB
server's API version.
* MQE: Support `&&(and)`, `||(or)` bool operators.
+* OAP self observability: Add JVM heap and direct memory used metrics.
#### UI
@@ -86,6 +87,7 @@
* Fix inaccurate data query issue on endpoint topology page.
* Update browser dashboard for the new metrics.
* Visualize `Snapshot` on `Alerting` page.
+* OAP self observability dashboard: Add JVM heap and direct memory used metrics.
#### Documentation
* Update release document to adopt newly added revision-based process.
diff --git a/oap-server/server-starter/src/main/resources/otel-rules/oap.yaml b/oap-server/server-starter/src/main/resources/otel-rules/oap.yaml
index 72e6b92284..e238575627 100644
--- a/oap-server/server-starter/src/main/resources/otel-rules/oap.yaml
+++ b/oap-server/server-starter/src/main/resources/otel-rules/oap.yaml
@@ -35,7 +35,9 @@ metricsRules:
- name: instance_cpu_percentage
exp: (process_cpu_seconds_total * 100).sum(['service',
'host_name']).rate('PT1M')
- name: instance_jvm_memory_bytes_used
- exp: jvm_memory_bytes_used.sum(['service', 'host_name'])
+ exp: jvm_memory_bytes_used.sum(['service', 'host_name', 'area'])
+ - name: instance_jvm_buffer_pool_bytes_used
+ exp: jvm_buffer_pool_used_bytes.sum(['service', 'host_name', 'pool'])
- name: instance_jvm_gc_count
exp: >
jvm_gc_collection_seconds_count.tagMatch('gc', 'PS
Scavenge|Copy|ParNew|G1 Young Generation|PS
MarkSweep|MarkSweepCompact|ConcurrentMarkSweep|G1 Old Generation')
diff --git a/oap-server/server-starter/src/main/resources/ui-initialized-templates/so11y_oap/so11y-instance.json b/oap-server/server-starter/src/main/resources/ui-initialized-templates/so11y_oap/so11y-instance.json
index 3a9403c260..edb00a54d8 100644
--- a/oap-server/server-starter/src/main/resources/ui-initialized-templates/so11y_oap/so11y-instance.json
+++ b/oap-server/server-starter/src/main/resources/ui-initialized-templates/so11y_oap/so11y-instance.json
@@ -44,7 +44,7 @@
"i": "1",
"type": "Widget",
"widget": {
- "title": "Memory (MB)"
+ "title": "Memory Used (MB)"
},
"graph": {
"type": "Line",
@@ -55,12 +55,13 @@
"showYAxis": true
},
"expressions": [
- "meter_oap_instance_jvm_memory_bytes_used/1024/1024"
+ "meter_oap_instance_jvm_memory_bytes_used/1024/1024",
+ ""
]
},
{
- "x": 12,
- "y": 0,
+ "x": 0,
+ "y": 13,
"w": 6,
"h": 13,
"i": "0",
@@ -100,7 +101,7 @@
},
{
"x": 0,
- "y": 13,
+ "y": 26,
"w": 6,
"h": 13,
"i": "11",
@@ -193,7 +194,7 @@
},
{
"x": 0,
- "y": 26,
+ "y": 39,
"w": 6,
"h": 13,
"i": "15",
@@ -317,10 +318,10 @@
]
},
{
- "x": 0,
+ "x": 12,
"y": 39,
"w": 6,
- "h": 11,
+ "h": 13,
"i": "16",
"type": "Widget",
"expressions": [
@@ -347,7 +348,7 @@
"x": 6,
"y": 39,
"w": 6,
- "h": 11,
+ "h": 13,
"i": "17",
"type": "Widget",
"widget": {
@@ -369,6 +370,28 @@
"label": "Error Count"
}
]
+ },
+ {
+ "x": 12,
+ "y": 0,
+ "w": 6,
+ "h": 13,
+ "i": "18",
+ "type": "Widget",
+ "expressions": [
+ "meter_oap_instance_jvm_buffer_pool_bytes_used/1024/1024"
+ ],
+ "graph": {
+ "type": "Line",
+ "step": false,
+ "smooth": false,
+ "showSymbol": true,
+ "showXAxis": true,
+ "showYAxis": true
+ },
+ "widget": {
+ "title": "Buffer Pool Used(MB)"
+ }
}
]
},
@@ -691,23 +714,7 @@
"layer": "SO11Y_OAP",
"entity": "ServiceInstance",
"name": "Self-Observability-OAP-Instance",
- "id": "Self-Observability-OAP-Instance",
- "isRoot": false,
- "isDefault": true,
- "expressions": [
- "avg(meter_oap_instance_cpu_percentage)",
- "avg(meter_oap_instance_persistence_execute_count)"
- ],
- "expressionsConfig": [
- {
- "unit": "%",
- "label": "CPU Avg Usage"
- },
- {
- "unit": "count / 5min",
- "label": "Persistence Count"
- }
- ]
+ "isRoot": false
}
}
]
diff --git a/test/e2e-v2/cases/so11y/expected/metrics-has-memory-value-label.yml b/test/e2e-v2/cases/so11y/expected/metrics-has-memory-value-label.yml
new file mode 100644
index 0000000000..8b5a24de62
--- /dev/null
+++ b/test/e2e-v2/cases/so11y/expected/metrics-has-memory-value-label.yml
@@ -0,0 +1,51 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+debuggingtrace: null
+type: TIME_SERIES_VALUES
+results:
+ {{- contains .results }}
+ - metric:
+ labels:
+ - key: area
+ value: "heap"
+ values:
+ {{- contains .values }}
+ - id: {{ notEmpty .id }}
+ value: {{ .value }}
+ owner: null
+ traceid: null
+ - id: {{ notEmpty .id }}
+ value: null
+ owner: null
+ traceid: null
+ {{- end}}
+ - metric:
+ labels:
+ - key: area
+ value: "nonheap"
+ values:
+ {{- contains .values }}
+ - id: {{ notEmpty .id }}
+ value: {{ .value }}
+ owner: null
+ traceid: null
+ - id: {{ notEmpty .id }}
+ value: null
+ owner: null
+ traceid: null
+ {{- end}}
+ {{- end}}
+error: null
diff --git a/test/e2e-v2/cases/so11y/so11y-cases.yaml b/test/e2e-v2/cases/so11y/so11y-cases.yaml
index 331f0f88b2..1771da920d 100644
--- a/test/e2e-v2/cases/so11y/so11y-cases.yaml
+++ b/test/e2e-v2/cases/so11y/so11y-cases.yaml
@@ -27,7 +27,7 @@
- query: swctl --display yaml
--base-url=http://${oap_host}:${oap_12800}/graphql metrics exec
--expression=meter_oap_instance_cpu_percentage
--instance-name=http://localhost:1234 --service-name=oap-server
expected: expected/metrics-has-value.yml
- query: swctl --display yaml
--base-url=http://${oap_host}:${oap_12800}/graphql metrics exec
--expression=meter_oap_instance_jvm_memory_bytes_used
--instance-name=http://localhost:1234 --service-name=oap-server
- expected: expected/metrics-has-value.yml
+ expected: expected/metrics-has-memory-value-label.yml
- query: swctl --display yaml
--base-url=http://${oap_host}:${oap_12800}/graphql metrics exec
--expression=meter_oap_instance_trace_count
--instance-name=http://localhost:1234 --service-name=oap-server
expected: expected/metrics-has-value.yml
- query: swctl --display yaml
--base-url=http://${oap_host}:${oap_12800}/graphql metrics exec
--expression="meter_oap_instance_metrics_aggregation{level='L1 aggregation'}"
--instance-name=http://localhost:1234 --service-name=oap-server