(skywalking) branch master updated: Fix inaccurate APISIX metrics (#12108)

wusheng Tue, 16 Apr 2024 02:40:14 -0700

This is an automated email from the ASF dual-hosted git repository.

wusheng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/skywalking.git



The following commit(s) were added to refs/heads/master by this push:
     new bea7ed3f4a Fix inaccurate APISIX metrics (#12108)
bea7ed3f4a is described below

commit bea7ed3f4a3f5bca777bf69c7c8ea8d3d787c35a
Author: pg.yang <pg.y...@hotmail.com>
AuthorDate: Tue Apr 16 17:39:37 2024 +0800

    Fix inaccurate APISIX metrics (#12108)
---
 docs/en/api/metrics-query-expression.md            |  3 ---
 docs/en/changes/changes.md                         |  1 +
 .../src/main/resources/otel-rules/apisix.yaml      | 24 ++++++++++----------
 .../apisix/apisix-endpoint.json                    |  6 ++---
 .../apisix/apisix-service.json                     | 26 +++++++++++-----------
 test/e2e-v2/cases/apisix/apisix-cases.yaml         |  4 ++--
 6 files changed, 31 insertions(+), 33 deletions(-)

diff --git a/docs/en/api/metrics-query-expression.md b/docs/en/api/metrics-query-expression.md
index fa2446e297..dfba7c2e41 100644
--- a/docs/en/api/metrics-query-expression.md
+++ b/docs/en/api/metrics-query-expression.md
@@ -251,9 +251,6 @@ The order of the new label values should be the same as the order of the label v
 For example:
 If we want to query the `service_percentile` metric with the label values `50,75,90,95,99`, and rename the label name to `percentile` and the label values to `P50,P75,P90,P95,P99`, we can use the following expression:
 
-```text
-and rename the label values to `P50,P75,P90,P95,P99`, we can use the following expression:
-
 ```text
 relabel(service_percentile{p='50,75,90,95,99'}, p='50,75,90,95,99', 
percentile='P50,P75,P90,P95,P99')
 ```
diff --git a/docs/en/changes/changes.md b/docs/en/changes/changes.md
index c530768508..8789812c14 100644
--- a/docs/en/changes/changes.md
+++ b/docs/en/changes/changes.md
@@ -105,6 +105,7 @@
   - `memory_swap_percentage` -> `memory_virtual_memory_percentage`
 * Fix/Change UI init setting for Windows Swap -> Virtual Memory
 * Fix `Memory Swap Usage`/`Virtual Memory Usage` display with UI init.(Linux/Windows)
+* Fix inaccurate APISIX metrics
 
 #### UI
 
diff --git a/oap-server/server-starter/src/main/resources/otel-rules/apisix.yaml b/oap-server/server-starter/src/main/resources/otel-rules/apisix.yaml
index aa3e768b90..4334fb09f3 100644
--- a/oap-server/server-starter/src/main/resources/otel-rules/apisix.yaml
+++ b/oap-server/server-starter/src/main/resources/otel-rules/apisix.yaml
@@ -36,24 +36,24 @@ metricsRules:
   # Service
     # Ignore http_connections metrics with accepted and handled state as the 
actual type is counter
   - name: sv_http_connections
-    exp: 
apisix_nginx_http_current_connections.tagNotMatch('state','accepted|handled').sum(['state','service_name']).service(['service_name']
 , Layer.APISIX)
+    exp: 
apisix_nginx_http_current_connections.tagNotMatch('state','accepted|handled').sum(['state','service_name','node']).service(['service_name']
 , Layer.APISIX)
   - name: sv_http_requests
-    exp: 
apisix_http_requests_total.sum(['service_instance_id','service_name']).rate('PT1M').service(['service_name']
 , Layer.APISIX)
+    exp: 
apisix_http_requests_total.sum(['service_instance_id','service_name','node']).rate('PT1M').service(['service_name']
 , Layer.APISIX)
     # Not match any route
     # Refer to  https://apisix.apache.org/docs/apisix/plugins/prometheus/
   - name: sv_bandwidth_unmatched
-    exp: apisix_bandwidth.tagEqual('route' , '' , 'node' , 
'').sum(['type','service_name']).rate('PT1M').service(['service_name'] , 
Layer.APISIX)
+    exp: apisix_bandwidth.tagEqual('route' , '' , 'node' , 
'').sum(['type','service_name','node']).rate('PT1M').service(['service_name'] , 
Layer.APISIX)
   - name: sv_http_status_unmatched
-    exp: apisix_http_status.tagEqual('route' , '' , 'node' , 
'').sum(['code','service_name']).rate('PT1M').service(['service_name'] , 
Layer.APISIX)
+    exp: apisix_http_status.tagEqual('route' , '' , 'node' , 
'').sum(['code','service_name','node']).rate('PT1M').service(['service_name'] , 
Layer.APISIX)
   - name: sv_http_latency_unmatched
-    exp: apisix_http_latency.tagEqual('route' , '' , 'node' , 
'').sum(['type','le','service_name']).histogram().histogram_percentile([50,70,90,99]).service(['service_name']
 , Layer.APISIX)
+    exp: apisix_http_latency.tagEqual('route' , '' , 'node' , 
'').sum(['type','le','service_name','node']).histogram().histogram_percentile([50,70,90,99]).service(['service_name']
 , Layer.APISIX)
    # Match a route
   - name: sv_bandwidth_matched
-    exp: apisix_bandwidth.tagNotEqual('route' , '' , 'node' , 
'').sum(['type','service_name']).rate('PT1M').service(['service_name'] , 
Layer.APISIX)
+    exp: apisix_bandwidth.tagNotEqual('route' , '' , 'node' , 
'').sum(['type','service_name','node']).rate('PT1M').service(['service_name'] , 
Layer.APISIX)
   - name: sv_http_status_matched
-    exp: apisix_http_status.tagNotEqual('route' , '' , 'node' , 
'').sum(['code','service_name']).rate('PT1M').service(['service_name'] , 
Layer.APISIX)
+    exp: apisix_http_status.tagNotEqual('route' , '' , 'node' , 
'').sum(['code','service_name','node']).rate('PT1M').service(['service_name'] , 
Layer.APISIX)
   - name: sv_http_latency_matched
-    exp: apisix_http_latency.tagNotEqual('route' , '' , 'node' , 
'').sum(['type','le','service_name']).histogram().histogram_percentile([50,70,90,99]).service(['service_name']
 , Layer.APISIX)
+    exp: apisix_http_latency.tagNotEqual('route' , '' , 'node' , 
'').sum(['type','le','service_name','node']).histogram().histogram_percentile([50,70,90,99]).service(['service_name']
 , Layer.APISIX)
 
   # Instance
   - name: instance_shared_dict_capacity_bytes
@@ -88,15 +88,15 @@ metricsRules:
   # Endpoint
     # Reorganization metrics which has `route` label as endpoint ,that is 
formatted to `router/{routerId}`
   - name: endpoint_http_status
-    exp: apisix_http_status.tagNotEqual('route','').tag({tags->tags.route = 
'route/'+tags['route']}).sum(['code','service_name','route']).rate('PT1M').endpoint(['service_name'],['route'],
 Layer.APISIX)
+    exp: apisix_http_status.tagNotEqual('route','').tag({tags->tags.route = 
'route/'+tags['route']}).sum(['code','service_name','route','node']).rate('PT1M').endpoint(['service_name'],['route'],
 Layer.APISIX)
   - name: endpoint_bandwidth
-    exp: apisix_bandwidth.tagNotEqual('route','').tag({tags->tags.route = 
'route/'+tags['route']}).sum(['type','service_name','route']).rate('PT1M').endpoint(['service_name'],['route'],
 Layer.APISIX)
+    exp: apisix_bandwidth.tagNotEqual('route','').tag({tags->tags.route = 
'route/'+tags['route']}).sum(['type','service_name','route','node']).rate('PT1M').endpoint(['service_name'],['route'],
 Layer.APISIX)
   - name: endpoint_http_latency
-    exp: apisix_http_latency.tagNotEqual('route','').tag({tags->tags.route = 
'route/'+tags['route']}).sum(['type','le','service_name','route']).histogram().histogram_percentile([50,70,90,99]).endpoint(['service_name'],['route'],
 Layer.APISIX)
+    exp: apisix_http_latency.tagNotEqual('route','').tag({tags->tags.route = 
'route/'+tags['route']}).sum(['type','le','service_name','route','node']).histogram().histogram_percentile([50,70,90,99]).endpoint(['service_name'],['route'],
 Layer.APISIX)
     # Reorganization metrics which has `node` label as endpoint , that is 
formatted to `node/{node}`
   - name: endpoint_http_status
     exp: apisix_http_status.tagNotEqual('node','').tag({tags->tags.node = 
'upstream/'+tags['node']}).sum(['code','service_name','node']).rate('PT1M').endpoint(['service_name'],['node'],
 Layer.APISIX)
   - name: endpoint_bandwidth
     exp: apisix_bandwidth.tagNotEqual('node','').tag({tags->tags.node = 
'upstream/'+tags['node']}).sum(['type','service_name','node']).rate('PT1M').endpoint(['service_name'],['node'],
 Layer.APISIX)
   - name: endpoint_http_latency
-    exp: apisix_http_latency.tagNotEqual('node','').tag({tags->tags.node = 
'upstream/'+tags['node']})sum(['type','le','service_name','node']).histogram().histogram_percentile([50,70,90,99]).endpoint(['service_name'],['node'],
 Layer.APISIX)
+    exp: apisix_http_latency.tagNotEqual('node','').tag({tags->tags.node = 
'upstream/'+tags['node']}).sum(['type','le','service_name','node']).histogram().histogram_percentile([50,70,90,99]).endpoint(['service_name'],['node'],
 Layer.APISIX)
diff --git a/oap-server/server-starter/src/main/resources/ui-initialized-templates/apisix/apisix-endpoint.json b/oap-server/server-starter/src/main/resources/ui-initialized-templates/apisix/apisix-endpoint.json
index 4a85bde867..d418fd0994 100644
--- a/oap-server/server-starter/src/main/resources/ui-initialized-templates/apisix/apisix-endpoint.json
+++ b/oap-server/server-starter/src/main/resources/ui-initialized-templates/apisix/apisix-endpoint.json
@@ -22,7 +22,7 @@
             "showYAxis": true
           },
           "expressions": [
-            "meter_apisix_endpoint_http_status"
+            "aggregate_labels(meter_apisix_endpoint_http_status,sum(code))"
           ],
           "associate": [
             {
@@ -55,7 +55,7 @@
             "showYAxis": true
           },
           "expressions": [
-            "meter_apisix_endpoint_http_latency"
+            "aggregate_labels(meter_apisix_endpoint_http_latency,avg(type,p))"
           ],
           "associate": [
             {
@@ -88,7 +88,7 @@
             "showYAxis": true
           },
           "expressions": [
-            "meter_apisix_endpoint_bandwidth/1024"
+            "aggregate_labels(meter_apisix_endpoint_bandwidth/1024,sum(type))"
           ],
           "associate": [
             {
diff --git a/oap-server/server-starter/src/main/resources/ui-initialized-templates/apisix/apisix-service.json b/oap-server/server-starter/src/main/resources/ui-initialized-templates/apisix/apisix-service.json
index 3a7f50420f..0637e819f8 100644
--- a/oap-server/server-starter/src/main/resources/ui-initialized-templates/apisix/apisix-service.json
+++ b/oap-server/server-starter/src/main/resources/ui-initialized-templates/apisix/apisix-service.json
@@ -22,7 +22,7 @@
             "showYAxis": true
           },
           "expressions": [
-            "meter_apisix_sv_http_requests"
+            
"aggregate_labels(meter_apisix_sv_http_requests,sum(service_instance_id))"
           ]
         },
         {
@@ -44,7 +44,7 @@
             "showYAxis": true
           },
           "expressions": [
-            "meter_apisix_sv_http_status_matched"
+            "aggregate_labels(meter_apisix_sv_http_status_matched,sum(code))"
           ],
           "associate": [
             {
@@ -89,7 +89,7 @@
             "showYAxis": true
           },
           "expressions": [
-            "meter_apisix_sv_http_latency_matched"
+            
"aggregate_labels(meter_apisix_sv_http_latency_matched,avg(type,p))"
           ],
           "associate": [
             {
@@ -134,7 +134,7 @@
             "showYAxis": true
           },
           "expressions": [
-            "meter_apisix_sv_bandwidth_matched/1024"
+            
"aggregate_labels(meter_apisix_sv_bandwidth_matched/1024,sum(type))"
           ],
           "associate": [
             {
@@ -168,7 +168,7 @@
           "i": "5",
           "type": "Widget",
           "expressions": [
-            "meter_apisix_sv_http_connections"
+            "aggregate_labels(meter_apisix_sv_http_connections,sum(state))"
           ],
           "graph": {
             "type": "Line",
@@ -224,7 +224,7 @@
             "showYAxis": true
           },
           "expressions": [
-            "meter_apisix_sv_http_status_unmatched"
+            "aggregate_labels(meter_apisix_sv_http_status_unmatched,sum(code))"
           ],
           "associate": [
             {
@@ -269,7 +269,7 @@
             "showYAxis": true
           },
           "expressions": [
-            "meter_apisix_sv_http_latency_unmatched"
+            
"aggregate_labels(meter_apisix_sv_http_latency_unmatched,avg(type,p))"
           ],
           "associate": [
             {
@@ -314,7 +314,7 @@
             "showYAxis": true
           },
           "expressions": [
-            "meter_apisix_sv_bandwidth_unmatched/1024"
+            
"aggregate_labels(meter_apisix_sv_bandwidth_unmatched/1024,sum(type))"
           ],
           "associate": [
             {
@@ -379,11 +379,11 @@
       "isRoot": false,
       "isDefault": true,
       "expressions": [
-        "avg(meter_apisix_sv_http_status_matched{code='200'})",
-        "avg(meter_apisix_sv_http_status_matched{code='304'})",
-        "avg(meter_apisix_sv_http_status_matched{code='404'})",
-        "avg(meter_apisix_sv_http_status_matched{code='499'})",
-        "avg(meter_apisix_sv_http_status_matched{code='503'})"
+        
"avg(aggregate_labels(meter_apisix_sv_http_status_matched{code='200'},sum(code)))",
+        
"avg(aggregate_labels(meter_apisix_sv_http_status_matched{code='304'},sum(code)))",
+        
"avg(aggregate_labels(meter_apisix_sv_http_status_matched{code='404'},sum(code)))",
+        
"avg(aggregate_labels(meter_apisix_sv_http_status_matched{code='499'},sum(code)))",
+        
"avg(aggregate_labels(meter_apisix_sv_http_status_matched{code='503'},sum(code)))"
       ],
       "expressionsConfig": [
         {
diff --git a/test/e2e-v2/cases/apisix/apisix-cases.yaml b/test/e2e-v2/cases/apisix/apisix-cases.yaml
index 8f95770e13..df3b3d44c9 100644
--- a/test/e2e-v2/cases/apisix/apisix-cases.yaml
+++ b/test/e2e-v2/cases/apisix/apisix-cases.yaml
@@ -22,7 +22,7 @@
     - query: swctl --display yaml 
--base-url=http://${oap_host}:${oap_12800}/graphql instance ls 
--service-name=APISIX::showcase-apisix-service
       expected:  expected/instance.yml
     # service metrics
-    - query: swctl --display yaml 
--base-url=http://${oap_host}:${oap_12800}/graphql metrics exec 
--expression=meter_apisix_sv_http_connections 
--service-name=APISIX::showcase-apisix-service
+    - query: swctl --display yaml 
--base-url=http://${oap_host}:${oap_12800}/graphql metrics exec 
--expression='aggregate_labels(meter_apisix_sv_http_connections,sum(state))' 
--service-name=APISIX::showcase-apisix-service
       expected: expected/metrics-has-connection-value-label.yml
     # instance metrics
     - query: |
@@ -31,5 +31,5 @@
         )
       expected: expected/metrics-has-status-value-label.yml
     # endpoint metrics
-    - query: swctl --display yaml 
--base-url=http://${oap_host}:${oap_12800}/graphql metrics exec 
--expression=meter_apisix_endpoint_http_latency 
--endpoint-name='route/routes#1' --service-name=APISIX::showcase-apisix-service
+    - query: swctl --display yaml 
--base-url=http://${oap_host}:${oap_12800}/graphql metrics exec 
--expression='aggregate_labels(meter_apisix_endpoint_http_latency,avg(type,p))' 
--endpoint-name='route/routes#1' --service-name=APISIX::showcase-apisix-service
       expected: expected/metrics-has-latency-value-label.yml

(skywalking) branch master updated: Fix inaccurate APISIX metrics (#12108)

Reply via email to