This is an automated email from the ASF dual-hosted git repository.
ashishtiwari pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/apisix.git
The following commit(s) were added to refs/heads/master by this push:
new de7bd0bb4 fix: each metric should have an expire time (#11838)
de7bd0bb4 is described below
commit de7bd0bb47782f61e7b170c8d42a21b7daa5a001
Author: Ashish Tiwari <[email protected]>
AuthorDate: Tue Dec 17 14:22:39 2024 +0530
fix: each metric should have an expire time (#11838)
---
apisix/plugins/prometheus/exporter.lua | 20 +++--
conf/config.yaml.example | 12 ++-
t/plugin/prometheus-metric-expire.t | 132 +++++++++++++++++++++++++++++++++
t/plugin/prometheus4.t | 98 ++----------------------
4 files changed, 157 insertions(+), 105 deletions(-)
diff --git a/apisix/plugins/prometheus/exporter.lua
b/apisix/plugins/prometheus/exporter.lua
index fc282031e..a255e25ab 100644
--- a/apisix/plugins/prometheus/exporter.lua
+++ b/apisix/plugins/prometheus/exporter.lua
@@ -136,10 +136,14 @@ function _M.http_init(prometheus_enabled_in_stream)
metric_prefix = attr.metric_prefix
end
- local exptime
- if attr and attr.expire then
- exptime = attr.expire
- end
+ local status_metrics_exptime = core.table.try_read_attr(attr, "metrics",
+ "http_status", "expire")
+ local latency_metrics_exptime = core.table.try_read_attr(attr, "metrics",
+ "http_latency", "expire")
+ local bandwidth_metrics_exptime = core.table.try_read_attr(attr, "metrics",
+ "bandwidth", "expire")
+ local upstream_status_exptime = core.table.try_read_attr(attr, "metrics",
+ "upstream_status", "expire")
prometheus = base_prometheus.init("prometheus-metrics", metric_prefix)
@@ -172,7 +176,7 @@ function _M.http_init(prometheus_enabled_in_stream)
metrics.upstream_status = prometheus:gauge("upstream_status",
"Upstream status from health check",
{"name", "ip", "port"},
- exptime)
+ upstream_status_exptime)
-- per service
@@ -183,7 +187,7 @@ function _M.http_init(prometheus_enabled_in_stream)
"HTTP status codes per service in APISIX",
{"code", "route", "matched_uri", "matched_host", "service",
"consumer", "node",
unpack(extra_labels("http_status"))},
- exptime)
+ status_metrics_exptime)
local buckets = DEFAULT_BUCKETS
if attr and attr.default_buckets then
@@ -193,12 +197,12 @@ function _M.http_init(prometheus_enabled_in_stream)
metrics.latency = prometheus:histogram("http_latency",
"HTTP request latency in milliseconds per service in APISIX",
{"type", "route", "service", "consumer", "node",
unpack(extra_labels("http_latency"))},
- buckets, exptime)
+ buckets, latency_metrics_exptime)
metrics.bandwidth = prometheus:counter("bandwidth",
"Total bandwidth in bytes consumed per service in APISIX",
{"type", "route", "service", "consumer", "node",
unpack(extra_labels("bandwidth"))},
- exptime)
+ bandwidth_metrics_exptime)
if prometheus_enabled_in_stream then
init_stream_metrics()
diff --git a/conf/config.yaml.example b/conf/config.yaml.example
index 99af131d1..8052beef6 100644
--- a/conf/config.yaml.example
+++ b/conf/config.yaml.example
@@ -596,25 +596,29 @@ plugin_attr: # Plugin attributes
port: 9091 # Set the port.
# metrics: # Create extra labels from nginx variables:
https://nginx.org/en/docs/varindex.html
# http_status:
+ # expire: 0 # The expiration time after which metrics are removed.
unit: second.
+ # # 0 means the metrics will not expire
# extra_labels:
# - upstream_addr: $upstream_addr
# - status: $upstream_status # The label name does not need to be
the same as the variable name.
# http_latency:
+ # expire: 0 # The expiration time after which metrics are removed.
unit: second.
+ # # 0 means the metrics will not expire
# extra_labels:
# - upstream_addr: $upstream_addr
# bandwidth:
+ # expire: 0 # The expiration time after which metrics are removed.
unit: second.
+ # # 0 means the metrics will not expire
# extra_labels:
# - upstream_addr: $upstream_addr
+ # upstream_status:
+ # expire: 0 # The expiration time after which metrics are removed.
unit: second. 0 means the metrics will not expire.
# default_buckets:
# - 10
# - 50
# - 100
# - 200
# - 500
- # expire: 0 # The expiration time of metrics in
seconds.
- # 0 means the metrics will not expire.
- # Only affect apisix_http_status,
apisix_bandwidth, and apisix_http_latency.
- # If you need to set the expiration
time, it is recommended to use 600, which is 10 minutes.
server-info: # Plugin: server-info
report_ttl: 60 # Set the TTL in seconds for server info
in etcd.
# Maximum: 86400. Minimum: 3.
diff --git a/t/plugin/prometheus-metric-expire.t
b/t/plugin/prometheus-metric-expire.t
new file mode 100644
index 000000000..caad85ea0
--- /dev/null
+++ b/t/plugin/prometheus-metric-expire.t
@@ -0,0 +1,132 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+BEGIN {    # BEGIN runs at compile time, i.e. before the `use t::APISIX` line below is processed
+ if ($ENV{TEST_NGINX_CHECK_LEAK}) {    # leak-check mode was requested through the environment
+ $SkipReason = "unavailable for the hup tests";    # NOTE(review): presumably read by the test harness to skip this file - confirm
+
+ } else {
+ $ENV{TEST_NGINX_USE_HUP} = 1;    # opt this file into HUP mode (see the skip reason above)
+ undef $ENV{TEST_NGINX_USE_STAP};    # make sure the STAP variable is unset while HUP mode is on
+ }
+}
+
+use t::APISIX 'no_plan';
+
+add_block_preprocessor(sub {    # callback receives each test block before it is executed
+ my ($block) = @_;
+
+ if (!defined $block->request) {    # block has no `--- request` section of its own
+ $block->set_value("request", "GET /t");    # default to hitting the /t location defined in the block's config
+ }
+});
+
+run_tests;
+
+__DATA__
+
+=== TEST 1: set route with prometheus ttl
+--- yaml_config
+plugin_attr:
+ prometheus:
+ default_buckets:
+ - 15
+ - 55
+ - 105
+ - 205
+ - 505
+ metrics:
+ http_status:
+ expire: 1
+ http_latency:
+ expire: 1
+ bandwidth:
+ expire: 1
+--- config
+ location /t {
+ content_by_lua_block {
+            -- Scenario: expose the metrics endpoint, create a proxied route, send
+            -- one request so per-route series get created, wait longer than the
+            -- 1s `expire` configured in yaml_config, then verify that those
+            -- series are no longer present in the scrape output.
+            local t = require("lib.test_admin").test
+            local code = t('/apisix/admin/routes/metrics',
+                ngx.HTTP_PUT,
+                [[{
+                    "plugins": {
+                        "public-api": {}
+                    },
+                    "uri": "/apisix/prometheus/metrics"
+                }]]
+                )
+            if code >= 300 then
+                ngx.status = code
+                return
+            end
+            local code, body = t('/apisix/admin/routes/1',
+                ngx.HTTP_PUT,
+                [[{
+                    "plugins": {
+                        "prometheus": {}
+                    },
+                    "upstream": {
+                        "nodes": {
+                            "127.0.0.1:1980": 1
+                        },
+                        "type": "roundrobin"
+                    },
+                    "uri": "/hello1"
+                }]]
+                )
+            if code >= 300 then
+                ngx.status = code
+                ngx.say(body)
+                return
+            end
+            -- generate status / latency / bandwidth samples for route 1
+            local code, body = t('/hello1',
+                ngx.HTTP_GET,
+                "",
+                nil,
+                nil
+                )
+            if code >= 300 then
+                ngx.status = code
+                ngx.say(body)
+                return
+            end
+            -- sleep past the 1 second expire so the samples above are dropped
+            ngx.sleep(2)
+            local code, pass, body = t('/apisix/prometheus/metrics',
+                ngx.HTTP_GET,
+                "",
+                nil,
+                nil
+                )
+            -- a failed scrape must not be mistaken for "metrics have expired"
+            if code >= 300 then
+                ngx.status = code
+                ngx.say("failed to fetch metrics, status: " .. code)
+                return
+            end
+
+            -- Use the full exported names. "_" is a word character, so a
+            -- pattern such as \bhttp_status\b can never match
+            -- "apisix_http_status{...}" and the check would pass vacuously.
+            local metrics_to_check = {"apisix_bandwidth", "apisix_http_latency",
+                                      "apisix_http_status"}
+
+            -- verify that above mentioned metrics are not in the metrics response
+            for _, v in ipairs(metrics_to_check) do
+                -- no trailing \b: histogram series carry suffixes such as
+                -- "_bucket", which a trailing boundary would skip
+                local match, err = ngx.re.match(body, "\\b" .. v, "m")
+                if err then
+                    ngx.status = 500
+                    ngx.say("failed to match " .. v .. ": " .. err)
+                    return
+                end
+                if match then
+                    ngx.status = 500
+                    ngx.say("error found " .. v .. " in metrics")
+                    return
+                end
+            end
+
+            ngx.say("passed")
+ }
+ }
+--- request
+GET /t
+--- response_body
+passed
diff --git a/t/plugin/prometheus4.t b/t/plugin/prometheus4.t
index 758f2aae9..2cc1508c8 100644
--- a/t/plugin/prometheus4.t
+++ b/t/plugin/prometheus4.t
@@ -192,95 +192,7 @@
apisix_http_latency_bucket\{type="upstream",route="1",service="",consumer="",nod
-=== TEST 9: set route with prometheus ttl
---- yaml_config
-plugin_attr:
- prometheus:
- default_buckets:
- - 15
- - 55
- - 105
- - 205
- - 505
- expire: 1
---- config
- location /t {
- content_by_lua_block {
- local t = require("lib.test_admin").test
-
- local code = t('/apisix/admin/routes/metrics',
- ngx.HTTP_PUT,
- [[{
- "plugins": {
- "public-api": {}
- },
- "uri": "/apisix/prometheus/metrics"
- }]]
- )
- if code >= 300 then
- ngx.status = code
- return
- end
-
- local code, body = t('/apisix/admin/routes/1',
- ngx.HTTP_PUT,
- [[{
- "plugins": {
- "prometheus": {}
- },
- "upstream": {
- "nodes": {
- "127.0.0.1:1980": 1
- },
- "type": "roundrobin"
- },
- "uri": "/hello1"
- }]]
- )
-
- if code >= 300 then
- ngx.status = code
- ngx.say(body)
- return
- end
-
- local code, body = t('/hello1',
- ngx.HTTP_GET,
- "",
- nil,
- nil
- )
-
- if code >= 300 then
- ngx.status = code
- ngx.say(body)
- return
- end
-
- ngx.sleep(2)
-
- local code, pass, body = t('/apisix/prometheus/metrics',
- ngx.HTTP_GET,
- "",
- nil,
- nil
- )
- ngx.status = code
- ngx.say(body)
- }
- }
---- request
-GET /t
---- response_body_unlike eval
-qr/apisix_http_latency_bucket\{type="upstream",route="1",service="",consumer="",node="127.0.0.1",le="15"\}
\d+
-apisix_http_latency_bucket\{type="upstream",route="1",service="",consumer="",node="127.0.0.1",le="55"\}
\d+
-apisix_http_latency_bucket\{type="upstream",route="1",service="",consumer="",node="127.0.0.1",le="105"\}
\d+
-apisix_http_latency_bucket\{type="upstream",route="1",service="",consumer="",node="127.0.0.1",le="205"\}
\d+
-apisix_http_latency_bucket\{type="upstream",route="1",service="",consumer="",node="127.0.0.1",le="505"\}
\d+/
-
-
-
-=== TEST 10: set sys plugins
+=== TEST 9: set sys plugins
--- config
location /t {
content_by_lua_block {
@@ -332,7 +244,7 @@ passed
-=== TEST 11: remove prometheus -> reload -> send batch request -> add
prometheus for next tests
+=== TEST 10: remove prometheus -> reload -> send batch request -> add
prometheus for next tests
--- yaml_config
deployment:
role: traditional
@@ -366,7 +278,7 @@ qr/404 Not Found/
-=== TEST 12: fetch prometheus metrics -> batch_process_entries metrics should
not be present
+=== TEST 11: fetch prometheus metrics -> batch_process_entries metrics should
not be present
--- yaml_config
deployment:
role: traditional
@@ -387,14 +299,14 @@
qr/apisix_batch_process_entries\{name="sys-logger",route_id="9",server_addr="127
-=== TEST 13: hit batch-process-metrics with prometheus enabled from TEST 11
+=== TEST 12: hit batch-process-metrics with prometheus enabled from TEST 10
--- request
GET /batch-process-metrics
--- error_code: 404
-=== TEST 14: batch_process_entries metrics should be present now
+=== TEST 13: batch_process_entries metrics should be present now
--- request
GET /apisix/prometheus/metrics
--- error_code: 200