Copilot commented on code in PR #13202:
URL: https://github.com/apache/apisix/pull/13202#discussion_r3382884090
##########
apisix/plugins/prometheus.lua:
##########
@@ -29,18 +29,99 @@ local schema = {
}
+-- Per-metric list of built-in labels whose values can be collapsed to an empty
+-- string to reduce cardinality. The `enum` of each metric intentionally omits
+-- structural labels that define the metric's identity (`code` on
`http_status`,
+-- `type` on `http_latency`/`bandwidth`), so they cannot be disabled. The enum
+-- must stay in sync with `metric_label_map` in exporter.lua.
+local metadata_schema = {
Review Comment:
The disabled-label allowlist is duplicated across `metadata_schema` (here)
and `metric_label_map` (exporter.lua), and the comment notes they must stay in
sync. To avoid drift, consider centralizing these label lists in a shared
module/constant, or generating the schema enums from a single source-of-truth
table and/or adding a test that asserts the schema enums match the exporter’s
built-in label lists (minus structural labels).
##########
apisix/plugins/prometheus/exporter.lua:
##########
@@ -107,6 +106,103 @@ local function extra_labels(name, ctx)
end
+local lrucache = core.lrucache.new({
+ type = "plugin",
+})
+
+
+-- Single source of truth for the ordered built-in label list of every metric
+-- that supports label disabling. It is used both to register the metric (see
+-- `append_tables` in `http_init`) and to map positional label values back to
+-- their names when collapsing disabled labels (see
+-- `get_enabled_label_values_for_metric`). Keeping a single declaration avoids
+-- the two lists drifting out of sync.
+local metric_label_map = {
+ http_status = {"code", "route", "matched_uri", "matched_host", "service",
"consumer", "node",
+ "request_type", "request_llm_model", "llm_model", "response_source"},
+ http_latency = {"type", "route", "service", "consumer", "node",
+ "request_type", "request_llm_model", "llm_model"},
+ bandwidth = {"type", "route", "service", "consumer", "node",
+ "request_type", "request_llm_model", "llm_model"},
+ llm_latency = {"route_id", "service_id", "consumer", "node",
+ "request_type", "request_llm_model", "llm_model"},
+ llm_prompt_tokens = {"route_id", "service_id", "consumer", "node",
+ "request_type", "request_llm_model", "llm_model"},
+ llm_completion_tokens = {"route_id", "service_id", "consumer", "node",
+ "request_type", "request_llm_model", "llm_model"},
+ llm_active_connections = {"route", "route_id", "matched_uri",
"matched_host",
+ "service", "service_id", "consumer", "node",
+ "request_type", "request_llm_model", "llm_model"},
+}
+
+
+-- Concatenate the given label-name tables into a new ordered table. Used at
+-- metric registration time to combine the built-in labels from
+-- `metric_label_map` with the operator-defined `extra_labels`.
+local function append_tables(...)
+ local res = {}
+ for _, tab in ipairs({...}) do
+ for _, v in ipairs(tab) do
+ core.table.insert(res, v)
+ end
+ end
+ return res
+end
+
+
+-- Shared empty table returned when no `disabled_labels` metadata is
configured,
+-- to avoid allocating in the log hot path. It is only ever read, never
mutated.
+local empty_disabled_map = {}
+
+
+local function build_disabled_label_metric_map(disabled_labels)
+ local disabled_label_metric_map = {}
+ for metric_name, labels in pairs(disabled_labels) do
+ disabled_label_metric_map[metric_name] = {}
+ for _, label in ipairs(labels) do
+ disabled_label_metric_map[metric_name][label] = true
+ end
+ end
+ return disabled_label_metric_map
+end
+
+
+-- Read the per-metric `disabled_labels` from the plugin metadata and return a
+-- lookup of metric_name -> {label_name = true}. The built map is cached via
+-- lrucache keyed by the metadata's modifiedIndex, so it is only rebuilt when
the
+-- metadata changes rather than on every request in the log hot path.
+local function get_disabled_label_metric_map()
+ local metadata = plugin.plugin_metadata(plugin_name)
+ if not (metadata and metadata.value and metadata.value.disabled_labels
+ and metadata.modifiedIndex) then
+ return empty_disabled_map
+ end
+
+ return lrucache(plugin_name, metadata.modifiedIndex,
+ build_disabled_label_metric_map,
metadata.value.disabled_labels)
+end
+
+
+local function get_enabled_label_values_for_metric(metric_name,
disabled_label_metric_map, ...)
+ local label_values = gen_arr(...)
+ local metric_labels = metric_label_map[metric_name]
+
+ if not metric_labels then
+ return label_values
+ end
+
+ local disabled_labels = disabled_label_metric_map[metric_name] or {}
+ for i in ipairs(label_values) do
+ local label_name = metric_labels[i]
+ if label_name and disabled_labels[label_name] then
+ label_values[i] = ""
+ end
+ end
Review Comment:
Iterating with `ipairs(label_values)` can stop early if any positional label
value is `nil`, which would skip collapsing for subsequent labels and can leave
holes in the positional label array. A more robust approach is to iterate over
the known ordered `metric_labels` list (its indices are the authoritative
positions) and apply collapsing by position, optionally normalizing missing
values to `\"\"` for built-in label positions to preserve correct label/value
alignment.
##########
apisix/plugins/prometheus/exporter.lua:
##########
@@ -213,51 +306,42 @@ function _M.http_init(prometheus_enabled_in_stream)
end
metrics.latency = prometheus:histogram("http_latency",
- "HTTP request latency in milliseconds per service in APISIX",
- {"type", "route", "service", "consumer", "node",
- "request_type", "request_llm_model", "llm_model",
- unpack(extra_labels("http_latency"))},
- buckets, latency_metrics_exptime)
+ "HTTP request latency in milliseconds per service in APISIX",
+ append_tables(metric_label_map.http_latency,
extra_labels("http_latency")),
+ buckets, latency_metrics_exptime)
metrics.bandwidth = prometheus:counter("bandwidth",
"Total bandwidth in bytes consumed per service in APISIX",
- {"type", "route", "service", "consumer", "node",
- "request_type", "request_llm_model", "llm_model",
- unpack(extra_labels("bandwidth"))},
+ append_tables(metric_label_map.bandwidth,
extra_labels("bandwidth")),
bandwidth_metrics_exptime)
Review Comment:
The new `disabled_labels` behavior applies to multiple metrics (e.g.,
`bandwidth` and the LLM metrics), but the added test only verifies
`http_status` collapsing and `http_latency` non-collapsing. Consider adding at
least one assertion for another metric (e.g., `bandwidth`) to cover that the
per-metric scoping works beyond the two HTTP metrics, and/or a minimal LLM
metric case if those labels are expected to be present in this test environment.
##########
apisix/plugins/prometheus.lua:
##########
@@ -29,18 +29,99 @@ local schema = {
}
+-- Per-metric list of built-in labels whose values can be collapsed to an empty
+-- string to reduce cardinality. The `enum` of each metric intentionally omits
+-- structural labels that define the metric's identity (`code` on
`http_status`,
+-- `type` on `http_latency`/`bandwidth`), so they cannot be disabled. The enum
+-- must stay in sync with `metric_label_map` in exporter.lua.
+local metadata_schema = {
+ type = "object",
+ properties = {
+ disabled_labels = {
+ type = "object",
+ properties = {
+ http_status = {
+ type = "array",
+ items = {
+ type = "string",
+ enum = {"route", "matched_uri", "matched_host",
"service",
+ "consumer", "node", "request_type",
+ "request_llm_model", "llm_model",
"response_source"},
+ },
+ },
+ http_latency = {
+ type = "array",
+ items = {
+ type = "string",
+ enum = {"route", "service", "consumer", "node",
+ "request_type", "request_llm_model",
"llm_model"},
+ },
+ },
+ bandwidth = {
+ type = "array",
+ items = {
+ type = "string",
+ enum = {"route", "service", "consumer", "node",
+ "request_type", "request_llm_model",
"llm_model"},
+ },
+ },
+ llm_latency = {
+ type = "array",
+ items = {
+ type = "string",
+ enum = {"route_id", "service_id", "consumer", "node",
+ "request_type", "request_llm_model",
"llm_model"},
+ },
+ },
+ llm_prompt_tokens = {
+ type = "array",
+ items = {
+ type = "string",
+ enum = {"route_id", "service_id", "consumer", "node",
+ "request_type", "request_llm_model",
"llm_model"},
+ },
+ },
+ llm_completion_tokens = {
+ type = "array",
+ items = {
+ type = "string",
+ enum = {"route_id", "service_id", "consumer", "node",
+ "request_type", "request_llm_model",
"llm_model"},
+ },
+ },
+ llm_active_connections = {
+ type = "array",
+ items = {
+ type = "string",
+ enum = {"route", "route_id", "matched_uri",
"matched_host",
+ "service", "service_id", "consumer", "node",
+ "request_type", "request_llm_model",
"llm_model"},
+ },
+ },
+ },
+ additionalProperties = false,
+ },
+ },
+}
Review Comment:
At the metadata root level, `additionalProperties` is not set, so unknown
top-level keys in plugin metadata will be accepted silently. If the intent is
to keep metadata strict (similar to `disabled_labels.additionalProperties =
false`), consider adding `additionalProperties = false` at the root of
`metadata_schema` as well to avoid typos/misconfigurations being ignored.
##########
apisix/plugins/prometheus/exporter.lua:
##########
@@ -107,6 +106,103 @@ local function extra_labels(name, ctx)
end
+local lrucache = core.lrucache.new({
+ type = "plugin",
+})
+
+
+-- Single source of truth for the ordered built-in label list of every metric
+-- that supports label disabling. It is used both to register the metric (see
+-- `append_tables` in `http_init`) and to map positional label values back to
+-- their names when collapsing disabled labels (see
+-- `get_enabled_label_values_for_metric`). Keeping a single declaration avoids
+-- the two lists drifting out of sync.
+local metric_label_map = {
+ http_status = {"code", "route", "matched_uri", "matched_host", "service",
"consumer", "node",
+ "request_type", "request_llm_model", "llm_model", "response_source"},
+ http_latency = {"type", "route", "service", "consumer", "node",
+ "request_type", "request_llm_model", "llm_model"},
+ bandwidth = {"type", "route", "service", "consumer", "node",
+ "request_type", "request_llm_model", "llm_model"},
+ llm_latency = {"route_id", "service_id", "consumer", "node",
+ "request_type", "request_llm_model", "llm_model"},
+ llm_prompt_tokens = {"route_id", "service_id", "consumer", "node",
+ "request_type", "request_llm_model", "llm_model"},
+ llm_completion_tokens = {"route_id", "service_id", "consumer", "node",
+ "request_type", "request_llm_model", "llm_model"},
+ llm_active_connections = {"route", "route_id", "matched_uri",
"matched_host",
+ "service", "service_id", "consumer", "node",
+ "request_type", "request_llm_model", "llm_model"},
+}
+
+
+-- Concatenate the given label-name tables into a new ordered table. Used at
+-- metric registration time to combine the built-in labels from
+-- `metric_label_map` with the operator-defined `extra_labels`.
+local function append_tables(...)
+ local res = {}
+ for _, tab in ipairs({...}) do
+ for _, v in ipairs(tab) do
+ core.table.insert(res, v)
+ end
+ end
+ return res
+end
+
+
+-- Shared empty table returned when no `disabled_labels` metadata is
configured,
+-- to avoid allocating in the log hot path. It is only ever read, never
mutated.
+local empty_disabled_map = {}
+
+
+local function build_disabled_label_metric_map(disabled_labels)
+ local disabled_label_metric_map = {}
+ for metric_name, labels in pairs(disabled_labels) do
+ disabled_label_metric_map[metric_name] = {}
+ for _, label in ipairs(labels) do
+ disabled_label_metric_map[metric_name][label] = true
+ end
+ end
+ return disabled_label_metric_map
+end
+
+
+-- Read the per-metric `disabled_labels` from the plugin metadata and return a
+-- lookup of metric_name -> {label_name = true}. The built map is cached via
+-- lrucache keyed by the metadata's modifiedIndex, so it is only rebuilt when
the
+-- metadata changes rather than on every request in the log hot path.
+local function get_disabled_label_metric_map()
+ local metadata = plugin.plugin_metadata(plugin_name)
+ if not (metadata and metadata.value and metadata.value.disabled_labels
+ and metadata.modifiedIndex) then
+ return empty_disabled_map
+ end
+
+ return lrucache(plugin_name, metadata.modifiedIndex,
+ build_disabled_label_metric_map,
metadata.value.disabled_labels)
+end
+
+
+local function get_enabled_label_values_for_metric(metric_name,
disabled_label_metric_map, ...)
+ local label_values = gen_arr(...)
+ local metric_labels = metric_label_map[metric_name]
+
+ if not metric_labels then
+ return label_values
+ end
+
+ local disabled_labels = disabled_label_metric_map[metric_name] or {}
+ for i in ipairs(label_values) do
+ local label_name = metric_labels[i]
+ if label_name and disabled_labels[label_name] then
+ label_values[i] = ""
+ end
+ end
+
+ return label_values
+end
Review Comment:
`get_enabled_label_values_for_metric` always iterates over `label_values`
even when no labels are disabled, which adds overhead in the log hot path.
Consider an early return when `disabled_label_metric_map` is the shared empty
map or when `disabled_label_metric_map[metric_name]` is nil/empty (e.g.,
`next(disabled_labels) == nil`) so the common case stays as close to the old
`gen_arr(...)` path as possible.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]