janiussyafiq commented on code in PR #13202:
URL: https://github.com/apache/apisix/pull/13202#discussion_r3382953435


##########
apisix/plugins/prometheus/exporter.lua:
##########
@@ -107,6 +106,103 @@ local function extra_labels(name, ctx)
 end
 
 
+local lrucache = core.lrucache.new({
+    type = "plugin",
+})
+
+
+-- Single source of truth for the ordered built-in label list of every metric
+-- that supports label disabling. It is used both to register the metric (see
+-- `append_tables` in `http_init`) and to map positional label values back to
+-- their names when collapsing disabled labels (see
+-- `get_enabled_label_values_for_metric`). Keeping a single declaration avoids
+-- the two lists drifting out of sync.
+local metric_label_map = {
+    http_status = {"code", "route", "matched_uri", "matched_host", "service", 
"consumer", "node",
+        "request_type", "request_llm_model", "llm_model", "response_source"},
+    http_latency = {"type", "route", "service", "consumer", "node",
+        "request_type", "request_llm_model", "llm_model"},
+    bandwidth = {"type", "route", "service", "consumer", "node",
+        "request_type", "request_llm_model", "llm_model"},
+    llm_latency = {"route_id", "service_id", "consumer", "node",
+        "request_type", "request_llm_model", "llm_model"},
+    llm_prompt_tokens = {"route_id", "service_id", "consumer", "node",
+        "request_type", "request_llm_model", "llm_model"},
+    llm_completion_tokens = {"route_id", "service_id", "consumer", "node",
+        "request_type", "request_llm_model", "llm_model"},
+    llm_active_connections = {"route", "route_id", "matched_uri", 
"matched_host",
+        "service", "service_id", "consumer", "node",
+        "request_type", "request_llm_model", "llm_model"},
+}
+
+
+-- Concatenate the given label-name tables into a new ordered table. Used at
+-- metric registration time to combine the built-in labels from
+-- `metric_label_map` with the operator-defined `extra_labels`.
+local function append_tables(...)
+    local res = {}
+    for _, tab in ipairs({...}) do
+        for _, v in ipairs(tab) do
+            core.table.insert(res, v)
+        end
+    end
+    return res
+end
+
+
+-- Shared empty table returned when no `disabled_labels` metadata is 
configured,
+-- to avoid allocating in the log hot path. It is only ever read, never 
mutated.
+local empty_disabled_map = {}
+
+
+local function build_disabled_label_metric_map(disabled_labels)
+    local disabled_label_metric_map = {}
+    for metric_name, labels in pairs(disabled_labels) do
+        disabled_label_metric_map[metric_name] = {}
+        for _, label in ipairs(labels) do
+            disabled_label_metric_map[metric_name][label] = true
+        end
+    end
+    return disabled_label_metric_map
+end
+
+
+-- Read the per-metric `disabled_labels` from the plugin metadata and return a
+-- lookup of metric_name -> {label_name = true}. The built map is cached via
+-- lrucache keyed by the metadata's modifiedIndex, so it is only rebuilt when 
the
+-- metadata changes rather than on every request in the log hot path.
+local function get_disabled_label_metric_map()
+    local metadata = plugin.plugin_metadata(plugin_name)
+    if not (metadata and metadata.value and metadata.value.disabled_labels
+            and metadata.modifiedIndex) then
+        return empty_disabled_map
+    end
+
+    return lrucache(plugin_name, metadata.modifiedIndex,
+                    build_disabled_label_metric_map, 
metadata.value.disabled_labels)
+end
+
+
+local function get_enabled_label_values_for_metric(metric_name, 
disabled_label_metric_map, ...)
+    local label_values = gen_arr(...)
+    local metric_labels = metric_label_map[metric_name]
+
+    if not metric_labels then
+        return label_values
+    end
+
+    local disabled_labels = disabled_label_metric_map[metric_name] or {}
+    for i in ipairs(label_values) do
+        local label_name = metric_labels[i]
+        if label_name and disabled_labels[label_name] then
+            label_values[i] = ""
+        end
+    end
+
+    return label_values
+end

Review Comment:
   Fixed in 2a59ae4d. `get_enabled_label_values_for_metric` now returns early 
when `disabled_label_metric_map[metric_name]` is nil — i.e. whenever nothing is 
disabled for the metric, which is always the case when no metadata is 
configured. This keeps the no-config path as close as possible to the original 
`gen_arr(...)` and also removes a per-call empty-table allocation (the previous 
`... or {}`).



##########
apisix/plugins/prometheus/exporter.lua:
##########
@@ -107,6 +106,103 @@ local function extra_labels(name, ctx)
 end
 
 
+local lrucache = core.lrucache.new({
+    type = "plugin",
+})
+
+
+-- Single source of truth for the ordered built-in label list of every metric
+-- that supports label disabling. It is used both to register the metric (see
+-- `append_tables` in `http_init`) and to map positional label values back to
+-- their names when collapsing disabled labels (see
+-- `get_enabled_label_values_for_metric`). Keeping a single declaration avoids
+-- the two lists drifting out of sync.
+local metric_label_map = {
+    http_status = {"code", "route", "matched_uri", "matched_host", "service", 
"consumer", "node",
+        "request_type", "request_llm_model", "llm_model", "response_source"},
+    http_latency = {"type", "route", "service", "consumer", "node",
+        "request_type", "request_llm_model", "llm_model"},
+    bandwidth = {"type", "route", "service", "consumer", "node",
+        "request_type", "request_llm_model", "llm_model"},
+    llm_latency = {"route_id", "service_id", "consumer", "node",
+        "request_type", "request_llm_model", "llm_model"},
+    llm_prompt_tokens = {"route_id", "service_id", "consumer", "node",
+        "request_type", "request_llm_model", "llm_model"},
+    llm_completion_tokens = {"route_id", "service_id", "consumer", "node",
+        "request_type", "request_llm_model", "llm_model"},
+    llm_active_connections = {"route", "route_id", "matched_uri", 
"matched_host",
+        "service", "service_id", "consumer", "node",
+        "request_type", "request_llm_model", "llm_model"},
+}
+
+
+-- Concatenate the given label-name tables into a new ordered table. Used at
+-- metric registration time to combine the built-in labels from
+-- `metric_label_map` with the operator-defined `extra_labels`.
+local function append_tables(...)
+    local res = {}
+    for _, tab in ipairs({...}) do
+        for _, v in ipairs(tab) do
+            core.table.insert(res, v)
+        end
+    end
+    return res
+end
+
+
+-- Shared empty table returned when no `disabled_labels` metadata is 
configured,
+-- to avoid allocating in the log hot path. It is only ever read, never 
mutated.
+local empty_disabled_map = {}
+
+
+local function build_disabled_label_metric_map(disabled_labels)
+    local disabled_label_metric_map = {}
+    for metric_name, labels in pairs(disabled_labels) do
+        disabled_label_metric_map[metric_name] = {}
+        for _, label in ipairs(labels) do
+            disabled_label_metric_map[metric_name][label] = true
+        end
+    end
+    return disabled_label_metric_map
+end
+
+
+-- Read the per-metric `disabled_labels` from the plugin metadata and return a
+-- lookup of metric_name -> {label_name = true}. The built map is cached via
+-- lrucache keyed by the metadata's modifiedIndex, so it is only rebuilt when 
the
+-- metadata changes rather than on every request in the log hot path.
+local function get_disabled_label_metric_map()
+    local metadata = plugin.plugin_metadata(plugin_name)
+    if not (metadata and metadata.value and metadata.value.disabled_labels
+            and metadata.modifiedIndex) then
+        return empty_disabled_map
+    end
+
+    return lrucache(plugin_name, metadata.modifiedIndex,
+                    build_disabled_label_metric_map, 
metadata.value.disabled_labels)
+end
+
+
+local function get_enabled_label_values_for_metric(metric_name, 
disabled_label_metric_map, ...)
+    local label_values = gen_arr(...)
+    local metric_labels = metric_label_map[metric_name]
+
+    if not metric_labels then
+        return label_values
+    end
+
+    local disabled_labels = disabled_label_metric_map[metric_name] or {}
+    for i in ipairs(label_values) do
+        local label_name = metric_labels[i]
+        if label_name and disabled_labels[label_name] then
+            label_values[i] = ""
+        end
+    end

Review Comment:
   Fixed in 2a59ae4d. The collapse loop now iterates the authoritative ordered 
list via `for i = 1, #metric_labels` instead of `ipairs(label_values)`, so a 
`nil` positional value can no longer terminate the scan early. `#metric_labels` 
covers only the built-in label positions, so operator-defined `extra_labels` 
appended after them are intentionally left untouched.



##########
apisix/plugins/prometheus.lua:
##########
@@ -29,18 +29,99 @@ local schema = {
 }
 
 
+-- Per-metric list of built-in labels whose values can be collapsed to an empty
+-- string to reduce cardinality. The `enum` of each metric intentionally omits
+-- structural labels that define the metric's identity (`code` on 
`http_status`,
+-- `type` on `http_latency`/`bandwidth`), so they cannot be disabled. The enum
+-- must stay in sync with `metric_label_map` in exporter.lua.
+local metadata_schema = {

Review Comment:
   Intentional, leaving as-is. The `enum` here is deliberately a *subset* of 
`metric_label_map`: it omits the structural labels (`code`, `type`) that must 
not be disable-able. So the two are not the same list and cannot be shared 
verbatim — deriving the enums from the exporter table would require encoding 
the structural-label exclusion separately and building the schema dynamically, 
which is less readable than the explicit literal and diverges from how other 
plugins declare their metadata schemas.
   
   The drift consequence is also mild: a label added to the registration list 
but not to the enum simply isn't disable-able yet (rejected at config time), 
not a runtime error. I've kept the explicit schema with the in-code sync note; 
this mirrors how the same feature is implemented in api7's gateway. Happy to 
add a consistency assertion if maintainers prefer it.



##########
apisix/plugins/prometheus.lua:
##########
@@ -29,18 +29,99 @@ local schema = {
 }
 
 
+-- Per-metric list of built-in labels whose values can be collapsed to an empty
+-- string to reduce cardinality. The `enum` of each metric intentionally omits
+-- structural labels that define the metric's identity (`code` on 
`http_status`,
+-- `type` on `http_latency`/`bandwidth`), so they cannot be disabled. The enum
+-- must stay in sync with `metric_label_map` in exporter.lua.
+local metadata_schema = {
+    type = "object",
+    properties = {
+        disabled_labels = {
+            type = "object",
+            properties = {
+                http_status = {
+                    type = "array",
+                    items = {
+                        type = "string",
+                        enum = {"route", "matched_uri", "matched_host", 
"service",
+                                "consumer", "node", "request_type",
+                                "request_llm_model", "llm_model", 
"response_source"},
+                    },
+                },
+                http_latency = {
+                    type = "array",
+                    items = {
+                        type = "string",
+                        enum = {"route", "service", "consumer", "node",
+                                "request_type", "request_llm_model", 
"llm_model"},
+                    },
+                },
+                bandwidth = {
+                    type = "array",
+                    items = {
+                        type = "string",
+                        enum = {"route", "service", "consumer", "node",
+                                "request_type", "request_llm_model", 
"llm_model"},
+                    },
+                },
+                llm_latency = {
+                    type = "array",
+                    items = {
+                        type = "string",
+                        enum = {"route_id", "service_id", "consumer", "node",
+                                "request_type", "request_llm_model", 
"llm_model"},
+                    },
+                },
+                llm_prompt_tokens = {
+                    type = "array",
+                    items = {
+                        type = "string",
+                        enum = {"route_id", "service_id", "consumer", "node",
+                                "request_type", "request_llm_model", 
"llm_model"},
+                    },
+                },
+                llm_completion_tokens = {
+                    type = "array",
+                    items = {
+                        type = "string",
+                        enum = {"route_id", "service_id", "consumer", "node",
+                                "request_type", "request_llm_model", 
"llm_model"},
+                    },
+                },
+                llm_active_connections = {
+                    type = "array",
+                    items = {
+                        type = "string",
+                        enum = {"route", "route_id", "matched_uri", 
"matched_host",
+                                "service", "service_id", "consumer", "node",
+                                "request_type", "request_llm_model", 
"llm_model"},
+                    },
+                },
+            },
+            additionalProperties = false,
+        },
+    },
+}

Review Comment:
   Left open intentionally for consistency with the codebase. None of the 
existing metadata schemas (`http-logger`, `syslog`, `kafka-logger`, 
`error-log-logger`, …) set `additionalProperties = false` at the root, so 
adding it only here would diverge from the established pattern. The inner 
`disabled_labels.additionalProperties = false` already rejects the realistic 
misconfiguration — an unknown metric key. Happy to tighten the root if 
maintainers would rather standardize on strict metadata.



##########
apisix/plugins/prometheus/exporter.lua:
##########
@@ -213,51 +306,42 @@ function _M.http_init(prometheus_enabled_in_stream)
     end
 
     metrics.latency = prometheus:histogram("http_latency",
-        "HTTP request latency in milliseconds per service in APISIX",
-        {"type", "route", "service", "consumer", "node",
-        "request_type", "request_llm_model", "llm_model",
-        unpack(extra_labels("http_latency"))},
-        buckets, latency_metrics_exptime)
+            "HTTP request latency in milliseconds per service in APISIX",
+            append_tables(metric_label_map.http_latency, 
extra_labels("http_latency")),
+            buckets, latency_metrics_exptime)
 
     metrics.bandwidth = prometheus:counter("bandwidth",
             "Total bandwidth in bytes consumed per service in APISIX",
-            {"type", "route", "service", "consumer", "node",
-            "request_type", "request_llm_model", "llm_model",
-            unpack(extra_labels("bandwidth"))},
+            append_tables(metric_label_map.bandwidth, 
extra_labels("bandwidth")),
             bandwidth_metrics_exptime)

Review Comment:
   Added in 2a59ae4d. The test now also disables `node` on `bandwidth` and 
asserts `apisix_bandwidth` exposes `node=""` while keeping `route="1"` — 
covering per-metric scoping on a third metric, and a second metric *type* 
(counter), beyond `http_status`/`http_latency`. The LLM metrics are 
intentionally left out: they go through the identical 
`get_enabled_label_values_for_metric` path, so exercising them would require 
standing up an ai-proxy fixture for no additional code coverage.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to