This is an automated email from the ASF dual-hosted git repository.
nic443 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/apisix.git
The following commit(s) were added to refs/heads/master by this push:
new afda19403 feat: add more spans to opentelemetry plugin (#12686)
afda19403 is described below
commit afda19403df926556d19cee0f4f8f5f9dee95426
Author: Nic <[email protected]>
AuthorDate: Sat Feb 7 18:45:57 2026 +0800
feat: add more spans to opentelemetry plugin (#12686)
Signed-off-by: Nic <[email protected]>
Co-authored-by: Ashish Tiwari <[email protected]>
Co-authored-by: AlinsRan <[email protected]>
---
apisix/cli/config.lua | 3 +-
apisix/core/response.lua | 4 +
apisix/init.lua | 29 +++-
apisix/plugin.lua | 6 +
apisix/plugins/opentelemetry.lua | 90 +++++++++---
apisix/secret.lua | 6 +
apisix/ssl/router/radixtree_sni.lua | 6 +-
apisix/tracer.lua | 87 ++++++++++++
apisix/utils/noop_span.lua | 47 +++++++
apisix/utils/span.lua | 125 +++++++++++++++++
apisix/utils/upstream.lua | 6 +-
conf/config.yaml.example | 4 +
docs/en/latest/plugins/opentelemetry.md | 119 ++++++++++++++--
docs/zh/latest/plugins/opentelemetry.md | 141 ++++++++++++++++---
t/plugin/opentelemetry6.t | 236 ++++++++++++++++++++++++++++++++
15 files changed, 857 insertions(+), 52 deletions(-)
diff --git a/apisix/cli/config.lua b/apisix/cli/config.lua
index 25e6783c2..13359fa81 100644
--- a/apisix/cli/config.lua
+++ b/apisix/cli/config.lua
@@ -84,7 +84,8 @@ local _M = {
neg_ttl = 60,
neg_count = 512
}
- }
+ },
+ tracing = false
},
nginx_config = {
error_log = "logs/error.log",
diff --git a/apisix/core/response.lua b/apisix/core/response.lua
index baee97749..ffc692eb8 100644
--- a/apisix/core/response.lua
+++ b/apisix/core/response.lua
@@ -19,6 +19,7 @@
--
-- @module core.response
+local tracer = require("apisix.tracer")
local encode_json = require("cjson.safe").encode
local ngx = ngx
local arg = ngx.arg
@@ -86,6 +87,9 @@ function resp_exit(code, ...)
end
if code then
+ if code >= 400 then
+ tracer.finish_all(ngx.ctx, tracer.status.ERROR, "response code "
.. code)
+ end
return ngx_exit(code)
end
end
diff --git a/apisix/init.lua b/apisix/init.lua
index 1fb090025..5fc874254 100644
--- a/apisix/init.lua
+++ b/apisix/init.lua
@@ -47,6 +47,8 @@ local debug = require("apisix.debug")
local pubsub_kafka = require("apisix.pubsub.kafka")
local resource = require("apisix.resource")
local trusted_addresses_util = require("apisix.utils.trusted-addresses")
+local tracer = require("apisix.tracer")
+
local discovery = require("apisix.discovery.init").discovery
local ngx = ngx
local get_method = ngx.req.get_method
@@ -202,6 +204,9 @@ function _M.ssl_client_hello_phase()
local ngx_ctx = ngx.ctx
local api_ctx = core.tablepool.fetch("api_ctx", 0, 32)
ngx_ctx.api_ctx = api_ctx
+ api_ctx.ngx_ctx = ngx_ctx
+
+ local span = tracer.start(ngx_ctx, "ssl_client_hello_phase",
tracer.kind.server)
local ok, err = router.router_ssl.match_and_set(api_ctx, true, sni)
@@ -215,18 +220,23 @@ function _M.ssl_client_hello_phase()
core.log.error("failed to fetch ssl config: ", err)
end
core.log.error("failed to match any SSL certificate by SNI: ", sni)
+ span:set_status(tracer.status.ERROR, "no matched SSL")
+ span:finish(ngx_ctx)
ngx_exit(-1)
end
ok, err =
apisix_ssl.set_protocols_by_clienthello(ngx_ctx.matched_ssl.value.ssl_protocols)
if not ok then
core.log.error("failed to set ssl protocols: ", err)
+ span:set_status(tracer.status.ERROR, "failed set protocols")
+ span:finish(ngx_ctx)
ngx_exit(-1)
end
-- in stream subsystem, ngx.ssl.server_name() return hostname of ssl
session in preread phase,
-- so that we can't get real SNI without recording it in ngx.ctx during
client_hello phase
ngx.ctx.client_hello_sni = sni
+ span:finish(ngx_ctx)
end
@@ -480,7 +490,6 @@ local function common_phase(phase_name)
end
-
function _M.handle_upstream(api_ctx, route, enable_websocket)
-- some plugins(ai-proxy...) request upstream by http client directly
if api_ctx.bypass_nginx_upstream then
@@ -677,9 +686,12 @@ function _M.http_access_phase()
-- always fetch table from the table pool, we don't need a reused api_ctx
local api_ctx = core.tablepool.fetch("api_ctx", 0, 32)
ngx_ctx.api_ctx = api_ctx
+ api_ctx.ngx_ctx = ngx_ctx
core.ctx.set_vars_meta(api_ctx)
+ local span = tracer.start(ngx_ctx, "apisix.phase.access",
tracer.kind.server)
+
if not verify_https_client(api_ctx) then
return core.response.exit(400)
end
@@ -717,10 +729,13 @@ function _M.http_access_phase()
handle_x_forwarded_headers(api_ctx)
+ local match_span = tracer.start(ngx_ctx, "http_router_match",
tracer.kind.internal)
router.router_http.match(api_ctx)
local route = api_ctx.matched_route
if not route then
+ match_span:set_status(tracer.status.ERROR, "no matched route")
+ match_span:finish(ngx.ctx)
-- run global rule when there is no matching route
local global_rules, conf_version = apisix_global_rules.global_rules()
plugin.run_global_rules(api_ctx, global_rules, conf_version, nil)
@@ -729,6 +744,7 @@ function _M.http_access_phase()
return core.response.exit(404,
{error_msg = "404 Route Not Found"})
end
+ match_span:finish(ngx_ctx)
core.log.info("matched route: ",
core.json.delay_encode(api_ctx.matched_route, true))
@@ -785,7 +801,6 @@ function _M.http_access_phase()
else
local plugins = plugin.filter(api_ctx, route)
api_ctx.plugins = plugins
-
plugin.run_plugin("rewrite", plugins, api_ctx)
if api_ctx.consumer then
local changed
@@ -821,6 +836,7 @@ function _M.http_access_phase()
end
plugin.run_plugin("access", plugins, api_ctx)
end
+ span:finish(ngx_ctx)
_M.handle_upstream(api_ctx, route, enable_websocket)
@@ -879,6 +895,8 @@ end
function _M.http_header_filter_phase()
+ local ngx_ctx = ngx.ctx
+ local span = tracer.start(ngx_ctx, "apisix.phase.header_filter",
tracer.kind.server)
core.response.set_header("Server", ver_header)
local up_status = get_var("upstream_status")
@@ -901,6 +919,9 @@ function _M.http_header_filter_phase()
end
core.response.set_header("Apisix-Plugins",
core.table.concat(deduplicate, ", "))
end
+ span:finish(ngx_ctx)
+
+ tracer.start(ngx_ctx, "apisix.phase.body_filter", tracer.kind.server)
end
@@ -1056,6 +1077,7 @@ function _M.http_log_phase()
if not api_ctx then
return
end
+ tracer.finish_all(api_ctx.ngx_ctx)
if not api_ctx.var.apisix_upstream_response_time or
api_ctx.var.apisix_upstream_response_time == "" then
@@ -1081,6 +1103,9 @@ function _M.http_log_phase()
core.tablepool.release("matched_route_record",
api_ctx.curr_req_matched)
end
+ tracer.release(api_ctx.ngx_ctx)
+ api_ctx.ngx_ctx = nil
+
core.tablepool.release("api_ctx", api_ctx)
end
diff --git a/apisix/plugin.lua b/apisix/plugin.lua
index 20a08aa5f..1acc70455 100644
--- a/apisix/plugin.lua
+++ b/apisix/plugin.lua
@@ -38,6 +38,7 @@ local tostring = tostring
local error = error
local getmetatable = getmetatable
local setmetatable = setmetatable
+local tracer = require("apisix.tracer")
-- make linter happy to avoid error: getting the Lua global "load"
-- luacheck: globals load, ignore lua_load
local lua_load = load
@@ -1228,7 +1229,10 @@ function _M.run_plugin(phase, plugins, api_ctx)
plugin_run = true
run_meta_pre_function(conf, api_ctx, plugins[i]["name"])
api_ctx._plugin_name = plugins[i]["name"]
+ local span = tracer.start(api_ctx.ngx_ctx, "apisix.phase." .. phase
+ .. ".plugins." .. api_ctx._plugin_name)
phase_func(conf, api_ctx)
+ span:finish(api_ctx.ngx_ctx)
api_ctx._plugin_name = nil
end
end
@@ -1301,6 +1305,7 @@ end
function _M.run_global_rules(api_ctx, global_rules, conf_version, phase_name)
if global_rules and #global_rules > 0 then
+ local span = tracer.start(api_ctx.ngx_ctx, "run_global_rules",
tracer.kind.internal)
local orig_conf_type = api_ctx.conf_type
local orig_conf_version = api_ctx.conf_version
local orig_conf_id = api_ctx.conf_id
@@ -1335,6 +1340,7 @@ function _M.run_global_rules(api_ctx, global_rules,
conf_version, phase_name)
api_ctx.conf_type = orig_conf_type
api_ctx.conf_version = orig_conf_version
api_ctx.conf_id = orig_conf_id
+ span:finish(api_ctx.ngx_ctx)
end
end
diff --git a/apisix/plugins/opentelemetry.lua b/apisix/plugins/opentelemetry.lua
index d98ac44ae..487c14381 100644
--- a/apisix/plugins/opentelemetry.lua
+++ b/apisix/plugins/opentelemetry.lua
@@ -48,6 +48,7 @@ local pairs = pairs
local ipairs = ipairs
local unpack = unpack
local string_format = string.format
+local update_time = ngx.update_time
local lrucache = core.lrucache.new({
type = 'plugin', count = 128, ttl = 24 * 60 * 60,
@@ -327,10 +328,17 @@ function _M.rewrite(conf, api_ctx)
local attributes = {
attr.string("net.host.name", vars.host),
+ -- deprecated attributes
attr.string("http.method", vars.method),
attr.string("http.scheme", vars.scheme),
attr.string("http.target", vars.request_uri),
attr.string("http.user_agent", vars.http_user_agent),
+
+ -- new attributes; the path attribute is named `url.path`, matching the
+ -- `url.` namespace already used for `url.scheme` below
+ attr.string("http.request.method", vars.method),
+ attr.string("url.scheme", vars.scheme),
+ attr.string("url.path", vars.uri),
+ attr.string("user_agent.original", vars.http_user_agent),
}
if api_ctx.curr_req_matched then
@@ -376,6 +384,10 @@ function _M.rewrite(conf, api_ctx)
ngx_var.opentelemetry_span_id = span_context.span_id
end
+ if not ctx:span():is_recording() and ngx.ctx.tracing then
+ ngx.ctx.tracing.skip = true
+ end
+
api_ctx.otel_context_token = ctx:attach()
-- inject trace context into the headers of upstream HTTP request
@@ -383,41 +395,85 @@ function _M.rewrite(conf, api_ctx)
end
-function _M.delayed_body_filter(conf, api_ctx)
- if api_ctx.otel_context_token and ngx.arg[2] then
- local ctx = context:current()
- ctx:detach(api_ctx.otel_context_token)
- api_ctx.otel_context_token = nil
+-- Replay one recorded core span (and, recursively, its children) as a real
+-- span started from `tracer` under `parent_span_ctx`.
+-- `spans` is the per-request span list; `span.child_ids` holds indexes into it.
+-- The recorded start/end times, status and attributes are copied onto the
+-- newly started span.
+local function create_child_span(tracer, parent_span_ctx, spans, span)
+ if not span or span.finished then
+ return
+ end
+ -- mark as exported so the same recorded span is never replayed twice
+ span.finished = true
+ local new_span_ctx, new_span = tracer:start(parent_span_ctx, span.name,
+ {
+ kind = span.kind,
+ attributes = span.attributes,
+ })
+ -- overwrite the start time chosen by tracer:start with the recorded one
+ new_span.start_time = span.start_time
+
+ -- children are created (and finished) before the parent is finished
+ for _, idx in ipairs(span.child_ids or {}) do
+ create_child_span(tracer, new_span_ctx, spans, spans[idx])
+ end
+ if span.status then
+ new_span:set_status(span.status.code, span.status.message)
+ end
+ -- NOTE(review): span.end_time is nil for a core span that was never
+ -- finished; presumably finish() then falls back to the current time - confirm
+ new_span:finish(span.end_time)
+end
- -- get span from current context
- local span = ctx:span()
- local upstream_status = core.response.get_upstream_status(api_ctx)
- if upstream_status and upstream_status >= 500 then
- span:set_status(span_status.ERROR,
- "upstream response status: " .. upstream_status)
- end
- span:set_attributes(attr.int("http.status_code", upstream_status))
+local function inject_core_spans(root_span_ctx, api_ctx, conf)
+ local tracing = api_ctx.ngx_ctx.tracing
+ if not tracing then
+ return
+ end
- span:finish()
+ local span = root_span_ctx:span()
+
+ local metadata = plugin.plugin_metadata(plugin_name)
+ local plugin_info = metadata.value
+ if span and not span:is_recording() then
+ return
+ end
+ local inject_conf = {
+ sampler = {
+ name = "always_on",
+ options = conf.sampler.options
+ },
+ additional_attributes = conf.additional_attributes,
+ additional_header_prefix_attributes =
conf.additional_header_prefix_attributes
+ }
+ local tracer, err = core.lrucache.plugin_ctx(lrucache, api_ctx, nil,
+ create_tracer_obj,
inject_conf, plugin_info)
+ if not tracer then
+ core.log.error("failed to fetch tracer object: ", err)
+ return
+ end
+
+ if #tracing.spans == 0 then
+ return
+ end
+ span.start_time = tracing.spans[1].start_time
+ local root_span = tracing.root_span
+ local spans = tracing.spans
+ for _, idx in ipairs(root_span.child_ids or {}) do
+ create_child_span(tracer, root_span_ctx, spans, spans[idx])
end
end
--- body_filter maybe not called because of empty http body response
--- so we need to check if the span has finished in log phase
function _M.log(conf, api_ctx)
if api_ctx.otel_context_token then
-- ctx:detach() is not necessary, because of ctx is stored in ngx.ctx
local upstream_status = core.response.get_upstream_status(api_ctx)
-- get span from current context
- local span = context:current():span()
+ local ctx = context:current()
+ local span = ctx:span()
if upstream_status and upstream_status >= 500 then
span:set_status(span_status.ERROR,
"upstream response status: " .. upstream_status)
end
+ inject_core_spans(ctx, api_ctx, conf)
+ span:set_attributes(attr.int("http.status_code", upstream_status),
+ attr.int("http.response.status_code",
upstream_status))
+ update_time()
span:finish()
end
end
diff --git a/apisix/secret.lua b/apisix/secret.lua
index 8ad1be260..4af7c7dd4 100644
--- a/apisix/secret.lua
+++ b/apisix/secret.lua
@@ -18,6 +18,7 @@
local require = require
local core = require("apisix.core")
local string = require("apisix.core.string")
+local tracer = require("apisix.tracer")
local local_conf = require("apisix.core.config_local").local_conf()
@@ -28,6 +29,7 @@ local byte = string.byte
local type = type
local pcall = pcall
local pairs = pairs
+local ngx = ngx
local _M = {}
@@ -148,6 +150,7 @@ local function fetch_by_uri_secret(secret_uri)
return nil, "no secret conf, secret_uri: " .. secret_uri
end
+ local span = tracer.start(ngx.ctx, "fetch_secret", tracer.kind.client)
local ok, sm = pcall(require, "apisix.secret." .. opts.manager)
if not ok then
return nil, "no secret manager: " .. opts.manager
@@ -155,9 +158,12 @@ local function fetch_by_uri_secret(secret_uri)
local value, err = sm.get(conf, opts.key)
if err then
+ span:set_status(tracer.status.ERROR, err)
+ span:finish(ngx.ctx)
return nil, err
end
+ span:finish(ngx.ctx)
return value
end
diff --git a/apisix/ssl/router/radixtree_sni.lua
b/apisix/ssl/router/radixtree_sni.lua
index 6104dcb10..15ea67225 100644
--- a/apisix/ssl/router/radixtree_sni.lua
+++ b/apisix/ssl/router/radixtree_sni.lua
@@ -21,6 +21,7 @@ local apisix_ssl = require("apisix.ssl")
local secret = require("apisix.secret")
local ngx_ssl = require("ngx.ssl")
local config_util = require("apisix.core.config_util")
+local tracer = require("apisix.tracer")
local ngx = ngx
local ipairs = ipairs
local type = type
@@ -169,6 +170,7 @@ function _M.match_and_set(api_ctx, match_only, alt_sni)
core.log.debug("sni: ", sni)
+ local span = tracer.start(api_ctx.ngx_ctx, "sni_radixtree_match",
tracer.kind.internal)
local sni_rev = sni:reverse()
local ok = radixtree_router:dispatch(sni_rev, nil, api_ctx)
if not ok then
@@ -177,9 +179,11 @@ function _M.match_and_set(api_ctx, match_only, alt_sni)
-- with it sometimes
core.log.error("failed to find any SSL certificate by SNI: ", sni)
end
+ span:set_status(tracer.status.ERROR, "failed match SNI")
+ span:finish(api_ctx.ngx_ctx)
return false
end
-
+ span:finish(api_ctx.ngx_ctx)
if api_ctx.matched_sni == "*" then
-- wildcard matches everything, no need for further validation
diff --git a/apisix/tracer.lua b/apisix/tracer.lua
new file mode 100644
index 000000000..8bee81712
--- /dev/null
+++ b/apisix/tracer.lua
@@ -0,0 +1,87 @@
+--
+-- Licensed to the Apache Software Foundation (ASF) under one or more
+-- contributor license agreements. See the NOTICE file distributed with
+-- this work for additional information regarding copyright ownership.
+-- The ASF licenses this file to You under the Apache License, Version 2.0
+-- (the "License"); you may not use this file except in compliance with
+-- the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+local tablepool = require("tablepool")
+local span = require("apisix.utils.span")
+local noop_span = require("apisix.utils.noop_span").new()
+local span_kind = require("opentelemetry.trace.span_kind")
+local span_status = require("opentelemetry.trace.span_status")
+local local_conf = require("apisix.core.config_local").local_conf()
+local ipairs = ipairs
+local ngx = ngx
+
+-- Core tracing is opt-in: it is turned on only in the http subsystem and only
+-- when `apisix.tracing` in the local configuration is a boolean.
+local enable_tracing = false
+if ngx.config.subsystem == "http" and type(local_conf.apisix.tracing) ==
"boolean" then
+ enable_tracing = local_conf.apisix.tracing
+end
+
+-- Module table. `kind` and `status` re-export the opentelemetry span_kind and
+-- span_status modules so callers can write `tracer.kind.*` / `tracer.status.*`.
+-- `span_state` is not referenced anywhere else in this file.
+local _M = {
+ kind = span_kind,
+ status = span_status,
+ span_state = {},
+}
+
+--- Start a core span on the given request context.
+-- The per-request tracing state (`ctx.tracing` and its `spans` list) is
+-- fetched from the table pool the first time a span is started.
+-- @param ctx   table that owns the tracing state (callers pass ngx.ctx)
+-- @param name  span name
+-- @param kind  span kind, passed through to the span constructor
+-- @return a newly recorded span, or the shared no-op span when tracing is
+--         disabled or `ctx.tracing.skip` is set
+function _M.start(ctx, name, kind)
+    if not enable_tracing then
+        return noop_span
+    end
+
+    local state = ctx.tracing
+    if not state then
+        state = tablepool.fetch("tracing", 0, 8)
+        state.spans = tablepool.fetch("tracing_spans", 20, 0)
+        ctx.tracing = state
+    end
+
+    if state.skip then
+        return noop_span
+    end
+
+    return span.new(ctx, name, kind)
+end
+
+
+--- Finish the current span and every ancestor that is still open.
+-- @param ctx      table that owns the tracing state (callers pass ngx.ctx)
+-- @param code     optional status code applied to the current span
+-- @param message  optional status message (only kept for ERROR)
+--
+-- After the call `tracing.current_span` is the top-most ancestor.
+function _M.finish_all(ctx, code, message)
+    local tracing = ctx.tracing
+    if not tracing then
+        return
+    end
+
+    local sp = tracing.current_span
+    if not sp then
+        -- tracing state exists but no span was recorded; nothing to finish
+        return
+    end
+
+    -- Only touch the status when the caller supplied one: the log phase calls
+    -- finish_all(ctx) without a code, and that must not overwrite a status
+    -- recorded earlier on the same span.
+    if code ~= nil then
+        sp:set_status(code, message)
+    end
+
+    -- Walk the ancestry through a local cursor. span:finish() itself moves
+    -- tracing.current_span to the parent, so driving the loop from
+    -- tracing.current_span would step over every other ancestor.
+    while true do
+        -- keep the end time of spans that were already finished
+        if not sp.end_time then
+            sp:finish(ctx)
+        end
+
+        local parent = sp.parent_id and tracing.spans[sp.parent_id]
+        if not parent then
+            break
+        end
+        sp = parent
+        tracing.current_span = sp
+    end
+end
+
+
+--- Return the per-request tracing state and all recorded spans to their
+-- table pools.
+-- @param ctx  table that owns the tracing state (callers pass ngx.ctx)
+function _M.release(ctx)
+    local tracing = ctx.tracing
+    if not tracing then
+        return
+    end
+
+    -- Drop the reference before recycling: the released tables go back to the
+    -- pool, so `ctx` must not keep pointing at them.
+    ctx.tracing = nil
+
+    for _, sp in ipairs(tracing.spans) do
+        sp:release()
+    end
+    tablepool.release("tracing_spans", tracing.spans)
+    tablepool.release("tracing", tracing)
+end
+
+
+return _M
diff --git a/apisix/utils/noop_span.lua b/apisix/utils/noop_span.lua
new file mode 100644
index 000000000..7504a973a
--- /dev/null
+++ b/apisix/utils/noop_span.lua
@@ -0,0 +1,47 @@
+--
+-- Licensed to the Apache Software Foundation (ASF) under one or more
+-- contributor license agreements. See the NOTICE file distributed with
+-- this work for additional information regarding copyright ownership.
+-- The ASF licenses this file to You under the Apache License, Version 2.0
+-- (the "License"); you may not use this file except in compliance with
+-- the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+local setmetatable = setmetatable
+
+-- Span stand-in returned when tracing is off: every span method exists but
+-- does nothing, so call sites need no conditionals.
+local function noop()
+end
+
+
+local _M = {
+    set_status = noop,
+    set_attributes = noop,
+    finish = noop,
+    release = noop,
+}
+
+
+local mt = {
+    __index = _M
+}
+
+
+-- Arguments mirror the real span constructor and are ignored.
+function _M.new(ctx, name, kind)
+    return setmetatable({}, mt)
+end
+
+
+return _M
diff --git a/apisix/utils/span.lua b/apisix/utils/span.lua
new file mode 100644
index 000000000..11ee50fd9
--- /dev/null
+++ b/apisix/utils/span.lua
@@ -0,0 +1,125 @@
+--
+-- Licensed to the Apache Software Foundation (ASF) under one or more
+-- contributor license agreements. See the NOTICE file distributed with
+-- this work for additional information regarding copyright ownership.
+-- The ASF licenses this file to You under the Apache License, Version 2.0
+-- (the "License"); you may not use this file except in compliance with
+-- the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+local tablepool = require("tablepool")
+local util = require("opentelemetry.util")
+local span_status = require("opentelemetry.trace.span_status")
+local setmetatable = setmetatable
+local table = table
+local new_tab = require("table.new")
+local select = select
+local pool_name = "opentelemetry_span"
+local update_time = ngx.update_time
+
+local _M = {}
+
+
+local mt = {
+ __index = _M
+}
+
+-- Return util.time_nano() after calling ngx.update_time().
+-- NOTE(review): presumably util.time_nano() is derived from nginx's cached
+-- clock, which is why the cache is refreshed first - confirm in
+-- opentelemetry.util.
+local function get_time()
+ update_time()
+ return util.time_nano()
+end
+
+
+
+-- Append `child_id` to the span's child list, creating the list on first use.
+local function append_child(sp, child_id)
+    local ids = sp.child_ids
+    if not ids then
+        ids = new_tab(10, 0)
+        sp.child_ids = ids
+    end
+    ids[#ids + 1] = child_id
+end
+
+
+-- Store the parent's id on the span. Ids are positions in `tracing.spans`
+-- (assigned in _M.new), so the parent can be looked up by index later.
+local function set_parent(sp, parent_id)
+ sp.parent_id = parent_id
+end
+
+
+--- Record a new span and make it the current one.
+-- The span table comes from the table pool, is appended to
+-- `ctx.tracing.spans` and uses its position in that list as its id. It is
+-- linked under `ctx.tracing.current_span` when there is one; otherwise it
+-- becomes `ctx.tracing.root_span`.
+-- @param ctx   table holding an already initialised `tracing` state
+-- @param name  span name
+-- @param kind  span kind
+-- @return the span, with this module's methods attached
+function _M.new(ctx, name, kind)
+    local tracing = ctx.tracing
+
+    local sp = tablepool.fetch(pool_name, 0, 16)
+    sp.start_time = get_time()
+    sp.name = name
+    sp.kind = kind
+
+    local id = #tracing.spans + 1
+    tracing.spans[id] = sp
+    sp.id = id
+
+    local parent = tracing.current_span
+    if not parent then
+        tracing.root_span = sp
+    else
+        set_parent(sp, parent.id)
+        append_child(parent, id)
+    end
+
+    tracing.current_span = sp
+    return setmetatable(sp, mt)
+end
+
+
+--- Set the span status.
+-- The code is passed through span_status.validate() first. The message is
+-- stored only when the validated code is ERROR; for any other code the
+-- existing message (or "" on a fresh status) is left as it is.
+function _M.set_status(self, code, message)
+    local validated = span_status.validate(code)
+
+    local current = self.status
+    if current then
+        current.code = validated
+    else
+        current = {
+            code = validated,
+            message = ""
+        }
+        self.status = current
+    end
+
+    if validated == span_status.ERROR then
+        current.message = message
+    end
+end
+
+
+--- Append the given attributes to the span.
+-- @param ...  attribute objects, appended in order to `self.attributes`
+function _M.set_attributes(self, ...)
+    local attrs = self.attributes
+    if not attrs then
+        -- use the module's own `new_tab` local (as append_child does) instead
+        -- of relying on `table.new` having been installed as a side effect of
+        -- require("table.new")
+        attrs = new_tab(10, 0)
+        self.attributes = attrs
+    end
+
+    local count = select('#', ...)
+    for i = 1, count do
+        local attr = select(i, ...)
+        table.insert(attrs, attr)
+    end
+end
+
+
+--- Stamp the span's end time and hand "current span" back to its parent.
+-- A span without a parent stays the current span.
+-- @param ctx  table holding the `tracing` state this span belongs to
+function _M.finish(self, ctx)
+    local tracing = ctx.tracing
+    self.end_time = get_time()
+
+    local parent_id = self.parent_id
+    if parent_id then
+        tracing.current_span = tracing.spans[parent_id]
+    end
+end
+
+
+-- Return this span's table to the pool it was fetched from in _M.new.
+function _M.release(self)
+ tablepool.release(pool_name, self)
+end
+
+
+return _M
diff --git a/apisix/utils/upstream.lua b/apisix/utils/upstream.lua
index 19bdd1a57..77f8c4d54 100644
--- a/apisix/utils/upstream.lua
+++ b/apisix/utils/upstream.lua
@@ -20,8 +20,8 @@ local ipairs = ipairs
local type = type
local tostring = tostring
local resource = require("apisix.resource")
-
-
+local tracer = require("apisix.tracer")
+local ngx = ngx
local _M = {}
@@ -81,6 +81,7 @@ _M.compare_upstream_node = compare_upstream_node
local function parse_domain_for_nodes(nodes)
+ local span = tracer.start(ngx.ctx, "resolve_dns", tracer.kind.internal)
local new_nodes = core.table.new(#nodes, 0)
for _, node in ipairs(nodes) do
local host = node.host
@@ -101,6 +102,7 @@ local function parse_domain_for_nodes(nodes)
core.table.insert(new_nodes, node)
end
end
+ span:finish(ngx.ctx)
return new_nodes
end
_M.parse_domain_for_nodes = parse_domain_for_nodes
diff --git a/conf/config.yaml.example b/conf/config.yaml.example
index 67fd190d8..4d78ae5af 100644
--- a/conf/config.yaml.example
+++ b/conf/config.yaml.example
@@ -149,6 +149,10 @@ apisix:
count: 512 # Cache size
neg_ttl: 60 # Negative cache TTL
neg_count: 512 # Negative cache size
+
+ tracing: false # Enable comprehensive request lifecycle
tracing (SSL/SNI, rewrite, access, header_filter, body_filter, and log).
+ # When disabled, OpenTelemetry collects
only a single span per request.
+
nginx_config: # Config for render the template to generate
nginx.conf
# user: root # Set the execution user of the worker
process. This is only
# effective if the master process runs with
super-user privileges.
diff --git a/docs/en/latest/plugins/opentelemetry.md
b/docs/en/latest/plugins/opentelemetry.md
index 061c26212..e0ab3eb71 100644
--- a/docs/en/latest/plugins/opentelemetry.md
+++ b/docs/en/latest/plugins/opentelemetry.md
@@ -95,6 +95,21 @@ curl
http://127.0.0.1:9180/apisix/admin/plugin_metadata/opentelemetry -H "X-API-
The examples below demonstrate how you can work with the `opentelemetry`
Plugin for different scenarios.
+### Enable Comprehensive Request Lifecycle Tracing
+
+:::note
+
+Enabling comprehensive tracing adds span creation and export overhead across
the request lifecycle, which may impact throughput and latency.
+
+:::
+
+To enable comprehensive tracing across the request lifecycle (SSL/SNI,
rewrite/access, header_filter/body_filter, and log), set the `tracing` field to
`true` in the configuration file:
+
+```yaml title="config.yaml"
+apisix:
+ tracing: true
+```
+
### Enable `opentelemetry` Plugin
By default, the `opentelemetry` Plugin is disabled in APISIX. To enable, add
the Plugin to your configuration file as such:
@@ -152,38 +167,124 @@ You should receive an `HTTP/1.1 200 OK` response.
In OpenTelemetry collector's log, you should see information similar to the
following:
```text
-2024-02-18T17:14:03.825Z info ResourceSpans #0
+info ResourceSpans #0
Resource SchemaURL:
Resource attributes:
-> telemetry.sdk.language: Str(lua)
-> telemetry.sdk.name: Str(opentelemetry-lua)
-> telemetry.sdk.version: Str(0.1.1)
- -> hostname: Str(e34673e24631)
+ -> hostname: Str(RC)
-> service.name: Str(APISIX)
ScopeSpans #0
ScopeSpans SchemaURL:
InstrumentationScope opentelemetry-lua
Span #0
- Trace ID : fbd0a38d4ea4a128ff1a688197bc58b0
+ Trace ID : a5499493b517a3333578c2ac4fad3f4d
+ Parent ID : d0adf392b5c84111
+ ID : d9816bbaef5ee63d
+ Name : http_router_match
+ Kind : Internal
+ Start time : 2026-02-04 05:57:04.846881024 +0000 UTC
+ End time : 2026-02-04 05:57:04.846951936 +0000 UTC
+ Status code : Unset
+ Status message :
+ DroppedAttributesCount: 0
+ DroppedEventsCount: 0
+ DroppedLinksCount: 0
+Span #1
+ Trace ID : a5499493b517a3333578c2ac4fad3f4d
+ Parent ID : d0c33adf97b099f3
+ ID : d0adf392b5c84111
+ Name : apisix.phase.access
+ Kind : Server
+ Start time : 2026-02-04 05:57:04.846562048 +0000 UTC
+ End time : 2026-02-04 05:57:04.84724608 +0000 UTC
+ Status code : Unset
+ Status message :
+ DroppedAttributesCount: 0
+ DroppedEventsCount: 0
+ DroppedLinksCount: 0
+Span #2
+ Trace ID : a5499493b517a3333578c2ac4fad3f4d
+ Parent ID : d0c33adf97b099f3
+ ID : 4eb72d55359331fa
+ Name : resolve_dns
+ Kind : Internal
+ Start time : 2026-02-04 05:57:04.847251968 +0000 UTC
+ End time : 2026-02-04 05:57:04.84726912 +0000 UTC
+ Status code : Unset
+ Status message :
+ DroppedAttributesCount: 0
+ DroppedEventsCount: 0
+ DroppedLinksCount: 0
+Span #3
+ Trace ID : a5499493b517a3333578c2ac4fad3f4d
+ Parent ID : d0c33adf97b099f3
+ ID : de572aad9bad3b47
+ Name : apisix.phase.header_filter
+ Kind : Server
+ Start time : 2026-02-04 05:57:04.84793088 +0000 UTC
+ End time : 2026-02-04 05:57:04.848005888 +0000 UTC
+ Status code : Unset
+ Status message :
+ DroppedAttributesCount: 0
+ DroppedEventsCount: 0
+ DroppedLinksCount: 0
+Span #4
+ Trace ID : a5499493b517a3333578c2ac4fad3f4d
+ Parent ID : d0c33adf97b099f3
+ ID : 0baddeee6e5d500d
+ Name : apisix.phase.body_filter
+ Kind : Server
+ Start time : 2026-02-04 05:57:04.848007936 +0000 UTC
+ End time : 2026-02-04 05:57:04.848103936 +0000 UTC
+ Status code : Unset
+ Status message :
+ DroppedAttributesCount: 0
+ DroppedEventsCount: 0
+ DroppedLinksCount: 0
+Span #5
+ Trace ID : a5499493b517a3333578c2ac4fad3f4d
+ Parent ID : d0c33adf97b099f3
+ ID : d57d53882c40612a
+ Name : apisix.phase.log.plugins.opentelemetry
+ Kind : Internal
+ Start time : 2026-02-04 05:57:04.84823296 +0000 UTC
+ End time : 2026-02-04 05:57:04.848385024 +0000 UTC
+ Status code : Unset
+ Status message :
+ DroppedAttributesCount: 0
+ DroppedEventsCount: 0
+ DroppedLinksCount: 0
+Span #6
+ Trace ID : a5499493b517a3333578c2ac4fad3f4d
Parent ID :
- ID : af3dc7642104748a
+ ID : d0c33adf97b099f3
Name : GET /anything
Kind : Server
- Start time : 2024-02-18 17:14:03.763244032 +0000 UTC
- End time : 2024-02-18 17:14:03.920229888 +0000 UTC
+ Start time : 2026-02-04 05:57:04.84655488 +0000 UTC
+ End time : 2026-02-04 05:57:04.84839296 +0000 UTC
Status code : Unset
Status message :
+ DroppedAttributesCount: 0
+ DroppedEventsCount: 0
+ DroppedLinksCount: 0
Attributes:
- -> net.host.name: Str(127.0.0.1)
+ -> net.host.name: Str(localhost)
-> http.method: Str(GET)
-> http.scheme: Str(http)
-> http.target: Str(/anything)
- -> http.user_agent: Str(curl/7.64.1)
+ -> http.user_agent: Str(curl/7.81.0)
+ -> http.request.method: Str(GET)
+ -> url.scheme: Str(http)
+ -> uri.path: Str(/anything)
+ -> user_agent.original: Str(curl/7.81.0)
-> apisix.route_id: Str(otel-tracing-route)
-> apisix.route_name: Empty()
-> http.route: Str(/anything)
-> http.status_code: Int(200)
-{"kind": "exporter", "data_type": "traces", "name": "debug"}
+ -> http.response.status_code: Int(200)
+{"resource": {"service.instance.id": "ed436c1a-6ee7-46b0-ad58-527d0aaf4ade",
"service.name": "otelcol-contrib", "service.version": "0.144.0"},
"otelcol.component.id": "debug", "otelcol.component.kind": "exporter",
"otelcol.signal": "traces"}
```
To visualize these traces, you can export your telemetry to backend Services,
such as Zipkin and Prometheus. See
[exporters](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/exporter)
for more details.
diff --git a/docs/zh/latest/plugins/opentelemetry.md
b/docs/zh/latest/plugins/opentelemetry.md
index f22d90c93..ec5769704 100644
--- a/docs/zh/latest/plugins/opentelemetry.md
+++ b/docs/zh/latest/plugins/opentelemetry.md
@@ -94,6 +94,21 @@ curl
http://127.0.0.1:9180/apisix/admin/plugin_metadata/opentelemetry -H "X-API-
以下示例展示了如何在不同场景下使用 `opentelemetry` 插件。
+### 启用全面的请求生命周期追踪
+
+:::note
+
+开启全面追踪会在请求生命周期的各个阶段引入 span 的创建与上报开销,会对 APISIX 吞吐量和延迟产生影响。
+
+:::
+
+要在请求生命周期的各个阶段(包括
SSL/SNI、rewrite、access、header_filter、body_filter、log)启用全面追踪,请在配置文件中将 `tracing`
字段设置为 `true`:
+
+```yaml title="config.yaml"
+apisix:
+ tracing: true
+```
+
### 启用 opentelemetry 插件
默认情况下,APISIX 中的 `opentelemetry` 插件是禁用的。要启用它,请将插件添加到配置文件中,如下所示:
@@ -151,38 +166,124 @@ curl "http://127.0.0.1:9080/anything"
在 OpenTelemetry collector 的日志中,你应该看到类似以下的信息:
```text
-2024-02-18T17:14:03.825Z info ResourceSpans #0
+info ResourceSpans #0
Resource SchemaURL:
Resource attributes:
- -> telemetry.sdk.language: Str(lua)
- -> telemetry.sdk.name: Str(opentelemetry-lua)
- -> telemetry.sdk.version: Str(0.1.1)
- -> hostname: Str(e34673e24631)
- -> service.name: Str(APISIX)
+ -> telemetry.sdk.language: Str(lua)
+ -> telemetry.sdk.name: Str(opentelemetry-lua)
+ -> telemetry.sdk.version: Str(0.1.1)
+ -> hostname: Str(RC)
+ -> service.name: Str(APISIX)
ScopeSpans #0
ScopeSpans SchemaURL:
InstrumentationScope opentelemetry-lua
Span #0
- Trace ID : fbd0a38d4ea4a128ff1a688197bc58b0
+ Trace ID : a5499493b517a3333578c2ac4fad3f4d
+ Parent ID : d0adf392b5c84111
+ ID : d9816bbaef5ee63d
+ Name : http_router_match
+ Kind : Internal
+ Start time : 2026-02-04 05:57:04.846881024 +0000 UTC
+ End time : 2026-02-04 05:57:04.846951936 +0000 UTC
+ Status code : Unset
+ Status message :
+ DroppedAttributesCount: 0
+ DroppedEventsCount: 0
+ DroppedLinksCount: 0
+Span #1
+ Trace ID : a5499493b517a3333578c2ac4fad3f4d
+ Parent ID : d0c33adf97b099f3
+ ID : d0adf392b5c84111
+ Name : apisix.phase.access
+ Kind : Server
+ Start time : 2026-02-04 05:57:04.846562048 +0000 UTC
+ End time : 2026-02-04 05:57:04.84724608 +0000 UTC
+ Status code : Unset
+ Status message :
+ DroppedAttributesCount: 0
+ DroppedEventsCount: 0
+ DroppedLinksCount: 0
+Span #2
+ Trace ID : a5499493b517a3333578c2ac4fad3f4d
+ Parent ID : d0c33adf97b099f3
+ ID : 4eb72d55359331fa
+ Name : resolve_dns
+ Kind : Internal
+ Start time : 2026-02-04 05:57:04.847251968 +0000 UTC
+ End time : 2026-02-04 05:57:04.84726912 +0000 UTC
+ Status code : Unset
+ Status message :
+ DroppedAttributesCount: 0
+ DroppedEventsCount: 0
+ DroppedLinksCount: 0
+Span #3
+ Trace ID : a5499493b517a3333578c2ac4fad3f4d
+ Parent ID : d0c33adf97b099f3
+ ID : de572aad9bad3b47
+ Name : apisix.phase.header_filter
+ Kind : Server
+ Start time : 2026-02-04 05:57:04.84793088 +0000 UTC
+ End time : 2026-02-04 05:57:04.848005888 +0000 UTC
+ Status code : Unset
+ Status message :
+ DroppedAttributesCount: 0
+ DroppedEventsCount: 0
+ DroppedLinksCount: 0
+Span #4
+ Trace ID : a5499493b517a3333578c2ac4fad3f4d
+ Parent ID : d0c33adf97b099f3
+ ID : 0baddeee6e5d500d
+ Name : apisix.phase.body_filter
+ Kind : Server
+ Start time : 2026-02-04 05:57:04.848007936 +0000 UTC
+ End time : 2026-02-04 05:57:04.848103936 +0000 UTC
+ Status code : Unset
+ Status message :
+ DroppedAttributesCount: 0
+ DroppedEventsCount: 0
+ DroppedLinksCount: 0
+Span #5
+ Trace ID : a5499493b517a3333578c2ac4fad3f4d
+ Parent ID : d0c33adf97b099f3
+ ID : d57d53882c40612a
+ Name : apisix.phase.log.plugins.opentelemetry
+ Kind : Internal
+ Start time : 2026-02-04 05:57:04.84823296 +0000 UTC
+ End time : 2026-02-04 05:57:04.848385024 +0000 UTC
+ Status code : Unset
+ Status message :
+ DroppedAttributesCount: 0
+ DroppedEventsCount: 0
+ DroppedLinksCount: 0
+Span #6
+ Trace ID : a5499493b517a3333578c2ac4fad3f4d
Parent ID :
- ID : af3dc7642104748a
+ ID : d0c33adf97b099f3
Name : GET /anything
Kind : Server
- Start time : 2024-02-18 17:14:03.763244032 +0000 UTC
- End time : 2024-02-18 17:14:03.920229888 +0000 UTC
+ Start time : 2026-02-04 05:57:04.84655488 +0000 UTC
+ End time : 2026-02-04 05:57:04.84839296 +0000 UTC
Status code : Unset
Status message :
+ DroppedAttributesCount: 0
+ DroppedEventsCount: 0
+ DroppedLinksCount: 0
Attributes:
- -> net.host.name: Str(127.0.0.1)
- -> http.method: Str(GET)
- -> http.scheme: Str(http)
- -> http.target: Str(/anything)
- -> http.user_agent: Str(curl/7.64.1)
- -> apisix.route_id: Str(otel-tracing-route)
- -> apisix.route_name: Empty()
- -> http.route: Str(/anything)
- -> http.status_code: Int(200)
-{"kind": "exporter", "data_type": "traces", "name": "debug"}
+ -> net.host.name: Str(localhost)
+ -> http.method: Str(GET)
+ -> http.scheme: Str(http)
+ -> http.target: Str(/anything)
+ -> http.user_agent: Str(curl/7.81.0)
+ -> http.request.method: Str(GET)
+ -> url.scheme: Str(http)
+ -> uri.path: Str(/anything)
+ -> user_agent.original: Str(curl/7.81.0)
+ -> apisix.route_id: Str(otel-tracing-route)
+ -> apisix.route_name: Empty()
+ -> http.route: Str(/anything)
+ -> http.status_code: Int(200)
+ -> http.response.status_code: Int(200)
+{"resource": {"service.instance.id": "ed436c1a-6ee7-46b0-ad58-527d0aaf4ade",
"service.name": "otelcol-contrib", "service.version": "0.144.0"},
"otelcol.component.id": "debug", "otelcol.component.kind": "exporter",
"otelcol.signal": "traces"}
```
要可视化这些追踪,你可以将 traces 导出到后端服务,例如 Zipkin 和
Prometheus。有关更多详细信息,请参阅[exporters](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/exporter)。
diff --git a/t/plugin/opentelemetry6.t b/t/plugin/opentelemetry6.t
new file mode 100644
index 000000000..003e191f5
--- /dev/null
+++ b/t/plugin/opentelemetry6.t
@@ -0,0 +1,236 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+BEGIN {
+    # Read the whole file at $file_path and store it in $ENV{$env_name}.
+    sub set_env_from_file {
+        my ($env_name, $file_path) = @_;
+        # Name the file in the failure message so a missing fixture is obvious.
+        open my $fh, '<', $file_path or die "cannot open $file_path: $!";
+        my $content = do { local $/; <$fh> };  # $/ undefined => slurp entire file
+        close $fh;
+        $ENV{$env_name} = $content;
+    }
+    # Export the test certificates and keys; TEST 4 reads them via $env:// references.
+    set_env_from_file('TEST_CERT', 't/certs/apisix.crt');
+    set_env_from_file('TEST_KEY', 't/certs/apisix.key');
+    set_env_from_file('TEST2_CERT', 't/certs/test2.crt');
+    set_env_from_file('TEST2_KEY', 't/certs/test2.key');
+}
+use t::APISIX 'no_plan';
+add_block_preprocessor(sub {
+    my $blk = shift;
+    # Default YAML: set apisix.tracing to true and list the opentelemetry plugin.
+    unless ($blk->extra_yaml_config) {
+        my $default_yaml = <<_EOC_;
+apisix:
+ tracing: true
+plugins:
+ - opentelemetry
+_EOC_
+        $blk->set_value("extra_yaml_config", $default_yaml);
+    }
+    # Blocks without their own request section call the /t helper location.
+    unless ($blk->request) {
+        $blk->set_value("request", "GET /t");
+    }
+    # Only an undefined expected body is defaulted; a defined one is left alone.
+    unless (defined $blk->response_body) {
+        $blk->set_value("response_body", "passed\n");
+    }
+    return $blk;
+});
+repeat_each(1);
+no_long_string();
+no_root_location();
+log_level("debug");
+
+run_tests;
+
+__DATA__
+
+=== TEST 1: empty file
+--- exec
+echo '' > ci/pod/otelcol-contrib/data-otlp.json
+--- response_body eval
+qr//
+
+
+
+=== TEST 2: add plugin metadata
+--- config
+ location /t {
+ content_by_lua_block {
+ local t = require("lib.test_admin").test -- admin API request helper from lib.test_admin
+ local code, body = t('/apisix/admin/plugin_metadata/opentelemetry', -- PUT the opentelemetry plugin metadata
+ ngx.HTTP_PUT,
+ [[{
+ "batch_span_processor": {
+ "max_export_batch_size": 1,
+ "inactive_timeout": 0.5
+ },
+ "collector": {
+ "address": "127.0.0.1:4318",
+ "request_timeout": 3,
+ "request_headers": {
+ "foo": "bar"
+ }
+ },
+ "trace_id_source": "x-request-id"
+ }]]
+ )
+ if code >= 300 then -- surface a failed admin call through the HTTP status of /t
+ ngx.status = code
+ end
+ ngx.say(body) -- this block has no response_body section, so the preprocessor default applies
+ }
+ }
+
+
+
+=== TEST 3: set route
+--- config
+ location /t {
+ content_by_lua_block {
+ local t = require("lib.test_admin").test -- admin API request helper from lib.test_admin
+ local code, body = t('/apisix/admin/routes/1', -- PUT route 1: uri /opentracing with the opentelemetry plugin
+ ngx.HTTP_PUT,
+ [[{
+ "plugins": {
+ "opentelemetry": {
+ "sampler": {
+ "name": "always_on"
+ }
+ }
+ },
+ "upstream": {
+ "nodes": {
+ "test1.com:1980": 1
+ },
+ "type": "roundrobin"
+ },
+ "uri": "/opentracing"
+ }]]
+ )
+
+ if code >= 300 then -- surface a failed admin call through the HTTP status of /t
+ ngx.status = code
+ end
+ ngx.say(body) -- this block has no response_body section, so the preprocessor default applies
+ }
+ }
+--- request
+GET /t
+
+
+
+=== TEST 4: set ssl with two certs and keys in env
+--- config
+ location /t {
+ content_by_lua_block {
+ local core = require("apisix.core")
+ local t = require("lib.test_admin")
+
+ local data = { -- SSL object for SNI test.com; key and cert values are $env:// references
+ snis = {"test.com"},
+ key = "$env://TEST_KEY",
+ cert = "$env://TEST_CERT",
+ keys = {"$env://TEST2_KEY"},
+ certs = {"$env://TEST2_CERT"}
+ }
+
+ local code, body = t.test('/apisix/admin/ssls/1', -- PUT SSL object 1 through the admin API
+ ngx.HTTP_PUT,
+ core.json.encode(data), -- request body; NOTE(review): the literal below is presumably the expected response pattern - confirm in lib.test_admin
+ [[{
+ "value": {
+ "snis": ["test.com"],
+ "key": "$env://TEST_KEY",
+ "cert": "$env://TEST_CERT",
+ "keys": ["$env://TEST2_KEY"],
+ "certs": ["$env://TEST2_CERT"]
+ },
+ "key": "/apisix/ssls/1"
+ }]]
+ )
+
+ ngx.status = code -- unlike TEST 2 and TEST 3, the status is always copied from the admin response
+ ngx.say(body)
+ }
+ }
+--- request
+GET /t
+--- response_body
+passed
+
+
+
+=== TEST 5: trigger SSL match with SNI
+--- init_by_lua_block
+ require "resty.core"
+ apisix = require("apisix") -- no local, so this is a global; NOTE(review): presumably other phase handlers rely on it - confirm
+ core = require("apisix.core") -- also assigned as a global
+ apisix.http_init()
+
+ local utils = require("apisix.core.utils")
+ utils.dns_parse = function (domain) -- mock: DNS parser, overrides utils.dns_parse for this test
+ if domain == "test1.com" then -- test1.com is the upstream node of route 1 created in TEST 3
+ return {address = "127.0.0.2"}
+ end
+
+ error("unknown domain: " .. domain) -- any other lookup raises instead of resolving
+ end
+--- exec
+curl -k --resolve "test.com:1994:127.0.0.1" https://test.com:1994/opentracing
+--- wait: 5
+--- response_body
+opentracing
+
+
+
+=== TEST 6: check sni_radixtree_match span
+--- max_size: 1048576
+--- exec
+tail -n 18 ci/pod/otelcol-contrib/data-otlp.json
+--- response_body eval
+qr/.*sni_radixtree_match.*/
+
+
+
+=== TEST 7: check resolve_dns span
+--- max_size: 1048576
+--- exec
+tail -n 18 ci/pod/otelcol-contrib/data-otlp.json
+--- response_body eval
+qr/.*resolve_dns.*/
+
+
+
+=== TEST 8: check apisix.phase.access span
+--- max_size: 1048576
+--- exec
+tail -n 18 ci/pod/otelcol-contrib/data-otlp.json
+--- response_body eval
+qr/.*apisix.phase.access.*/
+
+
+
+=== TEST 9: check apisix.phase.header_filter span
+--- max_size: 1048576
+--- exec
+tail -n 18 ci/pod/otelcol-contrib/data-otlp.json
+--- response_body eval
+qr/.*apisix.phase.header_filter.*/