This is an automated email from the ASF dual-hosted git repository. nic443 pushed a commit to branch nic/opentelemetry in repository https://gitbox.apache.org/repos/asf/apisix.git
commit b5197a1d5a7a4aaaa303bbd0e2cf1c0f01bda8bf Author: Nic <[email protected]> AuthorDate: Sun Oct 19 18:24:20 2025 +0800 feat: add more spans to opentelemetry plugin Signed-off-by: Nic <[email protected]> --- apisix/core/response.lua | 6 +++ apisix/init.lua | 16 ++++++++ apisix/plugins/opentelemetry.lua | 44 ++++++++++++++++++++++ apisix/secret.lua | 5 +++ apisix/ssl/router/radixtree_sni.lua | 9 +++++ apisix/utils/span.lua | 73 ++++++++++++++++++++++++++++++++++++ apisix/utils/stack.lua | 75 +++++++++++++++++++++++++++++++++++++ apisix/utils/tracer.lua | 64 +++++++++++++++++++++++++++++++ 8 files changed, 292 insertions(+) diff --git a/apisix/core/response.lua b/apisix/core/response.lua index baee97749..27be28c57 100644 --- a/apisix/core/response.lua +++ b/apisix/core/response.lua @@ -19,6 +19,7 @@ -- -- @module core.response +local tracer = require("apisix.utils.tracer") local encode_json = require("cjson.safe").encode local ngx = ngx local arg = ngx.arg @@ -62,6 +63,7 @@ function resp_exit(code, ...) ngx.status = code end + local message for i = 1, select('#', ...) do local v = select(i, ...) if type(v) == "table" then @@ -73,6 +75,7 @@ function resp_exit(code, ...) t[idx] = body idx = idx + 1 t[idx] = "\n" + message = body end elseif v ~= nil then @@ -86,6 +89,9 @@ function resp_exit(code, ...) end if code then + if code >= 400 then + tracer.finish_current_span(tracer.status.ERROR, message or ("response code " .. 
code)) + end return ngx_exit(code) end end diff --git a/apisix/init.lua b/apisix/init.lua index 430572e27..c242ea6a4 100644 --- a/apisix/init.lua +++ b/apisix/init.lua @@ -47,6 +47,7 @@ local debug = require("apisix.debug") local pubsub_kafka = require("apisix.pubsub.kafka") local resource = require("apisix.resource") local trusted_addresses_util = require("apisix.utils.trusted-addresses") +local tracer = require("apisix.utils.tracer") local ngx = ngx local get_method = ngx.req.get_method local ngx_exit = ngx.exit @@ -203,6 +204,8 @@ function _M.ssl_client_hello_phase() local api_ctx = core.tablepool.fetch("api_ctx", 0, 32) ngx_ctx.api_ctx = api_ctx + local span = tracer.new_span("ssl_client_hello_phase", tracer.kind.server) + local ok, err = router.router_ssl.match_and_set(api_ctx, true, sni) ngx_ctx.matched_ssl = api_ctx.matched_ssl @@ -215,18 +218,23 @@ function _M.ssl_client_hello_phase() core.log.error("failed to fetch ssl config: ", err) end core.log.error("failed to match any SSL certificate by SNI: ", sni) + span:set_status(tracer.status.ERROR, "failed match SNI") + tracer.finish_current_span() ngx_exit(-1) end ok, err = apisix_ssl.set_protocols_by_clienthello(ngx_ctx.matched_ssl.value.ssl_protocols) if not ok then core.log.error("failed to set ssl protocols: ", err) + span:set_status(tracer.status.ERROR, "failed set protocols") + tracer.finish_current_span() ngx_exit(-1) end -- in stream subsystem, ngx.ssl.server_name() return hostname of ssl session in preread phase, -- so that we can't get real SNI without recording it in ngx.ctx during client_hello phase ngx.ctx.client_hello_sni = sni + tracer.finish_current_span() end @@ -666,6 +674,7 @@ end function _M.http_access_phase() + tracer.new_span("http_access_phase", tracer.kind.server) -- from HTTP/3 to HTTP/1.1 we need to convert :authority pesudo-header -- to Host header, so we set upstream_host variable here. 
if ngx.req.http_version() == 3 then @@ -716,19 +725,26 @@ function _M.http_access_phase() handle_x_forwarded_headers(api_ctx) + local router_match_span = tracer.new_span("http_router_match", tracer.kind.internal) router.router_http.match(api_ctx) local route = api_ctx.matched_route if not route then + tracer.new_span("run_global_rules", tracer.kind.internal) -- run global rule when there is no matching route local global_rules = apisix_global_rules.global_rules() plugin.run_global_rules(api_ctx, global_rules, nil) + tracer.finish_current_span() core.log.info("not find any matched route") + router_match_span:set_status(tracer.status.ERROR, "no matched route") + tracer.finish_current_span() return core.response.exit(404, {error_msg = "404 Route Not Found"}) end + tracer.finish_current_span() + core.log.info("matched route: ", core.json.delay_encode(api_ctx.matched_route, true)) diff --git a/apisix/plugins/opentelemetry.lua b/apisix/plugins/opentelemetry.lua index d98ac44ae..ed10aed8b 100644 --- a/apisix/plugins/opentelemetry.lua +++ b/apisix/plugins/opentelemetry.lua @@ -36,6 +36,7 @@ local span_status = require("opentelemetry.trace.span_status") local resource_new = require("opentelemetry.resource").new local attr = require("opentelemetry.attribute") +local new_context = require("opentelemetry.context").new local context = require("opentelemetry.context").new() local trace_context_propagator = require("opentelemetry.trace.propagation.text_map.trace_context_propagator").new() @@ -376,6 +377,10 @@ function _M.rewrite(conf, api_ctx) ngx_var.opentelemetry_span_id = span_context.span_id end + if not ctx:span():is_recording() then + ngx.ctx._apisix_skip_tracing = true + end + api_ctx.otel_context_token = ctx:attach() -- inject trace context into the headers of upstream HTTP request @@ -383,6 +388,41 @@ function _M.rewrite(conf, api_ctx) end +local function create_child_span(tracer, parent_span_ctx, span) + local new_span_ctx, new_span = tracer:start(parent_span_ctx, 
span.name, + { + kind = span.kind, + attributes = span.attributes, + }) + new_span.start_time = span.start_time + + for _, child in ipairs(span.children or {}) do + create_child_span(tracer, new_span_ctx, child) + end + + new_span:set_status(span.status, span.status) + new_span:finish(span.end_time) +end + + +local function inject_core_spans(root_span_ctx, api_ctx, conf) + local metadata = plugin.plugin_metadata(plugin_name) + local plugin_info = metadata.value + if not root_span_ctx:span():is_recording() then + return + end + local tracer, err = core.lrucache.plugin_ctx(lrucache, api_ctx, nil, + create_tracer_obj, conf, plugin_info) + if not tracer then + core.log.error("failed to fetch tracer object: ", err) + return + end + for _, sp in ipairs(ngx.ctx._apisix_spans or {}) do + create_child_span(tracer, root_span_ctx, sp) + end +end + + function _M.delayed_body_filter(conf, api_ctx) if api_ctx.otel_context_token and ngx.arg[2] then local ctx = context:current() @@ -399,6 +439,8 @@ function _M.delayed_body_filter(conf, api_ctx) span:set_attributes(attr.int("http.status_code", upstream_status)) + inject_core_spans(ctx, api_ctx, conf) + span:finish() end end @@ -418,6 +460,8 @@ function _M.log(conf, api_ctx) "upstream response status: " .. upstream_status) end + inject_core_spans(span, api_ctx, conf) + span:finish() end end diff --git a/apisix/secret.lua b/apisix/secret.lua index b8d7b19a5..1db6e7246 100644 --- a/apisix/secret.lua +++ b/apisix/secret.lua @@ -18,6 +18,7 @@ local require = require local core = require("apisix.core") local string = require("apisix.core.string") +local tracer = require("apisix.utils.tracer") local local_conf = require("apisix.core.config_local").local_conf() @@ -148,6 +149,7 @@ local function fetch_by_uri(secret_uri) return nil, "no secret conf, secret_uri: " .. secret_uri end + local span = tracer.new_span("fetch_secret", tracer.kind.client) local ok, sm = pcall(require, "apisix.secret." .. 
opts.manager) if not ok then return nil, "no secret manager: " .. opts.manager @@ -155,9 +157,12 @@ local function fetch_by_uri(secret_uri) local value, err = sm.get(conf, opts.key) if err then + span:set_status(tracer.status.ERROR, err) + tracer.finish_current_span() return nil, err end + tracer.finish_current_span() return value end diff --git a/apisix/ssl/router/radixtree_sni.lua b/apisix/ssl/router/radixtree_sni.lua index ae7e5b265..027cbb19a 100644 --- a/apisix/ssl/router/radixtree_sni.lua +++ b/apisix/ssl/router/radixtree_sni.lua @@ -21,6 +21,7 @@ local apisix_ssl = require("apisix.ssl") local secret = require("apisix.secret") local ngx_ssl = require("ngx.ssl") local config_util = require("apisix.core.config_util") +local tracer = require("apisix.utils.tracer") local ngx = ngx local ipairs = ipairs local type = type @@ -149,11 +150,15 @@ function _M.match_and_set(api_ctx, match_only, alt_sni) local err if not radixtree_router or radixtree_router_ver ~= ssl_certificates.conf_version then + local span = tracer.new_span("create_router", tracer.kind.internal) radixtree_router, err = create_router(ssl_certificates.values) if not radixtree_router then + span:set_status(tracer.status.ERROR, "failed create router") + tracer.finish_current_span() return false, "failed to create radixtree router: " .. 
err end radixtree_router_ver = ssl_certificates.conf_version + tracer.finish_current_span() end local sni = alt_sni @@ -170,6 +175,7 @@ function _M.match_and_set(api_ctx, match_only, alt_sni) core.log.debug("sni: ", sni) local sni_rev = sni:reverse() + local span = tracer.new_span("sni_radixtree_match", tracer.kind.internal) local ok = radixtree_router:dispatch(sni_rev, nil, api_ctx) if not ok then if not alt_sni then @@ -177,8 +183,11 @@ function _M.match_and_set(api_ctx, match_only, alt_sni) -- with it sometimes core.log.error("failed to find any SSL certificate by SNI: ", sni) end + span:set_status(tracer.status.ERROR, "failed match SNI") + tracer.finish_current_span() return false end + tracer.finish_current_span() if type(api_ctx.matched_sni) == "table" then diff --git a/apisix/utils/span.lua b/apisix/utils/span.lua new file mode 100644 index 000000000..c1eacd237 --- /dev/null +++ b/apisix/utils/span.lua @@ -0,0 +1,73 @@ +-- +-- Licensed to the Apache Software Foundation (ASF) under one or more +-- contributor license agreements. See the NOTICE file distributed with +-- this work for additional information regarding copyright ownership. +-- The ASF licenses this file to You under the Apache License, Version 2.0 +-- (the "License"); you may not use this file except in compliance with +-- the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. 
+--
+local util = require("opentelemetry.util")
+local span_status = require("opentelemetry.trace.span_status")
+
+
+-- Lightweight in-memory span record. It only stores data (name, kind,
+-- timestamps, attributes, status and nested children); nothing in this
+-- module exports the span anywhere.
+local _M = {}
+local mt = { __index = _M }
+
+
+--- Create a span record and stamp its start time.
+-- @param name span name, stored as-is
+-- @param kind span kind, stored as-is
+-- @return the new span; `end_time` stays 0 until `finish` is called
+function _M.new(name, kind)
+    local sp = {
+        name = name,
+        kind = kind,
+        start_time = util.time_nano(),
+        end_time = 0,
+        attributes = {},
+        children = {},
+    }
+    setmetatable(sp, mt)
+    return sp
+end
+
+
+--- Register `span` as the last child of this span.
+function _M:append_child(span)
+    local children = self.children
+    children[#children + 1] = span
+end
+
+
+--- Record the span status.
+-- The code is passed through `span_status.validate` first. The message is
+-- kept only when the validated code equals `span_status.ERROR`; for any
+-- other code the stored message is the empty string.
+function _M:set_status(code, message)
+    local validated = span_status.validate(code)
+
+    -- explicit branch (not `and/or`) so a nil message stays nil on ERROR
+    local text = ""
+    if validated == span_status.ERROR then
+        text = message
+    end
+
+    self.status = {
+        code = validated,
+        message = text,
+    }
+end
+
+
+--- Append every argument to the attribute list, in call order.
+-- Iteration uses `ipairs`, so it stops at the first nil argument.
+function _M:set_attributes(...)
+    for _, item in ipairs({ ... }) do
+        local attrs = self.attributes
+        attrs[#attrs + 1] = item
+    end
+end
+
+
+--- Stamp the end time of the span.
+function _M:finish()
+    self.end_time = util.time_nano()
+end
+
+
+return _M
diff --git a/apisix/utils/stack.lua b/apisix/utils/stack.lua
new file mode 100644
index 000000000..a88e4832a
--- /dev/null
+++ b/apisix/utils/stack.lua
@@ -0,0 +1,75 @@
+--
+-- Licensed to the Apache Software Foundation (ASF) under one or more
+-- contributor license agreements. See the NOTICE file distributed with
+-- this work for additional information regarding copyright ownership.
+-- The ASF licenses this file to You under the Apache License, Version 2.0
+-- (the "License"); you may not use this file except in compliance with
+-- the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+-- Minimal LIFO stack. Elements live in `_data[1.._n]`; `_n` is tracked
+-- explicitly instead of relying on the `#` operator.
+local _M = {}
+local mt = { __index = _M }
+
+--- Create an empty stack.
+function _M.new()
+    return setmetatable({ _data = {}, _n = 0 }, mt)
+end
+
+
+--- Push `value` on top of the stack.
+function _M:push(value)
+    local top = self._n + 1
+    self._n = top
+    self._data[top] = value
+end
+
+
+--- Remove and return the top element, or nil when the stack is empty.
+function _M:pop()
+    local top = self._n
+    if top == 0 then
+        return nil
+    end
+
+    local items = self._data
+    local value = items[top]
+    items[top] = nil
+    self._n = top - 1
+    return value
+end
+
+
+--- Return the top element without removing it, or nil when empty.
+function _M:peek()
+    local top = self._n
+    if top == 0 then
+        return nil
+    end
+
+    return self._data[top]
+end
+
+
+--- Tell whether the stack holds no elements.
+function _M:is_empty()
+    return self._n == 0
+end
+
+
+--- Number of elements currently on the stack.
+function _M:size()
+    return self._n
+end
+
+
+--- Drop every element and reset the size to zero.
+function _M:clear()
+    local items = self._data
+    for i = self._n, 1, -1 do
+        items[i] = nil
+    end
+    self._n = 0
+end
+
+
+return _M
+
diff --git a/apisix/utils/tracer.lua b/apisix/utils/tracer.lua
new file mode 100644
index 000000000..d078562e2
--- /dev/null
+++ b/apisix/utils/tracer.lua
@@ -0,0 +1,64 @@
+--
+-- Licensed to the Apache Software Foundation (ASF) under one or more
+-- contributor license agreements. See the NOTICE file distributed with
+-- this work for additional information regarding copyright ownership.
+-- The ASF licenses this file to You under the Apache License, Version 2.0
+-- (the "License"); you may not use this file except in compliance with
+-- the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+local ngx = ngx
+local stack = require("apisix.utils.stack")
+local span = require("apisix.utils.span")
+local span_kind = require("opentelemetry.trace.span_kind")
+local span_status = require("opentelemetry.trace.span_status")
+
+
+-- Per-request span bookkeeping kept in `ngx.ctx`:
+--   _apisix_spans         list of root spans
+--   _apisix_span_stack    stack of spans that are currently open
+--   _apisix_skip_tracing  when truthy, spans are neither linked nor stacked
+local _M = {
+    kind = span_kind,
+    status = span_status,
+}
+
+
+--- Open a span and make it the current one.
+-- The span becomes a child of the span on top of the stack, or a root span
+-- when the stack is empty, and is then pushed on the stack.
+-- When `ngx.ctx._apisix_skip_tracing` is set, the span is created but left
+-- detached (not linked, not pushed).
+-- @param name span name
+-- @param kind one of `_M.kind`
+-- @return the span, always non-nil, so callers can safely call
+--         `span:set_status(...)` on the result
+function _M.new_span(name, kind)
+    local ctx = ngx.ctx
+    if not ctx._apisix_spans then
+        ctx._apisix_spans = {}
+    end
+    if not ctx._apisix_span_stack then
+        ctx._apisix_span_stack = stack.new()
+    end
+
+    local sp = span.new(name, kind)
+    if ctx._apisix_skip_tracing then
+        return sp
+    end
+
+    local span_stack = ctx._apisix_span_stack
+    if span_stack:is_empty() then
+        table.insert(ctx._apisix_spans, sp)
+    else
+        span_stack:peek():append_child(sp)
+    end
+    span_stack:push(sp)
+
+    -- the original fell off the end here and returned nil
+    return sp
+end
+
+
+--- Close the current span (the one on top of the stack).
+-- Does nothing when no stack exists for this request, when the stack is
+-- empty, or when tracing is skipped: in skip mode `new_span` pushes
+-- nothing, so popping here would close an unrelated, earlier span.
+-- @param code optional status code; when given, it is set before finishing
+-- @param message optional status message passed along with `code`
+function _M.finish_current_span(code, message)
+    local ctx = ngx.ctx
+    local span_stack = ctx._apisix_span_stack
+    if not span_stack or ctx._apisix_skip_tracing then
+        return
+    end
+
+    local sp = span_stack:pop()
+    if not sp then
+        -- unbalanced finish call: nothing is open
+        return
+    end
+
+    if code then
+        sp:set_status(code, message)
+    end
+    sp:finish()
+end
+
+
+return _M
