Copilot commented on code in PR #12686:
URL: https://github.com/apache/apisix/pull/12686#discussion_r2762500654
##########
apisix/secret.lua:
##########
@@ -148,16 +150,19 @@ local function fetch_by_uri_secret(secret_uri)
return nil, "no secret conf, secret_uri: " .. secret_uri
end
+ local span = tracer.start(ngx.ctx, "fetch_secret", tracer.kind.client)
local ok, sm = pcall(require, "apisix.secret." .. opts.manager)
if not ok then
return nil, "no secret manager: " .. opts.manager
end
local value, err = sm.get(conf, opts.key)
if err then
+ tracer.finish(ngx.ctx, span, tracer.status.ERROR, err)
return nil, err
Review Comment:
`tracer.finish` is called with the wrong argument order on the error path.
It should be `tracer.finish(ngx.ctx, span, tracer.status.ERROR, err)` (or
equivalent) so it doesn't attempt to treat the status enum as a span object.
##########
apisix/core/response.lua:
##########
@@ -86,6 +87,9 @@ function resp_exit(code, ...)
end
if code then
+ if code >= 400 then
+ tracer.finish_all(ngx.ctx, tracer.status.ERROR, "response code "
.. code)
Review Comment:
This call uses `tracer.finish(ngx.ctx, tracer.status.ERROR, ...)` but
`finish` expects `(ctx, sp, code, message)`. As written it will crash for any
`code >= 400` response when tracing is enabled. If the intent is “finish
current span with error”, pass an explicit `nil` span (`finish(ctx, nil, code,
msg)`) or introduce a dedicated `finish_current(ctx, code, msg)` helper.
```suggestion
tracer.finish_all(ngx.ctx, nil, code, "response code " .. code)
```
##########
apisix/tracer.lua:
##########
@@ -0,0 +1,131 @@
+--
+-- Licensed to the Apache Software Foundation (ASF) under one or more
+-- contributor license agreements. See the NOTICE file distributed with
+-- this work for additional information regarding copyright ownership.
+-- The ASF licenses this file to You under the Apache License, Version 2.0
+-- (the "License"); you may not use this file except in compliance with
+-- the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+local table = require("apisix.core.table")
+local tablepool = require("tablepool")
+local span = require("apisix.utils.span")
+local span_kind = require("opentelemetry.trace.span_kind")
+local span_status = require("opentelemetry.trace.span_status")
+local local_conf = require("apisix.core.config_local").local_conf()
+local ipairs = ipairs
+local ngx = ngx
+
+local enable_tracing = false
+if ngx.config.subsystem == "http" and type(local_conf.apisix.tracing) ==
"boolean" then
+ enable_tracing = local_conf.apisix.tracing
+end
+
+local _M = {
+ kind = span_kind,
+ status = span_status,
+ span_state = {},
+}
+
+function _M.start(ctx, name, kind)
+ if not enable_tracing then
+ return
+ end
+
+ local tracing = ctx and ctx.tracing
+ if not tracing then
+ local root_span = span.new()
+ tracing = tablepool.fetch("tracing", 0, 8)
+ tracing.spans = tablepool.fetch("tracing_spans", 20, 0)
+ tracing.root_span = root_span
+ tracing.current_span = root_span
+ table.insert(tracing.spans, root_span)
+ root_span.id = 1
+ ctx.tracing = tracing
+ end
+ if tracing.skip then
+ return
+ end
+
+ local spans = tracing.spans
+ local sp = span.new(name, kind)
+
+ table.insert(spans, sp)
+ local id = #spans
+ sp.id = id
+ local parent = tracing.current_span
+ if parent then
+ sp:set_parent(parent.id)
+ parent:append_child(id)
+ end
+ tracing.current_span = sp
+ return sp
+end
+
+
+local function finish_span(spans, sp, code, message)
+ if not sp or sp.end_time then
+ return
+ end
+ for _, id in ipairs(sp.child_ids or {}) do
+ finish_span(spans, spans[id])
+ end
+ if code then
+ sp:set_status(code, message)
+ end
+ sp:finish()
+end
+
+
+function _M.finish(ctx, sp, code, message)
+ local tracing = ctx and ctx.tracing
+ if not tracing then
+ return
+ end
+
+ sp = sp or tracing.current_span
+ if not sp then
+ return
+ end
+
+ finish_span(tracing.spans, sp, code, message)
+ tracing.current_span = tracing.spans[sp.parent_id]
+end
Review Comment:
`finish()` defaults to `tracing.current_span` when `sp` is nil. Combined
with `start()` returning nil when `tracing.skip` is true, common call sites
(`local sp = start(...); finish(ctx, sp)`) will incorrectly finish the
*current* span even though no new span was started. Consider making
`finish(ctx, sp, ...)` a no-op when `sp` is nil, and add a separate
`finish_current(ctx, ...)` API for the cases where finishing the current span
is desired.
##########
apisix/plugins/opentelemetry.lua:
##########
@@ -327,10 +328,17 @@ function _M.rewrite(conf, api_ctx)
local attributes = {
attr.string("net.host.name", vars.host),
+ -- deprecated attributes
attr.string("http.method", vars.method),
attr.string("http.scheme", vars.scheme),
attr.string("http.target", vars.request_uri),
attr.string("http.user_agent", vars.http_user_agent),
+
+ -- new attributes
+ attr.string("http.request.method", vars.method),
+ attr.string("url.scheme", vars.scheme),
+ attr.string("uri.path",vars.request_uri),
Review Comment:
`uri.path` should contain only the URL path and not the query string.
`vars.request_uri` typically includes args (e.g. `/foo?a=1`), so this attribute
will be incorrect. Use `vars.uri` (or another path-only value) for `uri.path`
and keep `http.target`/`request_uri` for the full target if needed.
```suggestion
attr.string("uri.path", vars.uri),
```
##########
apisix/ssl/router/radixtree_sni.lua:
##########
@@ -177,9 +179,10 @@ function _M.match_and_set(api_ctx, match_only, alt_sni)
-- with it sometimes
core.log.error("failed to find any SSL certificate by SNI: ", sni)
end
+ tracer.finish(api_ctx.ngx_ctx, span, tracer.status.ERROR, "failed
match SNI")
Review Comment:
`tracer.finish` is called with the wrong argument order here. The signature
is `finish(ctx, sp, code, message)`, but this passes `(ctx, code, message)`
which will attempt to index a non-span value and can crash on SNI-miss. Pass
the started `span` as the second argument (and the error code/message as
3rd/4th), and ensure the span is finished before returning.
##########
apisix/tracer.lua:
##########
@@ -0,0 +1,131 @@
+--
+-- Licensed to the Apache Software Foundation (ASF) under one or more
+-- contributor license agreements. See the NOTICE file distributed with
+-- this work for additional information regarding copyright ownership.
+-- The ASF licenses this file to You under the Apache License, Version 2.0
+-- (the "License"); you may not use this file except in compliance with
+-- the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+local table = require("apisix.core.table")
+local tablepool = require("tablepool")
+local span = require("apisix.utils.span")
+local span_kind = require("opentelemetry.trace.span_kind")
+local span_status = require("opentelemetry.trace.span_status")
+local local_conf = require("apisix.core.config_local").local_conf()
+local ipairs = ipairs
+local ngx = ngx
+
+local enable_tracing = false
+if ngx.config.subsystem == "http" and type(local_conf.apisix.tracing) ==
"boolean" then
+ enable_tracing = local_conf.apisix.tracing
+end
+
+local _M = {
+ kind = span_kind,
+ status = span_status,
+ span_state = {},
+}
+
+function _M.start(ctx, name, kind)
+ if not enable_tracing then
+ return
+ end
+
+ local tracing = ctx and ctx.tracing
+ if not tracing then
+ local root_span = span.new()
+ tracing = tablepool.fetch("tracing", 0, 8)
+ tracing.spans = tablepool.fetch("tracing_spans", 20, 0)
+ tracing.root_span = root_span
+ tracing.current_span = root_span
+ table.insert(tracing.spans, root_span)
+ root_span.id = 1
+ ctx.tracing = tracing
+ end
Review Comment:
`start()` can be called with `ctx == nil` (call sites already pass `ngx.ctx`
in some places, but this function itself allows `ctx` to be nil via `ctx and
ctx.tracing`). However it unconditionally assigns `ctx.tracing = tracing`,
which will throw if `ctx` is nil. Add an early return when `ctx` is nil (or
require callers to always pass a non-nil ctx).
##########
apisix/utils/span.lua:
##########
@@ -0,0 +1,101 @@
+--
+-- Licensed to the Apache Software Foundation (ASF) under one or more
+-- contributor license agreements. See the NOTICE file distributed with
+-- this work for additional information regarding copyright ownership.
+-- The ASF licenses this file to You under the Apache License, Version 2.0
+-- (the "License"); you may not use this file except in compliance with
+-- the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+local tablepool = require("tablepool")
+local util = require("opentelemetry.util")
+local span_status = require("opentelemetry.trace.span_status")
+local setmetatable = setmetatable
+local table = table
+local select = select
+local pool_name = "opentelemetry_span"
+local update_time = ngx.update_time
+
+local _M = {}
+
+
+local mt = {
+ __index = _M
+}
+
+local function get_time()
+ update_time()
+ return util.time_nano()
+end
+
+
+function _M.new(name, kind)
+ local self = tablepool.fetch(pool_name, 0, 16)
+ self.start_time = get_time()
+ self.name = name
+ self.kind = kind
+ self.status = nil
+ self.dead = false
+ return setmetatable(self, mt)
+end
Review Comment:
`_M.new()` does not initialize/reset fields that other methods rely on (e.g.
`self.attributes` is never set, but `set_attributes()` does
`table.insert(self.attributes, ...)` which will error). Initialize `attributes`
to an empty array and ensure pooled fields like `child_ids`, `parent_id`,
`end_time`, `id`, and any temporary flags are reset when fetching from the pool.
##########
apisix/plugins/opentelemetry.lua:
##########
@@ -376,48 +384,96 @@ function _M.rewrite(conf, api_ctx)
ngx_var.opentelemetry_span_id = span_context.span_id
end
+ if not ctx:span():is_recording() and ngx.ctx.tracing then
+ ngx.ctx.tracing.skip = true
+ end
+
api_ctx.otel_context_token = ctx:attach()
-- inject trace context into the headers of upstream HTTP request
trace_context_propagator:inject(ctx, ngx.req)
end
-function _M.delayed_body_filter(conf, api_ctx)
- if api_ctx.otel_context_token and ngx.arg[2] then
- local ctx = context:current()
- ctx:detach(api_ctx.otel_context_token)
- api_ctx.otel_context_token = nil
+local function create_child_span(tracer, parent_span_ctx, spans, span)
+ if not span or span.finished then
+ return
+ end
+ span.finished = true
+ local new_span_ctx, new_span = tracer:start(parent_span_ctx, span.name,
+ {
+ kind = span.kind,
+ attributes = span.attributes,
+ })
+ new_span.start_time = span.start_time
+
+ for _, idx in ipairs(span.child_ids or {}) do
+ create_child_span(tracer, new_span_ctx, spans, spans[idx])
+ end
+ if span.status then
+ new_span:set_status(span.status.code, span.status.message)
+ end
+ new_span:finish(span.end_time)
+end
Review Comment:
`create_child_span()` calls `new_span:finish(span.end_time)` but spans
created during the current log phase (e.g. the plugin-wrapper span around this
opentelemetry plugin) have not been finished yet when `inject_core_spans()`
runs, so `span.end_time` can be nil here. Depending on the OpenTelemetry Lua
API this can either error or produce incorrect end timestamps. Consider
exporting only spans with `end_time` set, or moving
`tracer.finish_all(...)`/core-span injection to a point after log-phase spans
are finished.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]