This is an automated email from the ASF dual-hosted git repository.
ashishtiwari pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/apisix.git
The following commit(s) were added to refs/heads/master by this push:
new 0151d9e35 fix(ai-proxy-multi): inconsistent resolved nodes for healthcheck (#12594)
0151d9e35 is described below
commit 0151d9e35bba63d7c316187272d88e19db0be634
Author: Ashish Tiwari <[email protected]>
AuthorDate: Mon Sep 22 12:40:22 2025 +0530
fix(ai-proxy-multi): inconsistent resolved nodes for healthcheck (#12594)
---
apisix/plugins/ai-proxy-multi.lua | 39 +++++++++---
t/plugin/ai-proxy-multi3.t | 122 ++++++++++++++++++++++++++++++++++++++
2 files changed, 153 insertions(+), 8 deletions(-)
diff --git a/apisix/plugins/ai-proxy-multi.lua b/apisix/plugins/ai-proxy-multi.lua
index a5682537e..bb32f5ffb 100644
--- a/apisix/plugins/ai-proxy-multi.lua
+++ b/apisix/plugins/ai-proxy-multi.lua
@@ -21,6 +21,7 @@ local base = require("apisix.plugins.ai-proxy.base")
local plugin = require("apisix.plugin")
local ipmatcher = require("resty.ipmatcher")
local healthcheck_manager = require("apisix.healthcheck_manager")
+local resource = require("apisix.resource")
local tonumber = tonumber
local pairs = pairs
@@ -185,13 +186,25 @@ local function resolve_endpoint(instance_conf)
host = ai_driver.host
port = ai_driver.port
end
- local node = {
+ local new_node = {
host = host,
- port = port,
+ port = tonumber(port),
scheme = scheme,
}
- parse_domain_for_node(node)
- return node
+ parse_domain_for_node(new_node)
+
+ -- Compare with existing node to see if anything changed
+ local old_node = instance_conf._dns_value
+ local nodes_changed = not old_node or
+ old_node.host ~= new_node.host
+
+ -- Only update if something changed
+ if nodes_changed then
+ instance_conf._dns_value = new_node
+ instance_conf._nodes_ver = (instance_conf._nodes_ver or 0) + 1
+ core.log.info("DNS resolution changed for instance: ",
instance_conf.name,
+ " new node: ", core.json.delay_encode(new_node))
+ end
end
@@ -221,7 +234,7 @@ local function fetch_health_instances(conf, checkers)
local host = ins.checks and ins.checks.active and
ins.checks.active.host
local port = ins.checks and ins.checks.active and
ins.checks.active.port
- local node = resolve_endpoint(ins)
+ local node = ins._dns_value
local ok, err = checker:get_target_status(node.host, port or
node.port, host)
if ok then
transform_instances(new_instances, ins)
@@ -276,7 +289,7 @@ end
function _M.construct_upstream(instance)
local upstream = {}
- local node = resolve_endpoint(instance)
+ local node = instance._dns_value
if not node then
return nil, "failed to resolve endpoint for instance: " ..
instance.name
end
@@ -285,8 +298,6 @@ function _M.construct_upstream(instance)
return nil, "invalid upstream node: " .. core.json.encode(node)
end
- parse_domain_for_node(node)
-
local node = {
host = node.host,
port = node.port,
@@ -297,18 +308,30 @@ function _M.construct_upstream(instance)
}
upstream.nodes = {node}
upstream.checks = instance.checks
+ upstream._nodes_ver = instance._nodes_ver
return upstream
end
local function pick_target(ctx, conf, ups_tab)
local checkers
+ local res_conf = resource.fetch_latest_conf(conf._meta.parent.resource_key)
+ if not res_conf then
+ return nil, nil, "failed to fetch the parent config"
+ end
+ local instances = res_conf.value.plugins[plugin_name].instances
for i, instance in ipairs(conf.instances) do
if instance.checks then
+ resolve_endpoint(instance)
-- json path is 0 indexed so we need to decrement i
local resource_path = conf._meta.parent.resource_key ..
"#plugins['ai-proxy-multi'].instances[" ..
i-1 .. "]"
local resource_version = conf._meta.parent.resource_version
+ if instance._nodes_ver then
+ resource_version = resource_version .. instance._nodes_ver
+ end
+ instances[i]._dns_value = instance._dns_value
+ instances[i]._nodes_ver = instance._nodes_ver
local checker = healthcheck_manager.fetch_checker(resource_path,
resource_version)
checkers = checkers or {}
checkers[instance.name] = checker
diff --git a/t/plugin/ai-proxy-multi3.t b/t/plugin/ai-proxy-multi3.t
index dc4601208..33f317d63 100644
--- a/t/plugin/ai-proxy-multi3.t
+++ b/t/plugin/ai-proxy-multi3.t
@@ -909,3 +909,125 @@ POST /ai
]
}
--- error_code: 401
+
+
+
+=== TEST 13: DNS change doesn't cause health check errors
+--- config
+ location /t {
+ content_by_lua_block {
+ local t = require("lib.test_admin").test
+ local resolver = require("apisix.core.resolver")
+ -- Mock resolver.parse_domain to return different IPs on different calls
+ local original_parse_domain = resolver.parse_domain
+ local call_count = 0
+ resolver.parse_domain = function(host)
+ if host == "test.example.com" then
+ call_count = call_count + 1
+ if call_count == 1 then
+ return "127.0.0.1"
+ else
+ return "127.0.0.2"
+ end
+ end
+ return original_parse_domain(host)
+ end
+ -- Create a route with health check that uses the domain
+ local code, body = t('/apisix/admin/routes/1',
+ ngx.HTTP_PUT,
+ [[{
+ "uri": "/ai",
+ "plugins": {
+ "ai-proxy-multi": {
+ "instances": [
+ {
+ "name": "openai-test",
+ "provider": "openai",
+ "weight": 1,
+ "priority": 1,
+ "auth": {
+ "header": {
+ "Authorization": "Bearer token"
+ }
+ },
+ "options": {
+ "model": "gpt-4"
+ },
+ "override": {
+ "endpoint":
"http://test.example.com:16724"
+ },
+ "checks": {
+ "active": {
+ "timeout": 5,
+ "http_path": "/status/test",
+ "host": "test.example.com",
+ "healthy": {
+ "interval": 1,
+ "successes": 1
+ },
+ "unhealthy": {
+ "interval": 1,
+ "http_failures": 1
+ }
+ }
+ }
+ },
+ {"name": "openai-test-2","provider":
"openai","weight": 1,"priority": 1,"auth": {"header": {"Authorization": "Bearer
token"}},"options": {"model": "gpt-4"},"override": {"endpoint":
"http://test.example.com:16724"},"checks": {"active": {"timeout":
5,"http_path": "/status/test","host": "test.example.com","healthy":
{"interval": 1,"successes": 1},"unhealthy": {"interval": 1,"http_failures":
1}}}}
+ ],
+ "ssl_verify": false
+ }
+ }
+ }]]
+ )
+ if code >= 300 then
+ ngx.status = code
+ end
+ ngx.say(body)
+
+ local code, _, body = t("/ai",
+ ngx.HTTP_POST,
+ [[{
+ "messages": [
+ { "role": "system", "content": "You are a
mathematician" },
+ { "role": "user", "content": "What is 1+1?" }
+ ]
+ }]],
+ nil,
+ {
+ ["test-type"] = "options",
+ ["Content-Type"] = "application/json",
+ }
+ )
+
+ -- Wait a bit for health check to run
+ ngx.sleep(1.5)
+
+ local code, _, body = t("/ai",
+ ngx.HTTP_POST,
+ [[{
+ "messages": [
+ { "role": "system", "content": "You are a
mathematician" },
+ { "role": "user", "content": "What is 1+1?" }
+ ]
+ }]],
+ nil,
+ {
+ ["test-type"] = "options",
+ ["Content-Type"] = "application/json",
+ }
+ )
+
+ -- Restore original function
+ resolver.parse_domain = original_parse_domain
+ ngx.sleep(3)
+ ngx.say("passed")
+ }
+ }
+--- response_body
+passed
+passed
+--- no_error_log
+failed to get health check target status
+--- error_log
+releasing existing checker
+--- timeout: 5