This is an automated email from the ASF dual-hosted git repository.
nic-6443 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/apisix.git
The following commit(s) were added to refs/heads/master by this push:
new 4f40a4653 fix(ai-proxy-multi): stabilize domain health checks (#13441)
4f40a4653 is described below
commit 4f40a465321d71763f3b88e4a9b69ac47278b547
Author: Nic <[email protected]>
AuthorDate: Wed May 27 11:36:05 2026 +0800
fix(ai-proxy-multi): stabilize domain health checks (#13441)
---
apisix/core/resolver.lua | 59 ++++
apisix/plugins/ai-providers/base.lua | 6 +-
apisix/plugins/ai-proxy-multi.lua | 201 +++++++++---
apisix/plugins/ai-proxy/base.lua | 5 +-
apisix/plugins/ai-transport/auth-aws.lua | 7 +-
t/plugin/ai-proxy-multi-domain-healthcheck-repro.t | 244 +++++++++++++++
t/plugin/ai-proxy-multi-domain-healthcheck.t | 338 +++++++++++++++++++++
t/plugin/ai-proxy-multi.balancer.t | 26 +-
t/plugin/ai-proxy-multi3.t | 34 ++-
9 files changed, 850 insertions(+), 70 deletions(-)
diff --git a/apisix/core/resolver.lua b/apisix/core/resolver.lua
index 3568a9762..e1e75dccf 100644
--- a/apisix/core/resolver.lua
+++ b/apisix/core/resolver.lua
@@ -22,8 +22,11 @@
local json = require("apisix.core.json")
local log = require("apisix.core.log")
local utils = require("apisix.core.utils")
+local dns_client = require("apisix.core.dns.client")
local dns_utils = require("resty.dns.utils")
local config_local = require("apisix.core.config_local")
+local ipairs = ipairs
+local table_sort = table.sort
local HOSTS_IP_MATCH_CACHE = {}
@@ -93,4 +96,60 @@ function _M.parse_domain(host)
end
+local function sort_ip(a, b)
+ return a < b
+end
+
+
+function _M.parse_domain_all(host)
+ local ips = {}
+ local seen = {}
+
+ local rev = HOSTS_IP_MATCH_CACHE[host]
+ local enable_ipv6 = config_local.local_conf().apisix.enable_ipv6
+ if rev then
+ if rev["ipv4"] then
+ ips[#ips + 1] = rev["ipv4"]
+ seen[rev["ipv4"]] = true
+ end
+
+ if enable_ipv6 and rev["ipv6"] and not seen[rev["ipv6"]] then
+ ips[#ips + 1] = rev["ipv6"]
+ end
+ end
+
+ if #ips > 0 then
+ table_sort(ips, sort_ip)
+ log.info("dns resolve ", host, ", result: ", json.delay_encode(ips))
+ return ips
+ end
+
+ local records, err = utils.dns_parse(host, dns_client.RETURN_ALL)
+ if not records then
+ log.error("failed to parse domain: ", host, ", error: ", err)
+ return nil, err
+ end
+
+ log.info("parse addr: ", json.delay_encode(records))
+ log.info("resolver: ", json.delay_encode(utils.get_resolver()))
+ log.info("host: ", host)
+
+ for _, record in ipairs(records) do
+ local ip = record.address
+ if ip and not seen[ip] then
+ ips[#ips + 1] = ip
+ seen[ip] = true
+ end
+ end
+
+ if #ips == 0 then
+ return nil, "failed to parse domain"
+ end
+
+ table_sort(ips, sort_ip)
+ log.info("dns resolve ", host, ", result: ", json.delay_encode(ips))
+ return ips
+end
+
+
return _M
diff --git a/apisix/plugins/ai-providers/base.lua
b/apisix/plugins/ai-providers/base.lua
index f7cde8a53..407fe0bb8 100644
--- a/apisix/plugins/ai-providers/base.lua
+++ b/apisix/plugins/ai-providers/base.lua
@@ -182,6 +182,9 @@ function _M.build_request(self, ctx, conf, request_body,
opts)
end
local headers = transport_http.construct_forward_headers(auth.header or
{}, ctx)
+ if opts.host_header then
+ headers["Host"] = opts.host_header
+ end
if token then
headers["authorization"] = "Bearer " .. token
end
@@ -200,7 +203,8 @@ function _M.build_request(self, ctx, conf, request_body,
opts)
query = query_params,
host = host,
port = port,
- ssl_server_name = parsed_url and parsed_url.host
+ ssl_server_name = opts.ssl_server_name
+ or parsed_url and parsed_url.host
or opts.target_host or self.host,
}
diff --git a/apisix/plugins/ai-proxy-multi.lua
b/apisix/plugins/ai-proxy-multi.lua
index 29f76b045..3c6ac9330 100644
--- a/apisix/plugins/ai-proxy-multi.lua
+++ b/apisix/plugins/ai-proxy-multi.lua
@@ -26,12 +26,15 @@ local resource = require("apisix.resource")
local exporter = require("apisix.plugins.prometheus.exporter")
local tonumber = tonumber
local pairs = pairs
+local table_sort = table.sort
+local math_random = math.random
local require = require
local pcall = pcall
local ipairs = ipairs
local type = type
local string = string
+local url = require("socket.url")
local priority_balancer = require("apisix.balancer.priority")
local endpoint_regex = "^(https?)://([^:/]+):?(%d*)/?.*$"
@@ -165,32 +168,130 @@ local function transform_instances(new_instances,
instance)
new_instances[instance.priority][instance.name] = instance.weight
end
-local function parse_domain_for_node(node)
+local function sort_nodes(a, b)
+ if a.host == b.host then
+ return (a.port or 0) < (b.port or 0)
+ end
+ return a.host < b.host
+end
+
+
+local function nodes_equal(old_nodes, new_nodes)
+ if old_nodes == new_nodes then
+ return true
+ end
+
+ if type(old_nodes) ~= "table" or #old_nodes ~= #new_nodes then
+ return false
+ end
+
+ for i, new_node in ipairs(new_nodes) do
+ local old_node = old_nodes[i]
+ for _, field in ipairs({"host", "port", "scheme", "domain"}) do
+ if old_node[field] ~= new_node[field] then
+ return false
+ end
+ end
+ end
+
+ return true
+end
+
+
+local function parse_domain_for_nodes(node)
local host = node.domain or node.host
if not ipmatcher.parse_ipv4(host)
and not ipmatcher.parse_ipv6(host)
then
- node.domain = host
-
- local ip, err = core.resolver.parse_domain(host)
- if ip then
- node.host = ip
- end
-
+ local ips, err = core.resolver.parse_domain_all(host)
if err then
core.log.error("dns resolver domain: ", host, " error: ", err)
end
+
+ if ips then
+ local nodes = core.table.new(#ips, 0)
+ for _, ip in ipairs(ips) do
+ local new_node = core.table.clone(node)
+ new_node.host = ip
+ new_node.domain = host
+ core.table.insert(nodes, new_node)
+ end
+ table_sort(nodes, sort_nodes)
+ return nodes
+ end
+ end
+
+ return {node}
+end
+
+
+local function make_endpoint(node)
+ local host = node.host
+ if ipmatcher.parse_ipv6(host) then
+ host = "[" .. host .. "]"
+ end
+
+ local endpoint = node.scheme .. "://" .. host .. ":" .. node.port
+ if node.path then
+ endpoint = endpoint .. node.path
+ end
+ if node.query then
+ endpoint = endpoint .. "?" .. node.query
end
+ return endpoint
end
--- resolves endpoint and sets it on __dns_value
+
+local function make_host_header(node)
+ if not node.domain then
+ return nil
+ end
+
+ local port = tonumber(node.port)
+ if (node.scheme == "https" and port ~= 443)
+ or (node.scheme ~= "https" and port ~= 80)
+ then
+ return node.domain .. ":" .. node.port
+ end
+
+ return node.domain
+end
+
+
+local function use_node_for_request(instance_conf, node)
+ if not node then
+ return
+ end
+
+ instance_conf._dns_value = node
+ instance_conf._resolved_endpoint = make_endpoint(node)
+ instance_conf._resolved_host_header = make_host_header(node)
+ instance_conf._resolved_ssl_server_name = node.domain
+end
+
+
+local function pick_request_node(nodes)
+ if not nodes or #nodes == 0 then
+ return
+ end
+
+ return nodes[math_random(1, #nodes)]
+end
+
+
+-- resolves endpoint and sets it on _dns_nodes
local function resolve_endpoint(instance_conf)
- local scheme, host, port
+ local scheme, host, port, path, query
local endpoint = core.table.try_read_attr(instance_conf, "override",
"endpoint")
if endpoint then
- scheme, host, port = endpoint:match(endpoint_regex)
- if port == "" then
- port = (scheme == "https") and "443" or "80"
+ local parsed = url.parse(endpoint)
+ scheme = parsed.scheme
+ host = parsed.host
+ port = parsed.port
+ path = parsed.path
+ query = parsed.query
+ if not port then
+ port = (scheme == "https") and 443 or 80
end
port = tonumber(port)
else
@@ -210,21 +311,21 @@ local function resolve_endpoint(instance_conf)
host = host,
port = port,
scheme = scheme,
+ path = path,
+ query = query,
}
- parse_domain_for_node(new_node)
+ local new_nodes = parse_domain_for_nodes(new_node)
- -- Compare with existing node to see if anything changed
- local old_node = instance_conf._dns_value
- local nodes_changed = not old_node or
- old_node.host ~= new_node.host
+ local nodes_changed = not nodes_equal(instance_conf._dns_nodes, new_nodes)
- -- Only update if something changed
if nodes_changed then
- instance_conf._dns_value = new_node
+ instance_conf._dns_nodes = new_nodes
instance_conf._nodes_ver = (instance_conf._nodes_ver or 0) + 1
core.log.info("DNS resolution changed for instance: ",
instance_conf.name,
- " new node: ", core.json.delay_encode(new_node))
+ " new nodes: ", core.json.delay_encode(new_nodes))
end
+
+ use_node_for_request(instance_conf,
pick_request_node(instance_conf._dns_nodes))
end
@@ -252,16 +353,25 @@ local function fetch_health_instances(conf, checkers)
if checker then
local host = ins.checks and ins.checks.active and
ins.checks.active.host
local port = ins.checks and ins.checks.active and
ins.checks.active.port
+ local healthy_nodes = {}
+ ins._healthy_dns_nodes = nil
+
+ for _, node in ipairs(ins._dns_nodes or {}) do
+ local ok, err = checker:get_target_status(node.host, port or
node.port, host)
+ if ok then
+ healthy_nodes[#healthy_nodes + 1] = node
+ elseif err then
+ core.log.warn("failed to get health check target status,
addr: ",
+ node.host, ":", port or node.port, ", host: ", host,
", err: ", err)
+ end
+ end
- local node = ins._dns_value
- local ok, err = checker:get_target_status(node.host, port or
node.port, host)
- if ok then
+ if #healthy_nodes > 0 then
+ ins._healthy_dns_nodes = healthy_nodes
transform_instances(new_instances, ins)
- elseif err then
- core.log.warn("failed to get health check target status, addr:
",
- node.host, ":", port or node.port, ", host: ", host, ",
err: ", err)
end
else
+ ins._healthy_dns_nodes = nil
transform_instances(new_instances, ins)
end
end
@@ -323,7 +433,7 @@ local function pick_target(ctx, conf, ups_tab)
if instance._nodes_ver then
resource_version = resource_version .. instance._nodes_ver
end
- instances[i]._dns_value = instance._dns_value
+ instances[i]._dns_nodes = instance._dns_nodes
instances[i]._nodes_ver = instance._nodes_ver
local checker = healthcheck_manager.fetch_checker(resource_path,
resource_version)
checkers[instance.name] = checker
@@ -370,6 +480,8 @@ local function pick_target(ctx, conf, ups_tab)
end
local instance_conf = get_instance_conf(conf.instances, instance_name)
+ local nodes = instance_conf._healthy_dns_nodes or instance_conf._dns_nodes
+ use_node_for_request(instance_conf, pick_request_node(nodes))
return instance_name, instance_conf
end
@@ -438,27 +550,30 @@ function _M.construct_upstream(instance)
return nil, "instance configuration is nil"
end
local upstream = {}
- local node = instance._dns_value
- if not node then
+ local nodes = instance._dns_nodes
+ if not nodes then
resolve_endpoint(instance)
- node = instance._dns_value
- if not node then
+ nodes = instance._dns_nodes
+ if not nodes then
return nil, "failed to resolve endpoint for instance: " ..
instance.name
end
end
- if not node.host or not node.port then
- return nil, "invalid upstream node: " .. core.json.encode(node)
+ local upstream_nodes = core.table.new(#nodes, 0)
+ for _, node in ipairs(nodes) do
+ if not node.host or not node.port then
+ return nil, "invalid upstream node: missing host or port"
+ end
+
+ core.table.insert(upstream_nodes, {
+ host = node.host,
+ port = node.port,
+ weight = 1,
+ priority = 0,
+ domain = node.domain,
+ })
end
- local node = {
- host = node.host,
- port = node.port,
- scheme = node.scheme,
- weight = instance.weight or 1,
- priority = instance.priority or 0,
- name = instance.name,
- }
local checks = instance.checks
local auth = instance.auth or {}
if checks and checks.active then
@@ -482,7 +597,7 @@ function _M.construct_upstream(instance)
checks.active.http_path,
core.string.encode_args(auth.query))
end
end
- upstream.nodes = {node}
+ upstream.nodes = upstream_nodes
upstream.checks = checks
upstream._nodes_ver = instance._nodes_ver
return upstream
diff --git a/apisix/plugins/ai-proxy/base.lua b/apisix/plugins/ai-proxy/base.lua
index 745f3b7a5..8ce5ef9de 100644
--- a/apisix/plugins/ai-proxy/base.lua
+++ b/apisix/plugins/ai-proxy/base.lua
@@ -123,10 +123,13 @@ function _M.before_proxy(conf, ctx, on_error)
local extra_opts = {
name = ai_instance.name,
- endpoint = core.table.try_read_attr(ai_instance, "override",
"endpoint"),
+ endpoint = ai_instance._resolved_endpoint
+ or core.table.try_read_attr(ai_instance, "override",
"endpoint"),
model_options = ai_instance.options,
conf = ai_instance.provider_conf or {},
auth = ai_instance.auth,
+ host_header = ai_instance._resolved_host_header,
+ ssl_server_name = ai_instance._resolved_ssl_server_name,
override_llm_options =
core.table.try_read_attr(ai_instance, "override",
"llm_options"),
request_body_override_map =
diff --git a/apisix/plugins/ai-transport/auth-aws.lua
b/apisix/plugins/ai-transport/auth-aws.lua
index cd2c8600a..ae01c0cd4 100644
--- a/apisix/plugins/ai-transport/auth-aws.lua
+++ b/apisix/plugins/ai-transport/auth-aws.lua
@@ -114,11 +114,13 @@ function _M.sign_request(params, aws_conf, region)
-- encoded twice (e.g., raw ":" → "%3A" on the wire → "%253A" in the
-- canonical URI). normalize_and_encode_path with n=2 decodes each segment
-- then re-encodes it twice, producing the required double-encoded form.
+ local headers = params.headers or {}
+ local signing_host = headers["Host"] or headers["host"] or params.host
local r = {
headers = {},
method = params.method or "POST",
canonicalURI = normalize_and_encode_path(params.path, 2),
- host = params.host,
+ host = signing_host,
port = params.port or 443,
body = params.body,
query = params.query,
@@ -144,7 +146,8 @@ function _M.sign_request(params, aws_conf, region)
params.headers["x-amz-security-token"] =
signed.headers["X-Amz-Security-Token"]
end
if signed.headers["Host"] then
- params.headers["host"] = signed.headers["Host"]
+ params.headers["Host"] = signed.headers["Host"]
+ params.headers["host"] = nil
end
end
diff --git a/t/plugin/ai-proxy-multi-domain-healthcheck-repro.t
b/t/plugin/ai-proxy-multi-domain-healthcheck-repro.t
new file mode 100644
index 000000000..1ab356c9e
--- /dev/null
+++ b/t/plugin/ai-proxy-multi-domain-healthcheck-repro.t
@@ -0,0 +1,244 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+use t::APISIX 'no_plan';
+
+log_level("debug");
+repeat_each(1);
+no_long_string();
+no_shuffle();
+no_root_location();
+workers(4);
+
+
+add_block_preprocessor(sub {
+ my ($block) = @_;
+
+ if (!defined $block->request) {
+ $block->set_value("request", "GET /t");
+ }
+
+ my $user_yaml_config = <<_EOC_;
+plugins:
+ - ai-proxy-multi
+ - prometheus
+_EOC_
+ $block->set_value("extra_yaml_config", $user_yaml_config);
+
+ my $http_config = $block->http_config // <<_EOC_;
+ server {
+ server_name openai;
+ listen 16724;
+
+ default_type 'application/json';
+
+ location /v1/chat/completions {
+ content_by_lua_block {
+
ngx.say([[{"choices":[{"message":{"content":"ok","role":"assistant"}}]}]])
+ }
+ }
+
+ location /status/domain {
+ content_by_lua_block {
+ ngx.sleep(1.2)
+ ngx.say("ok")
+ }
+ }
+ }
+_EOC_
+
+ $block->set_value("http_config", $http_config);
+});
+
+run_tests();
+
+__DATA__
+
+=== TEST 1: changing DNS order does not rebuild checker while active probe is
in flight
+--- extra_init_by_lua
+ local resolver = require("apisix.core.resolver")
+ local original_parse_domain = resolver.parse_domain
+ local original_parse_domain_all = resolver.parse_domain_all
+
+ resolver.parse_domain = function(host)
+ if host == "multi-ip.example.com" then
+ local current = ngx.shared.test:get(host) or "first"
+ if current == "first" then
+ return "127.0.0.1"
+ end
+ return "127.0.0.2"
+ end
+ return original_parse_domain(host)
+ end
+
+ resolver.parse_domain_all = function(host)
+ if host == "multi-ip.example.com" then
+ local current = ngx.shared.test:get(host) or "first"
+ if current == "first" then
+ return {"127.0.0.1", "127.0.0.2"}
+ end
+ return {"127.0.0.2", "127.0.0.1"}
+ end
+ return original_parse_domain_all(host)
+ end
+--- config
+ location /t {
+ content_by_lua_block {
+ local t = require("lib.test_admin").test
+ local json = require("cjson.safe")
+ local http = require("resty.http")
+ ngx.shared.test:set("multi-ip.example.com", "first")
+
+ local route = {
+ uri = "/ai",
+ plugins = {
+ ["ai-proxy-multi"] = {
+ fallback_strategy =
"instance_health_and_rate_limiting",
+ instances = {
+ {
+ name = "openai-domain",
+ provider = "openai",
+ weight = 1,
+ priority = 1,
+ auth = {
+ header = {
+ Authorization = "Bearer token",
+ },
+ },
+ options = {
+ model = "gpt-4",
+ },
+ override = {
+ endpoint =
"http://multi-ip.example.com:16724",
+ },
+ checks = {
+ active = {
+ timeout = 3,
+ http_path = "/status/domain",
+ host = "multi-ip.example.com",
+ healthy = {
+ interval = 1,
+ successes = 1,
+ },
+ unhealthy = {
+ interval = 1,
+ http_failures = 1,
+ tcp_failures = 1,
+ timeouts = 1,
+ },
+ },
+ },
+ },
+ {
+ name = "openai-fallback",
+ provider = "openai",
+ weight = 1,
+ priority = 10,
+ auth = {
+ header = {
+ Authorization = "Bearer token",
+ },
+ },
+ options = {
+ model = "gpt-4",
+ },
+ override = {
+ endpoint = "http://127.0.0.1:16724",
+ },
+ },
+ },
+ ssl_verify = false,
+ },
+ },
+ }
+
+ local code, body = t('/apisix/admin/routes/1',
+ ngx.HTTP_PUT,
+ json.encode(route)
+ )
+ assert(code < 300, body)
+
+ local function send_ai_http()
+ local httpc = http.new()
+ local res, err = httpc:request_uri("http://127.0.0.1:" ..
ngx.var.server_port .. "/ai", {
+ method = "POST",
+ body = json.encode({messages = {{role = "user", content =
"hi"}}}),
+ headers = {
+ ["Content-Type"] = "application/json",
+ },
+ keepalive = false,
+ })
+ if not res then
+ ngx.log(ngx.WARN, "failed to send ai request: ", err)
+ return
+ end
+ if res.status ~= 200 then
+ ngx.log(ngx.WARN, "unexpected ai response: ", res.status,
" ", res.body)
+ end
+ end
+
+ for _ = 1, 16 do
+ send_ai_http()
+ end
+ ngx.sleep(1.5)
+
+ local stop_at = ngx.now() + 10
+ local traffic = ngx.thread.spawn(function()
+ while ngx.now() < stop_at do
+ local threads = {}
+ for _ = 1, 8 do
+ threads[#threads + 1] = ngx.thread.spawn(send_ai_http)
+ end
+ for _, th in ipairs(threads) do
+ ngx.thread.wait(th)
+ end
+ ngx.sleep(0.02)
+ end
+ end)
+
+ local order = "second"
+ while ngx.now() < stop_at do
+ ngx.shared.test:set("multi-ip.example.com", order)
+ order = order == "second" and "first" or "second"
+ ngx.sleep(0.7)
+ end
+ ngx.thread.wait(traffic)
+
+ local deadline = ngx.now() + 1.5
+ while ngx.now() < deadline do
+ local threads = {}
+ for _ = 1, 8 do
+ threads[#threads + 1] = ngx.thread.spawn(send_ai_http)
+ end
+ for _, th in ipairs(threads) do
+ ngx.thread.wait(th)
+ end
+ ngx.sleep(0.05)
+ end
+
+ ngx.sleep(1.2)
+ ngx.say("passed")
+ }
+ }
+--- timeout: 25
+--- response_body
+passed
+--- no_error_log
+releasing existing checker:
+trying to increment a target that is not in the list
+failed to get health check target status
+target not found
diff --git a/t/plugin/ai-proxy-multi-domain-healthcheck.t
b/t/plugin/ai-proxy-multi-domain-healthcheck.t
new file mode 100644
index 000000000..286216d5b
--- /dev/null
+++ b/t/plugin/ai-proxy-multi-domain-healthcheck.t
@@ -0,0 +1,338 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+use t::APISIX 'no_plan';
+
+log_level("debug");
+repeat_each(1);
+no_long_string();
+no_shuffle();
+no_root_location();
+
+
+add_block_preprocessor(sub {
+ my ($block) = @_;
+
+ if (!defined $block->request) {
+ $block->set_value("request", "GET /t");
+ }
+
+ my $user_yaml_config = <<_EOC_;
+plugins:
+ - ai-proxy-multi
+ - prometheus
+_EOC_
+ $block->set_value("extra_yaml_config", $user_yaml_config);
+
+ my $http_config = $block->http_config // <<_EOC_;
+ server {
+ server_name openai;
+ listen 127.0.0.1:16724;
+ listen 127.0.0.2:16724;
+
+ default_type 'application/json';
+
+ location /v1/chat/completions {
+ content_by_lua_block {
+ ngx.say(string.format(
+
[[{"host":"%s","server_addr":"%s","choices":[{"message":{"content":"ok","role":"assistant"}}]}]],
+ ngx.req.get_headers()["host"],
+ ngx.var.server_addr
+ ))
+ }
+ }
+
+ location /status/domain {
+ content_by_lua_block {
+ ngx.say("ok")
+ }
+ }
+ }
+_EOC_
+
+ $block->set_value("http_config", $http_config);
+});
+
+run_tests();
+
+__DATA__
+
+=== TEST 1: domain healthcheck uses stable resolved IP set and request uses
selected IP
+--- config
+ location /t {
+ content_by_lua_block {
+ local t = require("lib.test_admin").test
+ local resolver = require("apisix.core.resolver")
+ local json = require("cjson.safe")
+ local original_parse_domain_all = resolver.parse_domain_all
+ local dns_query_count = 0
+
+ resolver.parse_domain_all = function(host)
+ if host == "multi-ip.example.com" then
+ dns_query_count = dns_query_count + 1
+ if dns_query_count % 2 == 1 then
+ return {"127.0.0.2", "127.0.0.1"}
+ end
+ return {"127.0.0.1", "127.0.0.2"}
+ end
+ return original_parse_domain_all(host)
+ end
+
+ local route = {
+ uri = "/ai",
+ plugins = {
+ ["ai-proxy-multi"] = {
+ fallback_strategy =
"instance_health_and_rate_limiting",
+ instances = {
+ {
+ name = "openai-domain",
+ provider = "openai",
+ weight = 1,
+ priority = 1,
+ auth = {
+ header = {
+ Authorization = "Bearer token",
+ },
+ },
+ options = {
+ model = "gpt-4",
+ },
+ override = {
+ endpoint =
"http://multi-ip.example.com:16724",
+ },
+ checks = {
+ active = {
+ timeout = 1,
+ http_path = "/status/domain",
+ host = "multi-ip.example.com",
+ healthy = {
+ interval = 1,
+ successes = 1,
+ },
+ unhealthy = {
+ interval = 1,
+ http_failures = 1,
+ tcp_failures = 1,
+ timeouts = 1,
+ },
+ },
+ },
+ },
+ {
+ name = "openai-fallback",
+ provider = "openai",
+ weight = 1,
+ priority = 0,
+ auth = {
+ header = {
+ Authorization = "Bearer token",
+ },
+ },
+ options = {
+ model = "gpt-4",
+ },
+ override = {
+ endpoint = "http://127.0.0.1:16724",
+ },
+ },
+ },
+ ssl_verify = false,
+ },
+ },
+ }
+
+ local code, body = t('/apisix/admin/routes/1',
+ ngx.HTTP_PUT,
+ json.encode(route)
+ )
+ assert(code < 300, body)
+
+ local function send_ai()
+ local code, _, body = t("/ai",
+ ngx.HTTP_POST,
+ json.encode({messages = {{role = "user", content =
"hi"}}}),
+ nil,
+ {
+ ["Content-Type"] = "application/json",
+ }
+ )
+ assert(code == 200, "unexpected status: " .. tostring(code) ..
", body: " .. body)
+ return json.decode(body)
+ end
+
+ local ok, err = xpcall(function()
+ local res = send_ai()
+ assert(res.host == "multi-ip.example.com:16724",
+ "unexpected host: " .. tostring(res.host))
+ assert(res.server_addr == "127.0.0.1" or res.server_addr ==
"127.0.0.2",
+ "unexpected server addr: " .. tostring(res.server_addr))
+
+ ngx.sleep(1.2)
+ res = send_ai()
+ assert(res.host == "multi-ip.example.com:16724",
+ "unexpected host: " .. tostring(res.host))
+ assert(res.server_addr == "127.0.0.1" or res.server_addr ==
"127.0.0.2",
+ "unexpected server addr: " .. tostring(res.server_addr))
+ end, debug.traceback)
+ resolver.parse_domain_all = original_parse_domain_all
+ assert(ok, err)
+ ngx.say("passed")
+ }
+ }
+--- response_body
+passed
+--- no_error_log
+releasing existing checker:
+trying to increment a target that is not in the list
+
+
+
+=== TEST 2: parse_domain_all asks DNS client for all answers
+--- config
+ location /t {
+ content_by_lua_block {
+ local resolver = require("apisix.core.resolver")
+ local utils = require("apisix.core.utils")
+ local dns_client = require("apisix.core.dns.client")
+ local original_dns_parse = utils.dns_parse
+ local got_selector
+
+ utils.dns_parse = function(host, selector)
+ got_selector = selector
+ assert(host == "multi-ip.example.com", "unexpected host: " ..
tostring(host))
+ return {
+ {address = "127.0.0.2"},
+ {address = "127.0.0.1"},
+ }
+ end
+
+ local ok, ips, err = xpcall(function()
+ local ips, err =
resolver.parse_domain_all("multi-ip.example.com")
+ return ips, err
+ end, debug.traceback)
+ utils.dns_parse = original_dns_parse
+
+ assert(ok, ips)
+ assert(not err, err)
+ assert(got_selector == dns_client.RETURN_ALL,
+ "unexpected selector: " .. tostring(got_selector))
+ assert(ips[1] == "127.0.0.1", "unexpected first ip: " ..
tostring(ips[1]))
+ assert(ips[2] == "127.0.0.2", "unexpected second ip: " ..
tostring(ips[2]))
+ ngx.say("passed")
+ }
+ }
+--- response_body
+passed
+
+
+
+=== TEST 3: parse_domain_all uses hosts before DNS client
+--- config
+ location /t {
+ content_by_lua_block {
+ local resolver = require("apisix.core.resolver")
+ local utils = require("apisix.core.utils")
+ local original_dns_parse = utils.dns_parse
+
+ utils.dns_parse = function()
+ error("dns_parse should not be called")
+ end
+
+ local ok, ips, err = xpcall(function()
+ local ips, err = resolver.parse_domain_all("localhost")
+ return ips, err
+ end, debug.traceback)
+ utils.dns_parse = original_dns_parse
+
+ assert(ok, ips)
+ assert(not err, err)
+ assert(#ips > 0, "expected hosts result")
+ ngx.say("passed")
+ }
+ }
+--- response_body
+passed
+
+
+
+=== TEST 4: AWS SigV4 signs resolved domain request with Host authority
+--- config
+ location /t {
+ content_by_lua_block {
+ local old_signer = package.loaded["resty.aws.request.sign"]
+ local old_aws = package.loaded["resty.aws"]
+ local old_auth_aws =
package.loaded["apisix.plugins.ai-transport.auth-aws"]
+ local signed_host
+
+ package.loaded["resty.aws"] = function()
+ return {
+ Credentials = function(_, conf)
+ return conf
+ end,
+ }
+ end
+ package.loaded["resty.aws.request.sign"] = function(_, request)
+ signed_host = request.host
+ return {
+ headers = {
+ Authorization = "AWS4-HMAC-SHA256 test",
+ ["X-Amz-Date"] = "20260526T000000Z",
+ Host = request.host,
+ },
+ }
+ end
+ package.loaded["apisix.plugins.ai-transport.auth-aws"] = nil
+
+ local ok, err = xpcall(function()
+ local auth_aws =
require("apisix.plugins.ai-transport.auth-aws")
+ local params = {
+ method = "POST",
+ host = "127.0.0.1",
+ port = 16724,
+ path = "/model/test/converse",
+ headers = {
+ Host = "bedrock-domain.example.com:16724",
+ },
+ body = {
+ messages = {
+ {role = "user", content = {{text = "hi"}}},
+ },
+ },
+ }
+ local sign_err = auth_aws.sign_request(params, {
+ access_key_id = "AKIAIOSFODNN7EXAMPLE",
+ secret_access_key =
"wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY",
+ }, "us-east-1")
+
+ assert(not sign_err, sign_err)
+ assert(signed_host == "bedrock-domain.example.com:16724",
+ "unexpected signed host: " .. tostring(signed_host))
+ assert(params.headers["Host"] ==
"bedrock-domain.example.com:16724",
+ "unexpected Host header: " ..
tostring(params.headers["Host"]))
+ assert(params.headers["host"] == nil,
+ "unexpected lowercase host header: " ..
tostring(params.headers["host"]))
+ end, debug.traceback)
+
+ package.loaded["resty.aws.request.sign"] = old_signer
+ package.loaded["resty.aws"] = old_aws
+ package.loaded["apisix.plugins.ai-transport.auth-aws"] =
old_auth_aws
+ assert(ok, err)
+ ngx.say("passed")
+ }
+ }
+--- response_body
+passed
diff --git a/t/plugin/ai-proxy-multi.balancer.t
b/t/plugin/ai-proxy-multi.balancer.t
index a4107a8de..ee22277e3 100644
--- a/t/plugin/ai-proxy-multi.balancer.t
+++ b/t/plugin/ai-proxy-multi.balancer.t
@@ -222,7 +222,7 @@ __DATA__
"temperature": 1.0
},
"override": {
- "endpoint": "http://localhost:6724"
+ "endpoint": "http://127.0.0.1:6724"
}
},
{
@@ -240,7 +240,7 @@ __DATA__
"temperature": 1.0
},
"override": {
- "endpoint":
"http://localhost:6724/chat/completions"
+ "endpoint":
"http://127.0.0.1:6724/chat/completions"
}
}
],
@@ -326,7 +326,7 @@
deepseek.deepseek.openai.openai.openai.openai.openai.openai.openai.openai
"temperature": 1.0
},
"override": {
- "endpoint": "http://localhost:6724"
+ "endpoint": "http://127.0.0.1:6724"
}
},
{
@@ -335,7 +335,7 @@
deepseek.deepseek.openai.openai.openai.openai.openai.openai.openai.openai
"weight": 1,
"auth": {"header": {"Authorization":
"Bearer token"}},
"options": {"model":
"deepseek-chat","max_tokens": 512,"temperature": 1.0},
- "override": {"endpoint":
"http://localhost:6724/chat/completions"}
+ "override": {"endpoint":
"http://127.0.0.1:6724/chat/completions"}
}
],
"ssl_verify": false
@@ -440,7 +440,7 @@ distribution: openai: 8
"provider": "deepseek",
"weight": 1,
"auth": {"header": {"Authorization":
"Bearer token"}},
- "options": {"model":
"deepseek-chat","max_tokens": 512,"temperature": 1.0},"override": {"endpoint":
"http://localhost:6724/chat/completions"}}
+ "options": {"model":
"deepseek-chat","max_tokens": 512,"temperature": 1.0},"override": {"endpoint":
"http://127.0.0.1:6724/chat/completions"}}
],
"ssl_verify": false
}
@@ -532,7 +532,7 @@ distribution: deepseek: 10
"endpoint": "http://localhost:6726"
}
},
-
{"name":"deepseek","provider":"deepseek","weight":1,"auth":{"header":{"Authorization":"Bearer
token"}},"options":{"model":"deepseek-chat","max_tokens":512,"temperature":1.0},"override":{"endpoint":"http://localhost:6724/chat/completions"}}
+
{"name":"deepseek","provider":"deepseek","weight":1,"auth":{"header":{"Authorization":"Bearer
token"}},"options":{"model":"deepseek-chat","max_tokens":512,"temperature":1.0},"override":{"endpoint":"http://127.0.0.1:6724/chat/completions"}}
],
"ssl_verify": false
}
@@ -624,7 +624,7 @@ distribution: deepseek: 10
"endpoint": "http://localhost:6725"
}
},
-
{"name":"deepseek","provider":"deepseek","weight":1,"auth":{"header":{"Authorization":"Bearer
token"}},"options":{"model":"deepseek-chat","max_tokens":512,"temperature":1.0},"override":{"endpoint":"http://localhost:6724/chat/completions"}}
+
{"name":"deepseek","provider":"deepseek","weight":1,"auth":{"header":{"Authorization":"Bearer
token"}},"options":{"model":"deepseek-chat","max_tokens":512,"temperature":1.0},"override":{"endpoint":"http://127.0.0.1:6724/chat/completions"}}
],
"ssl_verify": false
}
@@ -716,7 +716,7 @@ distribution: deepseek: 10
"endpoint": "http://localhost:6728"
}
},
-
{"name":"deepseek","provider":"deepseek","weight":1,"auth":{"header":{"Authorization":"Bearer
token"}},"options":{"model":"deepseek-chat","max_tokens":512,"temperature":1.0},"override":{"endpoint":"http://localhost:6724/chat/completions"}}
+
{"name":"deepseek","provider":"deepseek","weight":1,"auth":{"header":{"Authorization":"Bearer
token"}},"options":{"model":"deepseek-chat","max_tokens":512,"temperature":1.0},"override":{"endpoint":"http://127.0.0.1:6724/chat/completions"}}
],
"ssl_verify": false
}
@@ -792,7 +792,7 @@ distribution: deepseek: 10
"http_5xx"
],
"instances": [
- {"auth":{"header":{"Authorization":"Bearer
token"}},"name":"mock-429","override":{"endpoint":"http://localhost:6726"},"priority":10,"provider":"openai-compatible","weight":10},{"auth":{"header":{"Authorization":"Bearer
token"}},"name":"mock-500","override":{"endpoint":"http://localhost:6727"},"priority":0,"provider":"openai-compatible","weight":10},{"auth":{"header":{"Authorization":"Bearer
token"}},"name":"mock-200","override":{"endpoint":"http://localhost [...]
+ {"auth":{"header":{"Authorization":"Bearer
token"}},"name":"mock-429","override":{"endpoint":"http://localhost:6726"},"priority":10,"provider":"openai-compatible","weight":10},{"auth":{"header":{"Authorization":"Bearer
token"}},"name":"mock-500","override":{"endpoint":"http://localhost:6727"},"priority":0,"provider":"openai-compatible","weight":10},{"auth":{"header":{"Authorization":"Bearer
token"}},"name":"mock-200","override":{"endpoint":"http://127.0.0.1 [...]
],
"ssl_verify": false
}
@@ -974,10 +974,10 @@ distribution: 502: 10
max_tokens = 512,
temperature = 1
},
- override = { endpoint = "http://localhost:6724" },
+ override = { endpoint = "http://127.0.0.1:6724" },
checks = {
active = {
- type = "http", host = "localhost", port =
6729, http_path = "/status_200",
+ type = "http", host = "127.0.0.1", port =
6729, http_path = "/status_200",
healthy = { interval = 1, successes = 1 },
unhealthy = { interval = 1, http_failures
= 1 }
}
@@ -995,10 +995,10 @@ distribution: 502: 10
max_tokens = 512,
temperature = 1
},
- override = { endpoint =
"http://localhost:6724/chat/completions" },
+ override = { endpoint =
"http://127.0.0.1:6724/chat/completions" },
checks = {
active = {
- type = "http", host = "localhost", port =
6729, http_path = "/status_500",
+ type = "http", host = "127.0.0.1", port =
6729, http_path = "/status_500",
healthy = { interval = 1, successes = 1 },
unhealthy = { interval = 1, http_failures
= 1 }
}
diff --git a/t/plugin/ai-proxy-multi3.t b/t/plugin/ai-proxy-multi3.t
index 21930b0d1..9a7e1376d 100644
--- a/t/plugin/ai-proxy-multi3.t
+++ b/t/plugin/ai-proxy-multi3.t
@@ -204,7 +204,7 @@ __DATA__
"priority": 1,
"auth": {"header": {"Authorization":
"Bearer token"}},
"options": {"model": "gpt-4"},
- "override": {"endpoint":
"http://localhost:16724"},
+ "override": {"endpoint":
"http://127.0.0.1:16724"},
"checks": {
"active": {
"timeout": 5,
@@ -223,7 +223,7 @@ __DATA__
"priority": 1,
"auth": {"header": {"Authorization":
"Bearer token"}},
"options": {"model": "gpt-3"},
- "override": {"endpoint":
"http://localhost:16724"}
+ "override": {"endpoint":
"http://127.0.0.1:16724"}
}
],
"ssl_verify": false
@@ -346,7 +346,7 @@ passed
"priority": 1,
"auth": {"header": {"Authorization":
"Bearer token"}},
"options": {"model": "gpt-4"},
- "override": {"endpoint":
"http://localhost:16724"},
+ "override": {"endpoint":
"http://127.0.0.1:16724"},
"checks": {
"active": {
"timeout": 5,
@@ -365,7 +365,7 @@ passed
"priority": 1,
"auth": {"header": {"Authorization":
"Bearer token"}},
"options": {"model": "gpt-3"},
- "override": {"endpoint":
"http://localhost:16724"}
+ "override": {"endpoint":
"http://127.0.0.1:16724"}
}
],
"ssl_verify": false
@@ -529,7 +529,7 @@ passed
"model": "gpt-4"
},
"override": {
- "endpoint": "http://localhost:16724"
+ "endpoint": "http://127.0.0.1:16724"
},
]] .. string.format(checks_tmp, "gpt4")..
[[
},
@@ -547,7 +547,7 @@ passed
"model": "gpt-3"
},
"override": {
- "endpoint": "http://localhost:16724"
+ "endpoint": "http://127.0.0.1:16724"
},
]] .. string.format(checks_tmp, "gpt3") ..
[[
}
@@ -693,7 +693,7 @@ passed
"model": "gpt-4"
},
"override": {
- "endpoint": "http://localhost:16724"
+ "endpoint": "http://127.0.0.1:16724"
},
]] .. string.format(checks_tmp, "gpt4")..
[[
},
@@ -711,7 +711,7 @@ passed
"model": "gpt-3"
},
"override": {
- "endpoint": "http://localhost:16724"
+ "endpoint": "http://127.0.0.1:16724"
},
]] .. string.format(checks_tmp, "gpt3") ..
[[
}
@@ -916,8 +916,10 @@ POST /ai
content_by_lua_block {
local t = require("lib.test_admin").test
local resolver = require("apisix.core.resolver")
- -- Mock resolver.parse_domain to return different IPs on different calls
+ -- Mock resolver.parse_domain and resolver.parse_domain_all so this test
+ -- covers both the old single-answer path and the current all-answer path.
local original_parse_domain = resolver.parse_domain
+ local original_parse_domain_all = resolver.parse_domain_all
local call_count = 0
resolver.parse_domain = function(host)
if host == "test.example.com" then
@@ -930,6 +932,17 @@ POST /ai
end
return original_parse_domain(host)
end
+ resolver.parse_domain_all = function(host)
+ if host == "test.example.com" then
+ call_count = call_count + 1
+ if call_count == 1 then
+ return {"127.0.0.1", "127.0.0.2"}
+ else
+ return {"127.0.0.2", "127.0.0.1"}
+ end
+ end
+ return original_parse_domain_all(host)
+ end
-- Create a route with health check that uses the domain
local core = require("apisix.core")
local route_config = {
@@ -1048,6 +1061,7 @@ POST /ai
-- Restore original function
resolver.parse_domain = original_parse_domain
+ resolver.parse_domain_all = original_parse_domain_all
ngx.sleep(3)
ngx.say("passed")
}
@@ -1057,8 +1071,8 @@ passed
passed
--- no_error_log
failed to get health check target status
---- error_log
releasing existing checker
+trying to increment a target that is not in the list
--- timeout: 5