This is an automated email from the ASF dual-hosted git repository.
young pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/apisix.git
The following commit(s) were added to refs/heads/master by this push:
new 5b7170def feat: support vertex-ai (#12933)
5b7170def is described below
commit 5b7170def6deccffb593bdab13fb705f4202f75d
Author: YYYoung <[email protected]>
AuthorDate: Mon Jan 26 16:01:07 2026 +0800
feat: support vertex-ai (#12933)
---
apisix/cli/ngx_tpl.lua | 1 +
apisix/plugins/ai-drivers/azure-openai.lua | 4 +-
apisix/plugins/ai-drivers/openai-base.lua | 142 +++++--
apisix/plugins/ai-drivers/schema.lua | 1 +
apisix/plugins/ai-drivers/vertex-ai.lua | 191 +++++++++
apisix/plugins/ai-proxy-multi.lua | 18 +-
apisix/plugins/ai-proxy.lua | 7 +
apisix/plugins/ai-proxy/base.lua | 5 +-
apisix/plugins/ai-proxy/schema.lua | 53 ++-
apisix/plugins/ai-request-rewrite.lua | 3 +-
apisix/utils/google-cloud-oauth.lua | 3 +
docs/en/latest/plugins/ai-proxy-multi.md | 6 +-
docs/en/latest/plugins/ai-proxy.md | 6 +-
docs/en/latest/plugins/ai-request-rewrite.md | 2 +-
docs/zh/latest/plugins/ai-proxy-multi.md | 6 +-
docs/zh/latest/plugins/ai-proxy.md | 6 +-
docs/zh/latest/plugins/ai-request-rewrite.md | 2 +-
t/plugin/ai-proxy-vertex-ai.t | 572 +++++++++++++++++++++++++++
18 files changed, 973 insertions(+), 55 deletions(-)
diff --git a/apisix/cli/ngx_tpl.lua b/apisix/cli/ngx_tpl.lua
index bfca1ccba..e82b70b04 100644
--- a/apisix/cli/ngx_tpl.lua
+++ b/apisix/cli/ngx_tpl.lua
@@ -53,6 +53,7 @@ env PATH; # for searching external plugin runner's binary
# reserved environment variables for configuration
env APISIX_DEPLOYMENT_ETCD_HOST;
+env GCP_SERVICE_ACCOUNT;
{% if envs then %}
{% for _, name in ipairs(envs) do %}
diff --git a/apisix/plugins/ai-drivers/azure-openai.lua
b/apisix/plugins/ai-drivers/azure-openai.lua
index 3022e7671..3df83fa1e 100644
--- a/apisix/plugins/ai-drivers/azure-openai.lua
+++ b/apisix/plugins/ai-drivers/azure-openai.lua
@@ -19,8 +19,6 @@ return require("apisix.plugins.ai-drivers.openai-base").new(
{
path = "/completions",
port = 443,
- options = {
- remove_model = true
- }
+ remove_model = true
}
)
diff --git a/apisix/plugins/ai-drivers/openai-base.lua
b/apisix/plugins/ai-drivers/openai-base.lua
index cb48c3e07..8688f9eb0 100644
--- a/apisix/plugins/ai-drivers/openai-base.lua
+++ b/apisix/plugins/ai-drivers/openai-base.lua
@@ -27,6 +27,9 @@ local plugin = require("apisix.plugin")
local http = require("resty.http")
local url = require("socket.url")
local sse = require("apisix.plugins.ai-drivers.sse")
+local google_oauth = require("apisix.utils.google-cloud-oauth")
+
+local lrucache = require("resty.lrucache")
local ngx = ngx
local ngx_now = ngx.now
@@ -34,6 +37,7 @@ local table = table
local pairs = pairs
local type = type
local math = math
+local os = os
local ipairs = ipairs
local setmetatable = setmetatable
@@ -41,15 +45,8 @@ local HTTP_INTERNAL_SERVER_ERROR =
ngx.HTTP_INTERNAL_SERVER_ERROR
local HTTP_GATEWAY_TIMEOUT = ngx.HTTP_GATEWAY_TIMEOUT
-function _M.new(opts)
-
- local self = {
- host = opts.host,
- port = opts.port,
- path = opts.path,
- remove_model = opts.options and opts.options.remove_model
- }
- return setmetatable(self, mt)
+function _M.new(opt)
+ return setmetatable(opt, mt)
end
@@ -76,7 +73,7 @@ local function handle_error(err)
end
-local function read_response(ctx, res)
+local function read_response(conf, ctx, res, response_filter)
local body_reader = res.body_reader
if not body_reader then
core.log.warn("AI service sent no response body")
@@ -153,6 +150,7 @@ local function read_response(ctx, res)
end
end
+ local headers = res.headers
local raw_res_body, err = res:read_body()
if not raw_res_body then
core.log.warn("failed to read response body: ", err)
@@ -166,6 +164,25 @@ local function read_response(ctx, res)
core.log.warn("invalid response body from ai service: ", raw_res_body,
" err: ", err,
", it will cause token usage not available")
else
+ if response_filter then
+ local resp = {
+ headers = headers,
+ body = res_body,
+ }
+ local code, err = response_filter(conf, ctx, resp)
+ if code then
+ return code, err
+ end
+ if resp.body then
+ local body, err = core.json.encode(resp.body)
+ if not body then
+ core.log.error("failed to encode response body after
response filter: ", err)
+ return 500
+ end
+ raw_res_body = body
+ end
+ headers = resp.headers
+ end
core.log.info("got token usage from ai service: ",
core.json.delay_encode(res_body.usage))
ctx.ai_token_usage = {}
if type(res_body.usage) == "table" then
@@ -189,7 +206,44 @@ local function read_response(ctx, res)
ctx.var.llm_response_text = content_to_check
end
end
- plugin.lua_response_filter(ctx, res.headers, raw_res_body)
+ plugin.lua_response_filter(ctx, headers, raw_res_body)
+end
+
+
+local gcp_access_token_cache = lrucache.new(1024 * 4)
+
+local function fetch_gcp_access_token(ctx, name, gcp_conf)
+ local key = core.lrucache.plugin_ctx_id(ctx, name)
+ local access_token = gcp_access_token_cache:get(key)
+ if access_token then
+ return access_token
+ end
+ -- generate access token
+ local auth_conf = {}
+ local service_account_json = gcp_conf.service_account_json or
+ os.getenv("GCP_SERVICE_ACCOUNT")
+ if type(service_account_json) == "string" and service_account_json ~= ""
then
+ local conf, err = core.json.decode(service_account_json)
+ if not conf then
+ return nil, "invalid gcp service account json: " .. (err or
"unknown error")
+ end
+ auth_conf = conf
+ end
+ local oauth = google_oauth.new(auth_conf)
+ access_token = oauth:generate_access_token()
+ if not access_token then
+ return nil, "failed to get google oauth token"
+ end
+ local ttl = oauth.access_token_ttl or 6
+ if gcp_conf.expire_early_secs and ttl > gcp_conf.expire_early_secs then
+ ttl = ttl - gcp_conf.expire_early_secs
+ end
+ if gcp_conf.max_ttl and ttl > gcp_conf.max_ttl then
+ ttl = gcp_conf.max_ttl
+ end
+ gcp_access_token_cache:set(key, access_token, ttl)
+ core.log.debug("set gcp access token in cache with ttl: ", ttl, ", key: ",
key)
+ return access_token
end
@@ -201,7 +255,21 @@ function _M.request(self, ctx, conf, request_table,
extra_opts)
end
httpc:set_timeout(conf.timeout)
- local endpoint = extra_opts and extra_opts.endpoint
+ core.log.info("request extra_opts to LLM server: ",
core.json.delay_encode(extra_opts, true))
+
+ local auth = extra_opts.auth or {}
+ local token
+ if auth.gcp then
+ local access_token, err = fetch_gcp_access_token(ctx, extra_opts.name,
+ auth.gcp)
+ if not access_token then
+ core.log.error("failed to get gcp access token: ", err)
+ return 500
+ end
+ token = access_token
+ end
+
+ local endpoint = extra_opts.endpoint
local parsed_url
if endpoint then
parsed_url = url.parse(endpoint)
@@ -217,20 +285,8 @@ function _M.request(self, ctx, conf, request_table,
extra_opts)
port = 80
end
end
- local ok, err = httpc:connect({
- scheme = scheme,
- host = host,
- port = port,
- ssl_verify = conf.ssl_verify,
- ssl_server_name = parsed_url and parsed_url.host or self.host,
- })
-
- if not ok then
- core.log.warn("failed to connect to LLM server: ", err)
- return handle_error(err)
- end
- local query_params = extra_opts.query_params
+ local query_params = auth.query or {}
if type(parsed_url) == "table" and parsed_url.query and #parsed_url.query
> 0 then
local args_tab = core.string.decode_args(parsed_url.query)
@@ -241,14 +297,22 @@ function _M.request(self, ctx, conf, request_table,
extra_opts)
local path = (parsed_url and parsed_url.path or self.path)
- local headers = extra_opts.headers
+ local headers = auth.header or {}
headers["Content-Type"] = "application/json"
+ if token then
+ headers["Authorization"] = "Bearer " .. token
+ end
+
local params = {
method = "POST",
+ scheme = scheme,
headers = headers,
ssl_verify = conf.ssl_verify,
path = path,
- query = query_params
+ query = query_params,
+ host = host,
+ port = port,
+ ssl_server_name = parsed_url and parsed_url.host or self.host,
}
if extra_opts.model_options then
@@ -256,12 +320,30 @@ function _M.request(self, ctx, conf, request_table,
extra_opts)
request_table[opt] = val
end
end
+ params.body = request_table
+
if self.remove_model then
request_table.model = nil
end
- local req_json, err = core.json.encode(request_table)
+
+ if self.request_filter then
+ local code, err = self.request_filter(extra_opts.conf, ctx, params)
+ if code then
+ return code, err
+ end
+ end
+
+ core.log.info("sending request to LLM server: ",
core.json.delay_encode(params, true))
+
+ local ok, err = httpc:connect(params)
+ if not ok then
+ core.log.error("failed to connect to LLM server: ", err)
+ return handle_error(err)
+ end
+
+ local req_json, err = core.json.encode(params.body)
if not req_json then
- return nil, err
+ return 500, "failed to encode request body: " .. (err or "unknown
error")
end
params.body = req_json
@@ -277,7 +359,7 @@ function _M.request(self, ctx, conf, request_table,
extra_opts)
return res.status
end
- local code, body = read_response(ctx, res)
+ local code, body = read_response(extra_opts.conf, ctx, res,
self.response_filter)
if conf.keepalive then
local ok, err = httpc:set_keepalive(conf.keepalive_timeout,
conf.keepalive_pool)
diff --git a/apisix/plugins/ai-drivers/schema.lua
b/apisix/plugins/ai-drivers/schema.lua
index 8048602ea..4f08f88ae 100644
--- a/apisix/plugins/ai-drivers/schema.lua
+++ b/apisix/plugins/ai-drivers/schema.lua
@@ -49,6 +49,7 @@ local openai_compatible_list = {
"openai-compatible",
"azure-openai",
"openrouter",
+ "vertex-ai",
"gemini",
}
diff --git a/apisix/plugins/ai-drivers/vertex-ai.lua
b/apisix/plugins/ai-drivers/vertex-ai.lua
new file mode 100644
index 000000000..9b1e35113
--- /dev/null
+++ b/apisix/plugins/ai-drivers/vertex-ai.lua
@@ -0,0 +1,191 @@
+--
+-- Licensed to the Apache Software Foundation (ASF) under one or more
+-- contributor license agreements. See the NOTICE file distributed with
+-- this work for additional information regarding copyright ownership.
+-- The ASF licenses this file to You under the Apache License, Version 2.0
+-- (the "License"); you may not use this file except in compliance with
+-- the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+
+local core = require("apisix.core")
+local string = string
+local str_fmt = string.format
+local type = type
+local ipairs = ipairs
+
+local host_template_fmt =
+ "%s-aiplatform.googleapis.com"
+local embeddings_path_template_fmt =
+ "/v1/projects/%s/locations/%s/publishers/google/models/%s:predict"
+local chat_completions_path_template_fmt =
+ "/v1beta1/projects/%s/locations/%s/endpoints/openapi/chat/completions"
+
+local function get_host(region)
+ return str_fmt(host_template_fmt, region)
+end
+
+
+local function get_chat_completions_path(project_id, region)
+ return str_fmt(chat_completions_path_template_fmt, project_id, region)
+end
+
+
+local function get_embeddings_path(project_id, region, model)
+ return str_fmt(embeddings_path_template_fmt, project_id, region, model)
+end
+
+
+local function get_node(instance_conf)
+ local host = "aiplatform.googleapis.com"
+ local region = core.table.try_read_attr(instance_conf, "provider_conf",
"region")
+ if region then
+ host = get_host(region)
+ end
+ return {
+ scheme = "https",
+ host = host,
+ port = 443,
+ }
+end
+
+local function openai_embeddings_to_vertex_predict(openai_req)
+ if not openai_req then
+ return nil, "empty openai request"
+ end
+
+ local input = openai_req.input
+ if not input then
+ return nil, "`input` is required for embeddings"
+ end
+
+ local input_contexts = {}
+
+ if type(input) == "string" then
+ input_contexts = { input }
+ elseif type(input) == "table" then
+ for i, v in ipairs(input) do
+ if type(v) == "string" then
+ core.table.insert(input_contexts, v)
+ elseif type(v) == "table" then
+ core.table.insert(input_contexts, core.table.concat(v, " "))
+ else
+ return nil, "unsupported input type at index " .. i
+ end
+ end
+ else
+ return nil, "`input` must be string or array"
+ end
+
+ local instances = {}
+ for _, text in ipairs(input_contexts) do
+ core.table.insert(instances, {
+ content = text
+ })
+ end
+
+ return {
+ instances = instances
+ }
+end
+
+local function vertex_predict_to_openai_embeddings(vertex_resp, openai_model)
+ if type(vertex_resp) ~= "table" then
+ return nil, "empty vertex response"
+ end
+
+ local predictions = vertex_resp.predictions
+ if type(predictions) ~= "table" then
+ return nil, "vertex response missing predictions"
+ end
+
+ local data = {}
+ local total_tokens = 0
+
+ for i, pred in ipairs(predictions) do
+ local emb = pred.embeddings or {}
+ local values = emb.values
+ if type(values) ~= "table" then
+ return nil, "invalid embedding at index " .. i
+ end
+
+ if emb.statistics and emb.statistics.token_count then
+ total_tokens = total_tokens + emb.statistics.token_count
+ end
+
+ core.table.insert(data, {
+ object = "embedding",
+ index = i - 1,
+ embedding = values
+ })
+ end
+
+ return {
+ object = "list",
+ data = data,
+ model = openai_model or "unknown",
+ usage = {
+ prompt_tokens = total_tokens,
+ total_tokens = total_tokens,
+ }
+ }
+end
+
+
+local function request_filter(conf, ctx, http_params)
+ local body = http_params.body
+ if body and body.input then
+ ctx.llm_request_type = "embeddings"
+ local vertex_req, err = openai_embeddings_to_vertex_predict(body)
+ if not vertex_req then
+ return nil, "failed to convert to vertex predict request: " .. err
+ end
+ http_params.body = vertex_req
+ core.log.debug("using embeddings endpoint for Vertex AI")
+ else
+ ctx.llm_request_type = "chat_completions"
+ end
+ ctx.llm_request_model = body and body.model
+
+ if conf.project_id and conf.region then
+ if not http_params.path then
+ local path
+ if ctx.llm_request_type == "embeddings" then
+ path = get_embeddings_path(conf.project_id, conf.region,
body.model)
+ else
+ path = get_chat_completions_path(conf.project_id, conf.region)
+ end
+ http_params.path = path
+ end
+ if not http_params.host then
+ http_params.host = get_host(conf.region)
+ end
+ end
+end
+
+
+local function response_filter(conf, ctx, resp)
+ if ctx.llm_request_type == "embeddings" then
+ local vertex_body = resp.body
+ local openai_resp, err =
vertex_predict_to_openai_embeddings(vertex_body,
+
ctx.llm_request_model)
+ if not openai_resp then
+ return 500, "failed to convert to openai embeddings response: " ..
err
+ end
+ resp.body = openai_resp
+ end
+end
+
+
+return require("apisix.plugins.ai-drivers.openai-base").new({
+ get_node = get_node,
+ request_filter = request_filter,
+ response_filter = response_filter,
+})
diff --git a/apisix/plugins/ai-proxy-multi.lua
b/apisix/plugins/ai-proxy-multi.lua
index bb32f5ffb..74a84d0e3 100644
--- a/apisix/plugins/ai-proxy-multi.lua
+++ b/apisix/plugins/ai-proxy-multi.lua
@@ -107,6 +107,13 @@ function _M.check_schema(conf)
core.log.warn("fail to require ai provider: ", instance.provider,
", err", err)
return false, "ai provider: " .. instance.provider .. " is not
supported."
end
+ local sa_json = core.table.try_read_attr(instance, "auth", "gcp",
"service_account_json")
+ if sa_json then
+ local _, err = core.json.decode(sa_json)
+ if err then
+ return false, "invalid gcp service_account_json: " .. err
+ end
+ end
end
local algo = core.table.try_read_attr(conf, "balancer", "algorithm")
local hash_on = core.table.try_read_attr(conf, "balancer", "hash_on")
@@ -181,10 +188,15 @@ local function resolve_endpoint(instance_conf)
port = tonumber(port)
else
local ai_driver = require("apisix.plugins.ai-drivers." ..
instance_conf.provider)
- -- built-in ai driver always use https
+ if ai_driver.get_node then
+ local node = ai_driver.get_node(instance_conf)
+ host = node.host
+ port = node.port
+ else
+ host = ai_driver.host
+ port = ai_driver.port
+ end
scheme = "https"
- host = ai_driver.host
- port = ai_driver.port
end
local new_node = {
host = host,
diff --git a/apisix/plugins/ai-proxy.lua b/apisix/plugins/ai-proxy.lua
index 092eb6a08..68bd36d8b 100644
--- a/apisix/plugins/ai-proxy.lua
+++ b/apisix/plugins/ai-proxy.lua
@@ -40,6 +40,13 @@ function _M.check_schema(conf)
core.log.warn("fail to require ai provider: ", conf.provider, ", err",
err)
return false, "ai provider: " .. conf.provider .. " is not supported."
end
+ local sa_json = core.table.try_read_attr(conf, "auth", "gcp",
"service_account_json")
+ if sa_json then
+ local _, err = core.json.decode(sa_json)
+ if err then
+ return false, "invalid gcp service_account_json: " .. err
+ end
+ end
return ok
end
diff --git a/apisix/plugins/ai-proxy/base.lua b/apisix/plugins/ai-proxy/base.lua
index e6e3fd9f3..324ac2da5 100644
--- a/apisix/plugins/ai-proxy/base.lua
+++ b/apisix/plugins/ai-proxy/base.lua
@@ -59,10 +59,11 @@ function _M.before_proxy(conf, ctx, on_error)
end
local extra_opts = {
+ name = ai_instance.name,
endpoint = core.table.try_read_attr(ai_instance, "override",
"endpoint"),
- query_params = ai_instance.auth.query or {},
- headers = (ai_instance.auth.header or {}),
model_options = ai_instance.options,
+ conf = ai_instance.provider_conf or {},
+ auth = ai_instance.auth,
}
if request_body.stream then
diff --git a/apisix/plugins/ai-proxy/schema.lua
b/apisix/plugins/ai-proxy/schema.lua
index 89f857c0d..3ed0d9e93 100644
--- a/apisix/plugins/ai-proxy/schema.lua
+++ b/apisix/plugins/ai-proxy/schema.lua
@@ -33,6 +33,29 @@ local auth_schema = {
patternProperties = {
header = auth_item_schema,
query = auth_item_schema,
+ gcp = {
+ type = "object",
+ description = 'Whether to use GCP service account for
authentication,'
+ .. ' support set env GCP_SERVICE_ACCOUNT.',
+ properties = {
+ service_account_json = {
+ type = "string",
+ description = "GCP service account JSON content for
authentication",
+ },
+ max_ttl = {
+ type = "integer",
+ minimum = 1,
+ description = "Maximum TTL (in seconds) for GCP access
token caching",
+ },
+ expire_early_secs = {
+ type = "integer",
+ minimum = 0,
+ description = "Expire the access token early by specified
seconds to avoid " ..
+ "edge cases",
+ default = 60,
+ },
+ }
+ },
},
additionalProperties = false,
}
@@ -49,6 +72,21 @@ local model_options_schema = {
additionalProperties = true,
}
+local provider_vertex_ai_schema = {
+ type = "object",
+ properties = {
+ project_id = {
+ type = "string",
+ description = "Google Cloud Project ID",
+ },
+ region = {
+ type = "string",
+ description = "Google Cloud Region",
+ },
+ },
+ required = { "project_id", "region" },
+}
+
local ai_instance_schema = {
type = "array",
minItems = 1,
@@ -94,7 +132,20 @@ local ai_instance_schema = {
required = {"active"}
}
},
- required = {"name", "provider", "auth", "weight"}
+ required = {"name", "provider", "auth", "weight"},
+ ["if"] = {
+ properties = { provider = { enum = { "vertex-ai" } } },
+ },
+ ["then"] = {
+ properties = {
+ provider_conf = provider_vertex_ai_schema,
+ },
+ oneOf = {
+ { required = { "provider_conf" } },
+ { required = { "override" } },
+ },
+ },
+ ["else"] = {},
},
}
diff --git a/apisix/plugins/ai-request-rewrite.lua
b/apisix/plugins/ai-request-rewrite.lua
index f0ebcf989..607afc35e 100644
--- a/apisix/plugins/ai-request-rewrite.lua
+++ b/apisix/plugins/ai-request-rewrite.lua
@@ -118,8 +118,7 @@ local function request_to_llm(conf, request_table, ctx)
local extra_opts = {
endpoint = core.table.try_read_attr(conf, "override", "endpoint"),
- query_params = conf.auth.query or {},
- headers = (conf.auth.header or {}),
+ auth = conf.auth,
model_options = conf.options
}
ctx.llm_request_start_time = ngx.now()
diff --git a/apisix/utils/google-cloud-oauth.lua
b/apisix/utils/google-cloud-oauth.lua
index 6cb352848..340f5a6df 100644
--- a/apisix/utils/google-cloud-oauth.lua
+++ b/apisix/utils/google-cloud-oauth.lua
@@ -76,6 +76,7 @@ function _M.refresh_access_token(self)
self.access_token = res.access_token
self.access_token_type = res.token_type
+ self.access_token_ttl = res.expires_in
self.access_token_expire_time = get_timestamp() + res.expires_in
end
@@ -121,6 +122,8 @@ function _M.new(config, ssl_verify)
if type(config.scope) == "table" then
oauth.scope = core.table.concat(config.scope, " ")
end
+ else
+ oauth.scope = "https://www.googleapis.com/auth/cloud-platform"
end
return setmetatable(oauth, { __index = _M })
diff --git a/docs/en/latest/plugins/ai-proxy-multi.md
b/docs/en/latest/plugins/ai-proxy-multi.md
index c5883bf49..5c195ada2 100644
--- a/docs/en/latest/plugins/ai-proxy-multi.md
+++ b/docs/en/latest/plugins/ai-proxy-multi.md
@@ -7,7 +7,7 @@ keywords:
- ai-proxy-multi
- AI
- LLM
-description: The ai-proxy-multi Plugin extends the capabilities of ai-proxy
with load balancing, retries, fallbacks, and health chekcs, simplifying the
integration with OpenAI, DeepSeek, Azure, AIMLAPI, Anthropic, OpenRouter,
Gemini, and other OpenAI-compatible APIs.
+description: The ai-proxy-multi Plugin extends the capabilities of ai-proxy
with load balancing, retries, fallbacks, and health checks, simplifying the
integration with OpenAI, DeepSeek, Azure, AIMLAPI, Anthropic, OpenRouter,
Gemini, Vertex AI, and other OpenAI-compatible APIs.
---
<!--
@@ -35,7 +35,7 @@ description: The ai-proxy-multi Plugin extends the
capabilities of ai-proxy with
## Description
-The `ai-proxy-multi` Plugin simplifies access to LLM and embedding models by
transforming Plugin configurations into the designated request format for
OpenAI, DeepSeek, Azure, AIMLAPI, Anthropic, OpenRouter, Gemini, and other
OpenAI-compatible APIs. It extends the capabilities of
[`ai-proxy`](./ai-proxy.md) with load balancing, retries, fallbacks, and health
checks.
+The `ai-proxy-multi` Plugin simplifies access to LLM and embedding models by
transforming Plugin configurations into the designated request format for
OpenAI, DeepSeek, Azure, AIMLAPI, Anthropic, OpenRouter, Gemini, Vertex AI, and
other OpenAI-compatible APIs. It extends the capabilities of
[`ai-proxy`](./ai-proxy.md) with load balancing, retries, fallbacks, and health
checks.
In addition, the Plugin also supports logging LLM request information in the
access log, such as token usage, model, time to the first response, and more.
@@ -58,7 +58,7 @@ In addition, the Plugin also supports logging LLM request
information in the acc
| balancer.key | string | False |
| | Used when `type` is `chash`. When
`hash_on` is set to `header` or `cookie`, `key` is required. When `hash_on` is
set to `consumer`, `key` is not required as the consumer name will be used as
the key automatically. |
| instances | array[object] | True |
| | LLM instance configurations. |
| instances.name | string | True |
| | Name of the LLM service instance. |
-| instances.provider | string | True |
| [openai, deepseek, azure-openai, aimlapi, anthropic,
openrouter, gemini, openai-compatible] | LLM service provider. When set to
`openai`, the Plugin will proxy the request to `api.openai.com`. When set to
`deepseek`, the Plugin will proxy the request to `api.deepseek.com`. When set
to `aimlapi`, the Plugin uses the OpenAI-compatible driver and proxies the
request to `api.aimlapi.com` by [...]
+| instances.provider | string | True |
| [openai, deepseek, azure-openai, aimlapi, anthropic,
openrouter, gemini, vertex-ai, openai-compatible] | LLM service provider. When
set to `openai`, the Plugin will proxy the request to `api.openai.com`. When
set to `deepseek`, the Plugin will proxy the request to `api.deepseek.com`.
When set to `aimlapi`, the Plugin uses the OpenAI-compatible driver and proxies
the request to `api.aiml [...]
| instances.priority | integer | False | 0
| | Priority of the LLM instance in load
balancing. `priority` takes precedence over `weight`. |
| instances.weight | string | True | 0
| greater or equal to 0 | Weight of the LLM instance in
load balancing. |
| instances.auth | object | True |
| | Authentication configurations. |
diff --git a/docs/en/latest/plugins/ai-proxy.md
b/docs/en/latest/plugins/ai-proxy.md
index cde04bc47..e09f0c072 100644
--- a/docs/en/latest/plugins/ai-proxy.md
+++ b/docs/en/latest/plugins/ai-proxy.md
@@ -7,7 +7,7 @@ keywords:
- ai-proxy
- AI
- LLM
-description: The ai-proxy Plugin simplifies access to LLM and embedding models
providers by converting Plugin configurations into the required request format
for OpenAI, DeepSeek, Azure, AIMLAPI, Anthropic, OpenRouter, Gemini, and other
OpenAI-compatible APIs.
+description: The ai-proxy Plugin simplifies access to LLM and embedding models
providers by converting Plugin configurations into the required request format
for OpenAI, DeepSeek, Azure, AIMLAPI, Anthropic, OpenRouter, Gemini, Vertex AI,
and other OpenAI-compatible APIs.
---
<!--
@@ -35,7 +35,7 @@ description: The ai-proxy Plugin simplifies access to LLM and
embedding models p
## Description
-The `ai-proxy` Plugin simplifies access to LLM and embedding models by
transforming Plugin configurations into the designated request format. It
supports the integration with OpenAI, DeepSeek, Azure, AIMLAPI, Anthropic,
OpenRouter, Gemini, and other OpenAI-compatible APIs.
+The `ai-proxy` Plugin simplifies access to LLM and embedding models by
transforming Plugin configurations into the designated request format. It
supports the integration with OpenAI, DeepSeek, Azure, AIMLAPI, Anthropic,
OpenRouter, Gemini, Vertex AI, and other OpenAI-compatible APIs.
In addition, the Plugin also supports logging LLM request information in the
access log, such as token usage, model, time to the first response, and more.
@@ -51,7 +51,7 @@ In addition, the Plugin also supports logging LLM request
information in the acc
| Name | Type | Required | Default | Valid values
| Description |
|--------------------|--------|----------|---------|------------------------------------------|-------------|
-| provider | string | True | | [openai, deepseek,
azure-openai, aimlapi, anthropic, openrouter, gemini, openai-compatible] | LLM
service provider. When set to `openai`, the Plugin will proxy the request to
`https://api.openai.com/chat/completions`. When set to `deepseek`, the Plugin
will proxy the request to `https://api.deepseek.com/chat/completions`. When set
to `aimlapi`, the Plugin uses the OpenAI-compatible driver and proxies the
request to `https://api.aimlapi [...]
+| provider | string | True | | [openai, deepseek,
azure-openai, aimlapi, anthropic, openrouter, gemini, vertex-ai,
openai-compatible] | LLM service provider. When set to `openai`, the Plugin
will proxy the request to `https://api.openai.com/chat/completions`. When set
to `deepseek`, the Plugin will proxy the request to
`https://api.deepseek.com/chat/completions`. When set to `aimlapi`, the Plugin
uses the OpenAI-compatible driver and proxies the request to `https:// [...]
| auth | object | True | |
| Authentication configurations. |
| auth.header | object | False | |
| Authentication headers. At least one of `header` or `query`
must be configured. |
| auth.query | object | False | |
| Authentication query parameters. At least one of `header` or
`query` must be configured. |
diff --git a/docs/en/latest/plugins/ai-request-rewrite.md
b/docs/en/latest/plugins/ai-request-rewrite.md
index c5d0d1f5b..955db9734 100644
--- a/docs/en/latest/plugins/ai-request-rewrite.md
+++ b/docs/en/latest/plugins/ai-request-rewrite.md
@@ -36,7 +36,7 @@ The `ai-request-rewrite` plugin intercepts client requests
before they are forwa
| **Field** | **Required** | **Type** | **Description**
|
| ------------------------- | ------------ | -------- |
------------------------------------------------------------------------------------
|
| prompt | Yes | String | The prompt send to LLM
service. |
-| provider | Yes | String | Name of the LLM
service. Available options: openai, deekseek, azure-openai, aimlapi, anthropic,
openrouter, gemini, and openai-compatible. When `aimlapi` is selected, the
plugin uses the OpenAI-compatible driver with a default endpoint of
`https://api.aimlapi.com/v1/chat/completions`. |
+| provider | Yes | String | Name of the LLM
service. Available options: openai, deepseek, azure-openai, aimlapi, anthropic,
openrouter, gemini, vertex-ai, and openai-compatible. When `aimlapi` is
selected, the plugin uses the OpenAI-compatible driver with a default endpoint
of `https://api.aimlapi.com/v1/chat/completions`. |
| auth | Yes | Object | Authentication
configuration |
| auth.header | No | Object | Authentication
headers. Key must match pattern `^[a-zA-Z0-9._-]+$`. |
| auth.query | No | Object | Authentication query
parameters. Key must match pattern `^[a-zA-Z0-9._-]+$`. |
diff --git a/docs/zh/latest/plugins/ai-proxy-multi.md
b/docs/zh/latest/plugins/ai-proxy-multi.md
index 3143d9704..40f7157b3 100644
--- a/docs/zh/latest/plugins/ai-proxy-multi.md
+++ b/docs/zh/latest/plugins/ai-proxy-multi.md
@@ -7,7 +7,7 @@ keywords:
- ai-proxy-multi
- AI
- LLM
-description: ai-proxy-multi 插件通过负载均衡、重试、故障转移和健康检查扩展了 ai-proxy 的功能,简化了与
OpenAI、DeepSeek、Azure、AIMLAPI、Anthropic、OpenRouter、Gemini 和其他 OpenAI 兼容 API 的集成。
+description: ai-proxy-multi 插件通过负载均衡、重试、故障转移和健康检查扩展了 ai-proxy 的功能,简化了与
OpenAI、DeepSeek、Azure、AIMLAPI、Anthropic、OpenRouter、Gemini、Vertex AI 和其他 OpenAI
兼容 API 的集成。
---
<!--
@@ -35,7 +35,7 @@ description: ai-proxy-multi 插件通过负载均衡、重试、故障转移和
## 描述
-`ai-proxy-multi` 插件通过将插件配置转换为
OpenAI、DeepSeek、Azure、AIMLAPI、Anthropic、OpenRouter、Gemini 和其他 OpenAI 兼容 API
的指定请求格式,简化了对 LLM 和嵌入模型的访问。它通过负载均衡、重试、故障转移和健康检查扩展了 [`ai-proxy`](./ai-proxy.md)
的功能。
+`ai-proxy-multi` 插件通过将插件配置转换为
OpenAI、DeepSeek、Azure、AIMLAPI、Anthropic、OpenRouter、Gemini、Vertex AI 和其他 OpenAI
兼容 API 的指定请求格式,简化了对 LLM 和嵌入模型的访问。它通过负载均衡、重试、故障转移和健康检查扩展了
[`ai-proxy`](./ai-proxy.md) 的功能。
此外,该插件还支持在访问日志中记录 LLM 请求信息,如令牌使用量、模型、首次响应时间等。
@@ -58,7 +58,7 @@ description: ai-proxy-multi 插件通过负载均衡、重试、故障转移和
| balancer.key | string | 否 |
| | 当 `type` 为 `chash` 时使用。当 `hash_on` 设置为
`header` 或 `cookie` 时,需要 `key`。当 `hash_on` 设置为 `consumer` 时,不需要
`key`,因为消费者名称将自动用作键。 |
| instances | array[object] | 是 |
| | LLM 实例配置。 |
| instances.name | string | 是 |
| | LLM 服务实例的名称。 |
-| instances.provider | string | 是 |
| [openai, deepseek, azure-openai, aimlapi, anthropic,
openrouter, gemini, openai-compatible] | LLM 服务提供商。设置为 `openai` 时,插件将代理请求到
`api.openai.com`。设置为 `deepseek` 时,插件将代理请求到 `api.deepseek.com`。设置为 `aimlapi`
时,插件使用 OpenAI 兼容驱动程序,默认将请求代理到 `api.aimlapi.com`。设置为 `anthropic` 时,插件使用 OpenAI
兼容驱动程序,默认将请求代理到 `api.anthropic.com`。设置为 `openrouter` 时,插件使用 OpenAI
兼容驱动程序,默认将请求代理到 `openrouter.ai`。设置为 `gemini [...]
+| instances.provider | string | 是 |
| [openai, deepseek, azure-openai, aimlapi, anthropic,
openrouter, gemini, vertex-ai, openai-compatible] | LLM 服务提供商。设置为 `openai`
时,插件将代理请求到 `api.openai.com`。设置为 `deepseek` 时,插件将代理请求到 `api.deepseek.com`。设置为
`aimlapi` 时,插件使用 OpenAI 兼容驱动程序,默认将请求代理到 `api.aimlapi.com`。设置为 `anthropic`
时,插件使用 OpenAI 兼容驱动程序,默认将请求代理到 `api.anthropic.com`。设置为 `openrouter` 时,插件使用
OpenAI 兼容驱动程序,默认将请求代理到 `openrouter.ai`。 [...]
| instances.priority | integer | 否 | 0
| | LLM 实例在负载均衡中的优先级。`priority` 优先于 `weight`。 |
| instances.weight | string | 是 | 0
| 大于或等于 0 | LLM 实例在负载均衡中的权重。 |
| instances.auth | object | 是 |
| | 身份验证配置。 |
diff --git a/docs/zh/latest/plugins/ai-proxy.md
b/docs/zh/latest/plugins/ai-proxy.md
index dea580ae2..8035237d7 100644
--- a/docs/zh/latest/plugins/ai-proxy.md
+++ b/docs/zh/latest/plugins/ai-proxy.md
@@ -7,7 +7,7 @@ keywords:
- ai-proxy
- AI
- LLM
-description: ai-proxy 插件通过将插件配置转换为所需的请求格式,简化了对 LLM 和嵌入模型提供商的访问,支持
OpenAI、DeepSeek、Azure、AIMLAPI、Anthropic、OpenRouter、Gemini 和其他 OpenAI 兼容的 API。
+description: ai-proxy 插件通过将插件配置转换为所需的请求格式,简化了对 LLM 和嵌入模型提供商的访问,支持
OpenAI、DeepSeek、Azure、AIMLAPI、Anthropic、OpenRouter、Gemini、Vertex AI 和其他 OpenAI
兼容的 API。
---
<!--
@@ -35,7 +35,7 @@ description: ai-proxy 插件通过将插件配置转换为所需的请求格式
## 描述
-`ai-proxy` 插件通过将插件配置转换为指定的请求格式,简化了对 LLM 和嵌入模型的访问。它支持与
OpenAI、DeepSeek、Azure、AIMLAPI、Anthropic、OpenRouter、Gemini 和其他 OpenAI 兼容的 API 集成。
+`ai-proxy` 插件通过将插件配置转换为指定的请求格式,简化了对 LLM 和嵌入模型的访问。它支持与
OpenAI、DeepSeek、Azure、AIMLAPI、Anthropic、OpenRouter、Gemini、Vertex AI 和其他 OpenAI
兼容的 API 集成。
此外,该插件还支持在访问日志中记录 LLM 请求信息,如令牌使用量、模型、首次响应时间等。
@@ -51,7 +51,7 @@ description: ai-proxy 插件通过将插件配置转换为所需的请求格式
| 名称 | 类型 | 必选项 | 默认值 | 有效值 | 描述
|
|--------------------|--------|----------|---------|------------------------------------------|-------------|
-| provider | string | 是 | | [openai, deepseek,
azure-openai, aimlapi, anthropic, openrouter, gemini, openai-compatible] | LLM
服务提供商。当设置为 `openai` 时,插件将代理请求到 `https://api.openai.com/chat/completions`。当设置为
`deepseek` 时,插件将代理请求到 `https://api.deepseek.com/chat/completions`。当设置为
`aimlapi` 时,插件使用 OpenAI 兼容驱动程序,默认将请求代理到
`https://api.aimlapi.com/v1/chat/completions`。当设置为 `anthropic` 时,插件将代理请求到
`https://api.anthropic.com/v1/chat/completions`。当设置为 `openrouter` 时,插件使用 OpenAI
兼 [...]
+| provider | string | 是 | | [openai, deepseek,
azure-openai, aimlapi, anthropic, openrouter, gemini, vertex-ai,
openai-compatible] | LLM 服务提供商。当设置为 `openai` 时,插件将代理请求到
`https://api.openai.com/chat/completions`。当设置为 `deepseek` 时,插件将代理请求到
`https://api.deepseek.com/chat/completions`。当设置为 `aimlapi` 时,插件使用 OpenAI
兼容驱动程序,默认将请求代理到 `https://api.aimlapi.com/v1/chat/completions`。当设置为 `anthropic`
时,插件将代理请求到 `https://api.anthropic.com/v1/chat/completions`。当设置为 `openrouter`
时,插件 [...]
| auth | object | 是 | |
| 身份验证配置。 |
| auth.header | object | 否 | |
| 身份验证标头。必须配置 `header` 或 `query` 中的至少一个。 |
| auth.query | object | 否 | |
| 身份验证查询参数。必须配置 `header` 或 `query` 中的至少一个。 |
diff --git a/docs/zh/latest/plugins/ai-request-rewrite.md
b/docs/zh/latest/plugins/ai-request-rewrite.md
index 1e8d1cf8a..d98630d93 100644
--- a/docs/zh/latest/plugins/ai-request-rewrite.md
+++ b/docs/zh/latest/plugins/ai-request-rewrite.md
@@ -36,7 +36,7 @@ description: ai-request-rewrite 插件在客户端请求转发到上游服务之
| **字段** | **必选项** | **类型** | **描述**
|
| ------------------------- | ------------ | -------- |
------------------------------------------------------------------------------------
|
| prompt | 是 | String | 发送到 LLM 服务的提示。
|
-| provider | 是 | String | LLM
服务的名称。可用选项:openai、deekseek、azure-openai、aimlapi、anthropic、openrouter、gemini 和
openai-compatible。当选择 `aimlapi` 时,插件使用 OpenAI 兼容驱动程序,默认端点为
`https://api.aimlapi.com/v1/chat/completions`。 |
+| provider | 是 | String | LLM
服务的名称。可用选项:openai、deepseek、azure-openai、aimlapi、anthropic、openrouter、gemini、vertex-ai
和 openai-compatible。当选择 `aimlapi` 时,插件使用 OpenAI 兼容驱动程序,默认端点为
`https://api.aimlapi.com/v1/chat/completions`。 |
| auth | 是 | Object | 身份验证配置
|
| auth.header | 否 | Object | 身份验证头部。键必须匹配模式
`^[a-zA-Z0-9._-]+$`。 |
| auth.query | 否 | Object | 身份验证查询参数。键必须匹配模式
`^[a-zA-Z0-9._-]+$`。 |
diff --git a/t/plugin/ai-proxy-vertex-ai.t b/t/plugin/ai-proxy-vertex-ai.t
new file mode 100644
index 000000000..615c805fd
--- /dev/null
+++ b/t/plugin/ai-proxy-vertex-ai.t
@@ -0,0 +1,572 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+use t::APISIX 'no_plan';
+
+log_level("debug");
+repeat_each(1);
+no_long_string();
+no_root_location();
+
+
+my $resp_file = 't/assets/ai-proxy-response.json';
+open(my $fh, '<', $resp_file) or die "Could not open file '$resp_file' $!";
+my $resp = do { local $/; <$fh> };
+close($fh);
+
+
+add_block_preprocessor(sub {
+ my ($block) = @_;
+
+ if (!defined $block->request) {
+ $block->set_value("request", "GET /t");
+ }
+
+ my $user_yaml_config = <<_EOC_;
+plugins:
+ - ai-proxy-multi
+ - prometheus
+_EOC_
+ $block->set_value("extra_yaml_config", $user_yaml_config);
+
+ my $extra_init_worker_by_lua = <<_EOC_;
+ local gcp_accesstoken = require "apisix.utils.google-cloud-oauth"
+ local ttl = 0
+ gcp_accesstoken.refresh_access_token = function(self)
+ ngx.log(ngx.NOTICE, "[test] mocked gcp_accesstoken called")
+ ttl = ttl + 5
+ self.access_token_ttl = ttl
+ self.access_token = "ya29.c.Kp8B..."
+ end
+_EOC_
+
+ $block->set_value("extra_init_worker_by_lua", $extra_init_worker_by_lua);
+
+
+ my $http_config = $block->http_config // <<_EOC_;
+ server {
+ server_name openai;
+ listen 6724;
+
+ default_type 'application/json';
+
+ location /v1/chat/completions {
+ content_by_lua_block {
+ local json = require("toolkit.json")
+
+ if ngx.req.get_method() ~= "POST" then
+ ngx.status = 400
+ ngx.say("Unsupported request method: ",
ngx.req.get_method())
+ end
+ ngx.req.read_body()
+ local body, err = ngx.req.get_body_data()
+ body, err = json.decode(body)
+
+ if body and body.instances then
+ local vertex_response = {
+ ["predictions"] = {
+ {
+ ["embeddings"] = {
+ ["statistics"] = {
+ ["token_count"] = 7
+ },
+ ["values"] = {
+ 0.0123,
+ -0.0456,
+ 0.0789,
+ 0.0012
+ }
+ }
+ },
+ }
+ }
+ local body = json.encode(vertex_response)
+ ngx.status = 200
+ ngx.say(body)
+ return
+ end
+
+ local test_type = ngx.req.get_headers()["test-type"]
+ if test_type == "options" then
+ if body.foo == "bar" then
+ ngx.status = 200
+ ngx.say("options works")
+ else
+ ngx.status = 500
+ ngx.say("model options feature doesn't work")
+ end
+ return
+ end
+
+ local header_auth = ngx.req.get_headers()["authorization"]
+ local query_auth = ngx.req.get_uri_args()["apikey"]
+
+ if header_auth ~= "Bearer token" and query_auth ~=
"apikey" and header_auth ~= "Bearer ya29.c.Kp8B..." then
+ ngx.status = 401
+ ngx.say("Unauthorized")
+ return
+ end
+
+ if header_auth == "Bearer token" or query_auth == "apikey"
or header_auth == "Bearer ya29.c.Kp8B..." then
+ if header_auth == "Bearer ya29.c.Kp8B..." then
+ ngx.log(ngx.NOTICE, "[test] GCP service account
auth works")
+ end
+ ngx.req.read_body()
+ local body, err = ngx.req.get_body_data()
+ body, err = json.decode(body)
+
+ if not body.messages or #body.messages < 1 then
+ ngx.status = 400
+ ngx.say([[{ "error": "bad request"}]])
+ return
+ end
+ if body.messages[1].content == "write an SQL query to
get all rows from student table" then
+ ngx.print("SELECT * FROM STUDENTS")
+ return
+ end
+
+ ngx.status = 200
+ ngx.say([[$resp]])
+ return
+ end
+
+
+ ngx.status = 503
+ ngx.say("reached the end of the test suite")
+ }
+ }
+
+ location /random {
+ content_by_lua_block {
+ ngx.say("path override works")
+ }
+ }
+
+ location ~ ^/status.* {
+ content_by_lua_block {
+ local test_dict = ngx.shared["test"]
+ local uri = ngx.var.uri
+ local total_key = uri .. "#total"
+ local count_key = uri .. "#count"
+ local total = test_dict:get(total_key)
+ if not total then
+ return
+ end
+
+ local count = test_dict:incr(count_key, 1, 0)
+ ngx.log(ngx.INFO, "uri: ", uri, " total: ", total, "
count: ", count)
+ if count < total then
+ return
+ end
+ ngx.status = 500
+ ngx.say("error")
+ }
+ }
+ }
+_EOC_
+
+ $block->set_value("http_config", $http_config);
+});
+
+run_tests();
+
+__DATA__
+
+=== TEST 1: set route with right auth header
+--- config
+ location /t {
+ content_by_lua_block {
+ local t = require("lib.test_admin").test
+ local code, body = t('/apisix/admin/routes/1',
+ ngx.HTTP_PUT,
+ [[{
+ "uri": "/anything",
+ "plugins": {
+ "ai-proxy-multi": {
+ "instances": [
+ {
+ "name": "vertex-ai",
+ "provider": "vertex-ai",
+ "weight": 1,
+ "auth": {
+ "header": {
+ "Authorization": "Bearer token"
+ }
+ },
+ "options": {
+ "model": "gemini-2.0-flash",
+ "max_tokens": 512,
+ "temperature": 1.0
+ },
+ "override": {
+ "endpoint":
"http://localhost:6724/v1/chat/completions"
+ }
+ }
+ ],
+ "ssl_verify": false
+ }
+ }
+ }]]
+ )
+
+ if code >= 300 then
+ ngx.status = code
+ end
+ ngx.say(body)
+ }
+ }
+--- response_body
+passed
+
+
+
+=== TEST 2: send request
+--- request
+POST /anything
+{ "messages": [ { "role": "system", "content": "You are a mathematician" }, {
"role": "user", "content": "What is 1+1?"} ] }
+--- more_headers
+Authorization: Bearer token
+--- error_code: 200
+--- response_body eval
+qr/"content":"1 \+ 1 = 2\."/
+
+
+
+=== TEST 3: request embeddings, check values field in response
+--- request
+POST /anything
+{"input": "Your text string goes here"}
+--- more_headers
+Authorization: Bearer token
+--- error_code: 200
+--- response_body eval
+qr/"embedding":\[0.0123,-0.0456,0.0789,0.0012\]/
+
+
+
+=== TEST 4: request embeddings, check token_count field in response
+--- request
+POST /anything
+{"input": "Your text string goes here"}
+--- more_headers
+Authorization: Bearer token
+--- error_code: 200
+--- response_body eval
+qr/"total_tokens":7/
+
+
+
+=== TEST 5: set route with right auth gcp service account
+--- config
+ location /t {
+ content_by_lua_block {
+ local t = require("lib.test_admin").test
+ local code, body = t('/apisix/admin/routes/1',
+ ngx.HTTP_PUT,
+ [[{
+ "uri": "/anything",
+ "plugins": {
+ "ai-proxy-multi": {
+ "instances": [
+ {
+ "name": "vertex-ai",
+ "provider": "vertex-ai",
+ "weight": 1,
+ "auth": {
+ "gcp": { "max_ttl": 8 }
+ },
+ "options": {
+ "model": "gemini-2.0-flash",
+ "max_tokens": 512,
+ "temperature": 1.0
+ },
+ "override": {
+ "endpoint":
"http://localhost:6724/v1/chat/completions"
+ }
+ }
+ ],
+ "ssl_verify": false
+ }
+ }
+ }]]
+ )
+
+ if code >= 300 then
+ ngx.status = code
+ end
+ ngx.say(body)
+ }
+ }
+--- response_body
+passed
+
+
+
+=== TEST 6: send request
+--- request
+POST /anything
+{ "messages": [ { "role": "system", "content": "You are a mathematician" }, {
"role": "user", "content": "What is 1+1?"} ] }
+--- error_code: 200
+--- error_log
+[test] GCP service account auth works
+--- response_body eval
+qr/"content":"1 \+ 1 = 2\."/
+
+
+
+=== TEST 7: check gcp access token caching works
+--- config
+ location /t {
+ content_by_lua_block {
+ local t = require("lib.test_admin").test
+ local core = require("apisix.core")
+ local send_request = function()
+ local code, _, body = t("/anything",
+ ngx.HTTP_POST,
+ [[{
+ "messages": [
+ { "role": "system", "content": "You are a
mathematician" },
+ { "role": "user", "content": "What is 1+1?" }
+ ]
+ }]],
+ nil,
+ {
+ ["Content-Type"] = "application/json",
+ }
+ )
+ assert(code == 200, "request should be successful")
+ return body
+ end
+ for i = 1, 6 do
+ send_request()
+ end
+
+ ngx.sleep(5.5)
+ send_request()
+
+ ngx.say("passed")
+ }
+ }
+--- timeout: 7
+--- response_body
+passed
+--- error_log
+[test] mocked gcp_accesstoken called
+[test] mocked gcp_accesstoken called
+set gcp access token in cache with ttl: 5
+set gcp access token in cache with ttl: 8
+
+
+
+=== TEST 8: set route with multiple instances and gcp service account
+--- config
+ location /t {
+ content_by_lua_block {
+ local t = require("lib.test_admin").test
+ local code, body = t('/apisix/admin/routes/1',
+ ngx.HTTP_PUT,
+ [[{
+ "uri": "/anything",
+ "plugins": {
+ "ai-proxy-multi": {
+ "instances": [
+ {
+ "name": "vertex-ai-one",
+ "provider": "vertex-ai",
+ "weight": 1,
+ "auth": {
+ "gcp": {}
+ },
+ "options": {
+ "model": "gemini-2.0-flash",
+ "max_tokens": 512,
+ "temperature": 1.0
+ },
+ "override": {
+ "endpoint":
"http://localhost:6724/v1/chat/completions"
+ }
+ },
+ {
+ "name": "vertex-ai-multi",
+ "provider": "vertex-ai",
+ "weight": 1,
+ "auth": {
+ "gcp": {}
+ },
+ "options": {
+ "model": "gemini-2.0-flash",
+ "max_tokens": 512,
+ "temperature": 1.0
+ },
+ "override": {
+ "endpoint":
"http://localhost:6724/v1/chat/completions"
+ }
+ }
+ ],
+ "ssl_verify": false
+ }
+ }
+ }]]
+ )
+
+ if code >= 300 then
+ ngx.status = code
+ end
+ ngx.say(body)
+ }
+ }
+--- response_body
+passed
+
+
+
+=== TEST 9: check gcp access token caching works
+--- config
+ location /t {
+ content_by_lua_block {
+ local t = require("lib.test_admin").test
+ local core = require("apisix.core")
+ local send_request = function()
+ local code, _, body = t("/anything",
+ ngx.HTTP_POST,
+ [[{
+ "messages": [
+ { "role": "system", "content": "You are a
mathematician" },
+ { "role": "user", "content": "What is 1+1?" }
+ ]
+ }]],
+ nil,
+ {
+ ["Content-Type"] = "application/json",
+ }
+ )
+ assert(code == 200, "request should be successful")
+ return body
+ end
+ for i = 1, 12 do
+ send_request()
+ end
+
+ ngx.say("passed")
+ }
+ }
+--- timeout: 7
+--- response_body
+passed
+--- error_log
+#vertex-ai-one
+#vertex-ai-multi
+
+
+
+=== TEST 10: set ai-proxy-multi with health checks
+--- config
+ location /t {
+ content_by_lua_block {
+ local checks = [[
+ "checks": {
+ "active": {
+ "timeout": 5,
+ "http_path": "/status/gpt4",
+ "host": "foo.com",
+ "healthy": {
+ "interval": 1,
+ "successes": 1
+ },
+ "unhealthy": {
+ "interval": 1,
+ "http_failures": 1
+ },
+ "req_headers": ["User-Agent: curl/7.29.0"]
+ }
+ }]]
+ local t = require("lib.test_admin").test
+ local code, body = t('/apisix/admin/routes/1',
+ ngx.HTTP_PUT,
+ string.format([[{
+ "uri": "/anything",
+ "plugins": {
+ "ai-proxy-multi": {
+ "instances": [
+ {
+ "name": "vertex-ai",
+ "provider": "vertex-ai",
+ "weight": 1,
+ "priority": 2,
+ "auth": {
+ "header": {
+ "Authorization": "Bearer token"
+ }
+ },
+ "options": {
+ "model": "gemini-2.0-flash",
+ "max_tokens": 512,
+ "temperature": 1.0
+ },
+ "override": {
+ "endpoint":
"http://localhost:6724/v1/chat/completions"
+ },
+ %s
+ },
+ {
+ "name": "openai-gpt3",
+ "provider": "openai",
+ "weight": 1,
+ "priority": 1,
+ "auth": {
+ "header": {
+ "Authorization": "Bearer token"
+ }
+ },
+ "options": {
+ "model": "gpt-3"
+ },
+ "override": {
+ "endpoint": "http://localhost:6724"
+ }
+ }
+ ],
+ "ssl_verify": false
+ }
+ }
+ }]], checks)
+ )
+
+ if code >= 300 then
+ ngx.status = code
+ end
+ ngx.say(body)
+ }
+ }
+--- response_body
+passed
+
+
+
+=== TEST 11: check health check works
+--- wait: 5
+--- request
+POST /anything
+{ "messages": [ { "role": "system", "content": "You are a mathematician" }, {
"role": "user", "content": "What is 1+1?"} ] }
+--- more_headers
+Authorization: Bearer token
+--- error_code: 200
+--- response_body eval
+qr/"content":"1 \+ 1 = 2\."/
+--- error_log
+creating healthchecker for upstream
+request head: GET /status/gpt4