This is an automated email from the ASF dual-hosted git repository.
young pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/apisix.git
The following commit(s) were added to refs/heads/master by this push:
new 5b7170def feat: support vertex-ai (#12933)
5b7170def is described below
commit 5b7170def6deccffb593bdab13fb705f4202f75d
Author: YYYoung <[email protected]>
AuthorDate: Mon Jan 26 16:01:07 2026 +0800
feat: support vertex-ai (#12933)
---
apisix/cli/ngx_tpl.lua | 1 +
apisix/plugins/ai-drivers/azure-openai.lua | 4 +-
apisix/plugins/ai-drivers/openai-base.lua | 142 +++++--
apisix/plugins/ai-drivers/schema.lua | 1 +
apisix/plugins/ai-drivers/vertex-ai.lua | 191 +++++++++
apisix/plugins/ai-proxy-multi.lua | 18 +-
apisix/plugins/ai-proxy.lua | 7 +
apisix/plugins/ai-proxy/base.lua | 5 +-
apisix/plugins/ai-proxy/schema.lua | 53 ++-
apisix/plugins/ai-request-rewrite.lua | 3 +-
apisix/utils/google-cloud-oauth.lua | 3 +
docs/en/latest/plugins/ai-proxy-multi.md | 6 +-
docs/en/latest/plugins/ai-proxy.md | 6 +-
docs/en/latest/plugins/ai-request-rewrite.md | 2 +-
docs/zh/latest/plugins/ai-proxy-multi.md | 6 +-
docs/zh/latest/plugins/ai-proxy.md | 6 +-
docs/zh/latest/plugins/ai-request-rewrite.md | 2 +-
t/plugin/ai-proxy-vertex-ai.t | 572 +++++++++++++++++++++++++++
18 files changed, 973 insertions(+), 55 deletions(-)
diff --git a/apisix/cli/ngx_tpl.lua b/apisix/cli/ngx_tpl.lua
index bfca1ccba..e82b70b04 100644
--- a/apisix/cli/ngx_tpl.lua
+++ b/apisix/cli/ngx_tpl.lua
@@ -53,6 +53,7 @@ env PATH; # for searching external plugin runner's binary
# reserved environment variables for configuration
env APISIX_DEPLOYMENT_ETCD_HOST;
+env GCP_SERVICE_ACCOUNT;
{% if envs then %}
{% for _, name in ipairs(envs) do %}
diff --git a/apisix/plugins/ai-drivers/azure-openai.lua
b/apisix/plugins/ai-drivers/azure-openai.lua
index 3022e7671..3df83fa1e 100644
--- a/apisix/plugins/ai-drivers/azure-openai.lua
+++ b/apisix/plugins/ai-drivers/azure-openai.lua
@@ -19,8 +19,6 @@ return require("apisix.plugins.ai-drivers.openai-base").new(
{
path = "/completions",
port = 443,
- options = {
- remove_model = true
- }
+ remove_model = true
}
)
diff --git a/apisix/plugins/ai-drivers/openai-base.lua
b/apisix/plugins/ai-drivers/openai-base.lua
index cb48c3e07..8688f9eb0 100644
--- a/apisix/plugins/ai-drivers/openai-base.lua
+++ b/apisix/plugins/ai-drivers/openai-base.lua
@@ -27,6 +27,9 @@ local plugin = require("apisix.plugin")
local http = require("resty.http")
local url = require("socket.url")
local sse = require("apisix.plugins.ai-drivers.sse")
+local google_oauth = require("apisix.utils.google-cloud-oauth")
+
+local lrucache = require("resty.lrucache")
local ngx = ngx
local ngx_now = ngx.now
@@ -34,6 +37,7 @@ local table = table
local pairs = pairs
local type = type
local math = math
+local os = os
local ipairs = ipairs
local setmetatable = setmetatable
@@ -41,15 +45,8 @@ local HTTP_INTERNAL_SERVER_ERROR =
ngx.HTTP_INTERNAL_SERVER_ERROR
local HTTP_GATEWAY_TIMEOUT = ngx.HTTP_GATEWAY_TIMEOUT
-function _M.new(opts)
-
- local self = {
- host = opts.host,
- port = opts.port,
- path = opts.path,
- remove_model = opts.options and opts.options.remove_model
- }
- return setmetatable(self, mt)
+function _M.new(opt)
+ return setmetatable(opt, mt)
end
@@ -76,7 +73,7 @@ local function handle_error(err)
end
-local function read_response(ctx, res)
+local function read_response(conf, ctx, res, response_filter)
local body_reader = res.body_reader
if not body_reader then
core.log.warn("AI service sent no response body")
@@ -153,6 +150,7 @@ local function read_response(ctx, res)
end
end
+ local headers = res.headers
local raw_res_body, err = res:read_body()
if not raw_res_body then
core.log.warn("failed to read response body: ", err)
@@ -166,6 +164,25 @@ local function read_response(ctx, res)
core.log.warn("invalid response body from ai service: ", raw_res_body,
" err: ", err,
", it will cause token usage not available")
else
+ if response_filter then
+ local resp = {
+ headers = headers,
+ body = res_body,
+ }
+ local code, err = response_filter(conf, ctx, resp)
+ if code then
+ return code, err
+ end
+ if resp.body then
+ local body, err = core.json.encode(resp.body)
+ if not body then
+ core.log.error("failed to encode response body after
response filter: ", err)
+ return 500
+ end
+ raw_res_body = body
+ end
+ headers = resp.headers
+ end
core.log.info("got token usage from ai service: ",
core.json.delay_encode(res_body.usage))
ctx.ai_token_usage = {}
if type(res_body.usage) == "table" then
@@ -189,7 +206,44 @@ local function read_response(ctx, res)
ctx.var.llm_response_text = content_to_check
end
end
- plugin.lua_response_filter(ctx, res.headers, raw_res_body)
+ plugin.lua_response_filter(ctx, headers, raw_res_body)
+end
+
+
+local gcp_access_token_cache = lrucache.new(1024 * 4)
+
+local function fetch_gcp_access_token(ctx, name, gcp_conf)
+ local key = core.lrucache.plugin_ctx_id(ctx, name)
+ local access_token = gcp_access_token_cache:get(key)
+ if access_token then
+ return access_token
+ end
+ -- generate access token
+ local auth_conf = {}
+ local service_account_json = gcp_conf.service_account_json or
+ os.getenv("GCP_SERVICE_ACCOUNT")
+ if type(service_account_json) == "string" and service_account_json ~= ""
then
+ local conf, err = core.json.decode(service_account_json)
+ if not conf then
+ return nil, "invalid gcp service account json: " .. (err or
"unknown error")
+ end
+ auth_conf = conf
+ end
+ local oauth = google_oauth.new(auth_conf)
+ access_token = oauth:generate_access_token()
+ if not access_token then
+ return nil, "failed to get google oauth token"
+ end
+ local ttl = oauth.access_token_ttl or 6
+ if gcp_conf.expire_early_secs and ttl > gcp_conf.expire_early_secs then
+ ttl = ttl - gcp_conf.expire_early_secs
+ end
+ if gcp_conf.max_ttl and ttl > gcp_conf.max_ttl then
+ ttl = gcp_conf.max_ttl
+ end
+ gcp_access_token_cache:set(key, access_token, ttl)
+ core.log.debug("set gcp access token in cache with ttl: ", ttl, ", key: ",
key)
+ return access_token
end
@@ -201,7 +255,21 @@ function _M.request(self, ctx, conf, request_table,
extra_opts)
end
httpc:set_timeout(conf.timeout)
- local endpoint = extra_opts and extra_opts.endpoint
+ core.log.info("request extra_opts to LLM server: ",
core.json.delay_encode(extra_opts, true))
+
+ local auth = extra_opts.auth or {}
+ local token
+ if auth.gcp then
+ local access_token, err = fetch_gcp_access_token(ctx, extra_opts.name,
+ auth.gcp)
+ if not access_token then
+ core.log.error("failed to get gcp access token: ", err)
+ return 500
+ end
+ token = access_token
+ end
+
+ local endpoint = extra_opts.endpoint
local parsed_url
if endpoint then
parsed_url = url.parse(endpoint)
@@ -217,20 +285,8 @@ function _M.request(self, ctx, conf, request_table,
extra_opts)
port = 80
end
end
- local ok, err = httpc:connect({
- scheme = scheme,
- host = host,
- port = port,
- ssl_verify = conf.ssl_verify,
- ssl_server_name = parsed_url and parsed_url.host or self.host,
- })
-
- if not ok then
- core.log.warn("failed to connect to LLM server: ", err)
- return handle_error(err)
- end
- local query_params = extra_opts.query_params
+ local query_params = auth.query or {}
if type(parsed_url) == "table" and parsed_url.query and #parsed_url.query
> 0 then
local args_tab = core.string.decode_args(parsed_url.query)
@@ -241,14 +297,22 @@ function _M.request(self, ctx, conf, request_table,
extra_opts)
local path = (parsed_url and parsed_url.path or self.path)
- local headers = extra_opts.headers
+ local headers = auth.header or {}
headers["Content-Type"] = "application/json"
+ if token then
+ headers["Authorization"] = "Bearer " .. token
+ end
+
local params = {
method = "POST",
+ scheme = scheme,
headers = headers,
ssl_verify = conf.ssl_verify,
path = path,
- query = query_params
+ query = query_params,
+ host = host,
+ port = port,
+ ssl_server_name = parsed_url and parsed_url.host or self.host,
}
if extra_opts.model_options then
@@ -256,12 +320,30 @@ function _M.request(self, ctx, conf, request_table,
extra_opts)
request_table[opt] = val
end
end
+ params.body = request_table
+
if self.remove_model then
request_table.model = nil
end
- local req_json, err = core.json.encode(request_table)
+
+ if self.request_filter then
+ local code, err = self.request_filter(extra_opts.conf, ctx, params)
+ if code then
+ return code, err
+ end
+ end
+
+ core.log.info("sending request to LLM server: ",
core.json.delay_encode(params, true))
+
+ local ok, err = httpc:connect(params)
+ if not ok then
+ core.log.error("failed to connect to LLM server: ", err)
+ return handle_error(err)
+ end
+
+ local req_json, err = core.json.encode(params.body)
if not req_json then
- return nil, err
+ return 500, "failed to encode request body: " .. (err or "unknown
error")
end
params.body = req_json
@@ -277,7 +359,7 @@ function _M.request(self, ctx, conf, request_table,
extra_opts)
return res.status
end
- local code, body = read_response(ctx, res)
+ local code, body = read_response(extra_opts.conf, ctx, res,
self.response_filter)
if conf.keepalive then
local ok, err = httpc:set_keepalive(conf.keepalive_timeout,
conf.keepalive_pool)
diff --git a/apisix/plugins/ai-drivers/schema.lua
b/apisix/plugins/ai-drivers/schema.lua
index 8048602ea..4f08f88ae 100644
--- a/apisix/plugins/ai-drivers/schema.lua
+++ b/apisix/plugins/ai-drivers/schema.lua
@@ -49,6 +49,7 @@ local openai_compatible_list = {
"openai-compatible",
"azure-openai",
"openrouter",
+ "vertex-ai",
"gemini",
}
diff --git a/apisix/plugins/ai-drivers/vertex-ai.lua
b/apisix/plugins/ai-drivers/vertex-ai.lua
new file mode 100644
index 000000000..9b1e35113
--- /dev/null
+++ b/apisix/plugins/ai-drivers/vertex-ai.lua
@@ -0,0 +1,191 @@
+--
+-- Licensed to the Apache Software Foundation (ASF) under one or more
+-- contributor license agreements. See the NOTICE file distributed with
+-- this work for additional information regarding copyright ownership.
+-- The ASF licenses this file to You under the Apache License, Version 2.0
+-- (the "License"); you may not use this file except in compliance with
+-- the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+
+local core = require("apisix.core")
+local string = string
+local str_fmt = string.format
+local type = type
+local ipairs = ipairs
+
+local host_template_fmt =
+ "%s-aiplatform.googleapis.com"
+local embeddings_path_template_fmt =
+ "/v1/projects/%s/locations/%s/publishers/google/models/%s:predict"
+local chat_completions_path_template_fmt =
+ "/v1beta1/projects/%s/locations/%s/endpoints/openapi/chat/completions"
+
+local function get_host(region)
+ return str_fmt(host_template_fmt, region)
+end
+
+
+local function get_chat_completions_path(project_id, region)
+ return str_fmt(chat_completions_path_template_fmt, project_id, region)
+end
+
+
+local function get_embeddings_path(project_id, region, model)
+ return str_fmt(embeddings_path_template_fmt, project_id, region, model)
+end
+
+
+local function get_node(instance_conf)
+ local host = "aiplatform.googleapis.com"
+ local region = core.table.try_read_attr(instance_conf, "provider_conf",
"region")
+ if region then
+ host = get_host(region)
+ end
+ return {
+ scheme = "https",
+ host = host,
+ port = 443,
+ }
+end
+
+local function openai_embeddings_to_vertex_predict(openai_req)
+ if not openai_req then
+ return nil, "empty openai request"
+ end
+
+ local input = openai_req.input
+ if not input then
+ return nil, "`input` is required for embeddings"
+ end
+
+ local input_contexts = {}
+
+ if type(input) == "string" then
+ input_contexts = { input }
+ elseif type(input) == "table" then
+ for i, v in ipairs(input) do
+ if type(v) == "string" then
+ core.table.insert(input_contexts, v)
+ elseif type(v) == "table" then
+ core.table.insert(input_contexts, core.table.concat(v, " "))
+ else
+ return nil, "unsupported input type at index " .. i
+ end
+ end
+ else
+ return nil, "`input` must be string or array"
+ end
+
+ local instances = {}
+ for _, text in ipairs(input_contexts) do
+ core.table.insert(instances, {
+ content = text
+ })
+ end
+
+ return {
+ instances = instances
+ }
+end
+
+local function vertex_predict_to_openai_embeddings(vertex_resp, openai_model)
+ if type(vertex_resp) ~= "table" then
+ return nil, "empty vertex response"
+ end
+
+ local predictions = vertex_resp.predictions
+ if type(predictions) ~= "table" then
+ return nil, "vertex response missing predictions"
+ end
+
+ local data = {}
+ local total_tokens = 0
+
+ for i, pred in ipairs(predictions) do
+ local emb = pred.embeddings or {}
+ local values = emb.values
+ if type(values) ~= "table" then
+ return nil, "invalid embedding at index " .. i
+ end
+
+ if emb.statistics and emb.statistics.token_count then
+ total_tokens = total_tokens + emb.statistics.token_count
+ end
+
+ core.table.insert(data, {
+ object = "embedding",
+ index = i - 1,
+ embedding = values
+ })
+ end
+
+ return {
+ object = "list",
+ data = data,
+ model = openai_model or "unknown",
+ usage = {
+ prompt_tokens = total_tokens,
+ total_tokens = total_tokens,
+ }
+ }
+end
+
+
+local function request_filter(conf, ctx, http_params)
+ local body = http_params.body
+ if body and body.input then
+ ctx.llm_request_type = "embeddings"
+ local vertex_req, err = openai_embeddings_to_vertex_predict(body)
+ if not vertex_req then
+ return nil, "failed to convert to vertex predict request: " .. err
+ end
+ http_params.body = vertex_req
+ core.log.debug("using embeddings endpoint for Vertex AI")
+ else
+ ctx.llm_request_type = "chat_completions"
+ end
+ ctx.llm_request_model = body and body.model
+
+ if conf.project_id and conf.region then
+ if not http_params.path then
+ local path
+ if ctx.llm_request_type == "embeddings" then
+ path = get_embeddings_path(conf.project_id, conf.region,
body.model)
+ else
+ path = get_chat_completions_path(conf.project_id, conf.region)
+ end
+ http_params.path = path
+ end
+ if not http_params.host then
+ http_params.host = get_host(conf.region)
+ end
+ end
+end
+
+
+local function response_filter(conf, ctx, resp)
+ if ctx.llm_request_type == "embeddings" then
+ local vertex_body = resp.body
+ local openai_resp, err =
vertex_predict_to_openai_embeddings(vertex_body,
+
ctx.llm_request_model)
+ if not openai_resp then
+ return 500, "failed to convert to openai embeddings response: " ..
err
+ end
+ resp.body = openai_resp
+ end
+end
+
+
+return require("apisix.plugins.ai-drivers.openai-base").new({
+ get_node = get_node,
+ request_filter = request_filter,
+ response_filter = response_filter,
+})
diff --git a/apisix/plugins/ai-proxy-multi.lua
b/apisix/plugins/ai-proxy-multi.lua
index bb32f5ffb..74a84d0e3 100644
--- a/apisix/plugins/ai-proxy-multi.lua
+++ b/apisix/plugins/ai-proxy-multi.lua
@@ -107,6 +107,13 @@ function _M.check_schema(conf)
core.log.warn("fail to require ai provider: ", instance.provider,
", err", err)
return false, "ai provider: " .. instance.provider .. " is not
supported."
end
+ local sa_json = core.table.try_read_attr(instance, "auth", "gcp",
"service_account_json")
+ if sa_json then
+ local _, err = core.json.decode(sa_json)
+ if err then
+ return false, "invalid gcp service_account_json: " .. err
+ end
+ end
end
local algo = core.table.try_read_attr(conf, "balancer", "algorithm")
local hash_on = core.table.try_read_attr(conf, "balancer", "hash_on")
@@ -181,10 +188,15 @@ local function resolve_endpoint(instance_conf)
port = tonumber(port)
else
local ai_driver = require("apisix.plugins.ai-drivers." ..
instance_conf.provider)
- -- built-in ai driver always use https
+ if ai_driver.get_node then
+ local node = ai_driver.get_node(instance_conf)
+ host = node.host
+ port = node.port
+ else
+ host = ai_driver.host
+ port = ai_driver.port
+ end
scheme = "https"
- host = ai_driver.host
- port = ai_driver.port
end
local new_node = {
host = host,
diff --git a/apisix/plugins/ai-proxy.lua b/apisix/plugins/ai-proxy.lua
index 092eb6a08..68bd36d8b 100644
--- a/apisix/plugins/ai-proxy.lua
+++ b/apisix/plugins/ai-proxy.lua
@@ -40,6 +40,13 @@ function _M.check_schema(conf)
core.log.warn("fail to require ai provider: ", conf.provider, ", err",
err)
return false, "ai provider: " .. conf.provider .. " is not supported."
end
+ local sa_json = core.table.try_read_attr(conf, "auth", "gcp",
"service_account_json")
+ if sa_json then
+ local _, err = core.json.decode(sa_json)
+ if err then
+ return false, "invalid gcp service_account_json: " .. err
+ end
+ end
return ok
end
diff --git a/apisix/plugins/ai-proxy/base.lua b/apisix/plugins/ai-proxy/base.lua
index e6e3fd9f3..324ac2da5 100644
--- a/apisix/plugins/ai-proxy/base.lua
+++ b/apisix/plugins/ai-proxy/base.lua
@@ -59,10 +59,11 @@ function _M.before_proxy(conf, ctx, on_error)
end
local extra_opts = {
+ name = ai_instance.name,
endpoint = core.table.try_read_attr(ai_instance, "override",
"endpoint"),
- query_params = ai_instance.auth.query or {},
- headers = (ai_instance.auth.header or {}),
model_options = ai_instance.options,
+ conf = ai_instance.provider_conf or {},
+ auth = ai_instance.auth,
}
if request_body.stream then
diff --git a/apisix/plugins/ai-proxy/schema.lua
b/apisix/plugins/ai-proxy/schema.lua
index 89f857c0d..3ed0d9e93 100644
--- a/apisix/plugins/ai-proxy/schema.lua
+++ b/apisix/plugins/ai-proxy/schema.lua
@@ -33,6 +33,29 @@ local auth_schema = {
patternProperties = {
header = auth_item_schema,
query = auth_item_schema,
+ gcp = {
+ type = "object",
+ description = 'Whether to use GCP service account for
authentication,'
+ .. ' support set env GCP_SERVICE_ACCOUNT.',
+ properties = {
+ service_account_json = {
+ type = "string",
+ description = "GCP service account JSON content for
authentication",
+ },
+ max_ttl = {
+ type = "integer",
+ minimum = 1,
+ description = "Maximum TTL (in seconds) for GCP access
token caching",
+ },
+ expire_early_secs = {
+ type = "integer",
+ minimum = 0,
+ description = "Expire the access token early by specified
seconds to avoid " ..
+ "edge cases",
+ default = 60,
+ },
+ }
+ },
},
additionalProperties = false,
}
@@ -49,6 +72,21 @@ local model_options_schema = {
additionalProperties = true,
}
+local provider_vertex_ai_schema = {
+ type = "object",
+ properties = {
+ project_id = {
+ type = "string",
+ description = "Google Cloud Project ID",
+ },
+ region = {
+ type = "string",
+ description = "Google Cloud Region",
+ },
+ },
+ required = { "project_id", "region" },
+}
+
local ai_instance_schema = {
type = "array",
minItems = 1,
@@ -94,7 +132,20 @@ local ai_instance_schema = {
required = {"active"}
}
},
- required = {"name", "provider", "auth", "weight"}
+ required = {"name", "provider", "auth", "weight"},
+ ["if"] = {
+ properties = { provider = { enum = { "vertex-ai" } } },
+ },
+ ["then"] = {
+ properties = {
+ provider_conf = provider_vertex_ai_schema,
+ },
+ oneOf = {
+ { required = { "provider_conf" } },
+ { required = { "override" } },
+ },
+ },
+ ["else"] = {},
},
}
diff --git a/apisix/plugins/ai-request-rewrite.lua
b/apisix/plugins/ai-request-rewrite.lua
index f0ebcf989..607afc35e 100644
--- a/apisix/plugins/ai-request-rewrite.lua
+++ b/apisix/plugins/ai-request-rewrite.lua
@@ -118,8 +118,7 @@ local function request_to_llm(conf, request_table, ctx)
local extra_opts = {
endpoint = core.table.try_read_attr(conf, "override", "endpoint"),
- query_params = conf.auth.query or {},
- headers = (conf.auth.header or {}),
+ auth = conf.auth,
model_options = conf.options
}
ctx.llm_request_start_time = ngx.now()
diff --git a/apisix/utils/google-cloud-oauth.lua
b/apisix/utils/google-cloud-oauth.lua
index 6cb352848..340f5a6df 100644
--- a/apisix/utils/google-cloud-oauth.lua
+++ b/apisix/utils/google-cloud-oauth.lua
@@ -76,6 +76,7 @@ function _M.refresh_access_token(self)
self.access_token = res.access_token
self.access_token_type = res.token_type
+ self.access_token_ttl = res.expires_in
self.access_token_expire_time = get_timestamp() + res.expires_in
end
@@ -121,6 +122,8 @@ function _M.new(config, ssl_verify)
if type(config.scope) == "table" then
oauth.scope = core.table.concat(config.scope, " ")
end
+ else
+ oauth.scope = "https://www.googleapis.com/auth/cloud-platform"
end
return setmetatable(oauth, { __index = _M })
diff --git a/docs/en/latest/plugins/ai-proxy-multi.md
b/docs/en/latest/plugins/ai-proxy-multi.md
index c5883bf49..5c195ada2 100644
--- a/docs/en/latest/plugins/ai-proxy-multi.md
+++ b/docs/en/latest/plugins/ai-proxy-multi.md
@@ -7,7 +7,7 @@ keywords:
- ai-proxy-multi
- AI
- LLM
-description: The ai-proxy-multi Plugin extends the capabilities of ai-proxy
with load balancing, retries, fallbacks, and health chekcs, simplifying the
integration with OpenAI, DeepSeek, Azure, AIMLAPI, Anthropic, OpenRouter,
Gemini, and other OpenAI-compatible APIs.
+description: The ai-proxy-multi Plugin extends the capabilities of ai-proxy
with load balancing, retries, fallbacks, and health checks, simplifying the
integration with OpenAI, DeepSeek, Azure, AIMLAPI, Anthropic, OpenRouter,
Gemini, Vertex AI, and other OpenAI-compatible APIs.
---
<!--
@@ -35,7 +35,7 @@ description: The ai-proxy-multi Plugin extends the
capabilities of ai-proxy with
## Description
-The `ai-proxy-multi` Plugin simplifies access to LLM and embedding models by
transforming Plugin configurations into the designated request format for
OpenAI, DeepSeek, Azure, AIMLAPI, Anthropic, OpenRouter, Gemini, and other
OpenAI-compatible APIs. It extends the capabilities of
[`ai-proxy`](./ai-proxy.md) with load balancing, retries, fallbacks, and health
checks.
+The `ai-proxy-multi` Plugin simplifies access to LLM and embedding models by
transforming Plugin configurations into the designated request format for
OpenAI, DeepSeek, Azure, AIMLAPI, Anthropic, OpenRouter, Gemini, Vertex AI, and
other OpenAI-compatible APIs. It extends the capabilities of
[`ai-proxy`](./ai-proxy.md) with load balancing, retries, fallbacks, and health
checks.
In addition, the Plugin also supports logging LLM request information in the
access log, such as token usage, model, time to the first response, and more.
@@ -58,7 +58,7 @@ In addition, the Plugin also supports logging LLM request
information in the acc
| balancer.key | string | False |
| | Used when `type` is `chash`. When
`hash_on` is set to `header` or `cookie`, `key` is required. When `hash_on` is
set to `consumer`, `key` is not required as the consumer name will be used as
the key automatically. |
| instances | array[object] | True |
| | LLM instance configurations. |
| instances.name | string | True |
| | Name of the LLM service instance. |
-| instances.provider | string | True |
| [openai, deepseek, azure-openai, aimlapi, anthropic,
openrouter, gemini, openai-compatible] | LLM service provider. When set to
`openai`, the Plugin will proxy the request to `api.openai.com`. When set to
`deepseek`, the Plugin will proxy the request to `api.deepseek.com`. When set
to `aimlapi`, the Plugin uses the OpenAI-compatible driver and proxies the
request to `api.aimlapi.com` by [...]
+| instances.provider | string | True |
| [openai, deepseek, azure-openai, aimlapi, anthropic,
openrouter, gemini, vertex-ai, openai-compatible] | LLM service provider. When
set to `openai`, the Plugin will proxy the request to `api.openai.com`. When
set to `deepseek`, the Plugin will proxy the request to `api.deepseek.com`.
When set to `aimlapi`, the Plugin uses the OpenAI-compatible driver and proxies
the request to `api.aiml [...]
| instances.priority | integer | False | 0
| | Priority of the LLM instance in load
balancing. `priority` takes precedence over `weight`. |
| instances.weight | string | True | 0
| greater or equal to 0 | Weight of the LLM instance in
load balancing. |
| instances.auth | object | True |
| | Authentication configurations. |
diff --git a/docs/en/latest/plugins/ai-proxy.md
b/docs/en/latest/plugins/ai-proxy.md
index cde04bc47..e09f0c072 100644
--- a/docs/en/latest/plugins/ai-proxy.md
+++ b/docs/en/latest/plugins/ai-proxy.md
@@ -7,7 +7,7 @@ keywords:
- ai-proxy
- AI
- LLM
-description: The ai-proxy Plugin simplifies access to LLM and embedding models
providers by converting Plugin configurations into the required request format
for OpenAI, DeepSeek, Azure, AIMLAPI, Anthropic, OpenRouter, Gemini, and other
OpenAI-compatible APIs.
+description: The ai-proxy Plugin simplifies access to LLM and embedding models
providers by converting Plugin configurations into the required request format
for OpenAI, DeepSeek, Azure, AIMLAPI, Anthropic, OpenRouter, Gemini, Vertex AI,
and other OpenAI-compatible APIs.
---
<!--
@@ -35,7 +35,7 @@ description: The ai-proxy Plugin simplifies access to LLM and
embedding models p
## Description
-The `ai-proxy` Plugin simplifies access to LLM and embedding models by
transforming Plugin configurations into the designated request format. It
supports the integration with OpenAI, DeepSeek, Azure, AIMLAPI, Anthropic,
OpenRouter, Gemini, and other OpenAI-compatible APIs.
+The `ai-proxy` Plugin simplifies access to LLM and embedding models by
transforming Plugin configurations into the designated request format. It
supports the integration with OpenAI, DeepSeek, Azure, AIMLAPI, Anthropic,
OpenRouter, Gemini, Vertex AI, and other OpenAI-compatible APIs.
In addition, the Plugin also supports logging LLM request information in the
access log, such as token usage, model, time to the first response, and more.
@@ -51,7 +51,7 @@ In addition, the Plugin also supports logging LLM request
information in the acc
| Name | Type | Required | Default | Valid values
| Description |
|--------------------|--------|----------|---------|------------------------------------------|-------------|
-| provider | string | True | | [openai, deepseek,
azure-openai, aimlapi, anthropic, openrouter, gemini, openai-compatible] | LLM
service provider. When set to `openai`, the Plugin will proxy the request to
`https://api.openai.com/chat/completions`. When set to `deepseek`, the Plugin
will proxy the request to `https://api.deepseek.com/chat/completions`. When set
to `aimlapi`, the Plugin uses the OpenAI-compatible driver and proxies the
request to `https://api.aimlapi [...]
+| provider | string | True | | [openai, deepseek,
azure-openai, aimlapi, anthropic, openrouter, gemini, vertex-ai,
openai-compatible] | LLM service provider. When set to `openai`, the Plugin
will proxy the request to `https://api.openai.com/chat/completions`. When set
to `deepseek`, the Plugin will proxy the request to
`https://api.deepseek.com/chat/completions`. When set to `aimlapi`, the Plugin
uses the OpenAI-compatible driver and proxies the request to `https:// [...]
| auth | object | True | |
| Authentication configurations. |
| auth.header | object | False | |
| Authentication headers. At least one of `header` or `query`
must be configured. |
| auth.query | object | False | |
| Authentication query parameters. At least one of `header` or
`query` must be configured. |
diff --git a/docs/en/latest/plugins/ai-request-rewrite.md
b/docs/en/latest/plugins/ai-request-rewrite.md
index c5d0d1f5b..955db9734 100644
--- a/docs/en/latest/plugins/ai-request-rewrite.md
+++ b/docs/en/latest/plugins/ai-request-rewrite.md
@@ -36,7 +36,7 @@ The `ai-request-rewrite` plugin intercepts client requests
before they are forwa
| **Field** | **Required** | **Type** | **Description**
|
| ------------------------- | ------------ | -------- |
------------------------------------------------------------------------------------
|
| prompt | Yes | String | The prompt send to LLM
service. |
-| provider | Yes | String | Name of the LLM
service. Available options: openai, deekseek, azure-openai, aimlapi, anthropic,
openrouter, gemini, and openai-compatible. When `aimlapi` is selected, the
plugin uses the OpenAI-compatible driver with a default endpoint of
`https://api.aimlapi.com/v1/chat/completions`. |
+| provider | Yes | String | Name of the LLM
service. Available options: openai, deepseek, azure-openai, aimlapi, anthropic,
openrouter, gemini, vertex-ai, and openai-compatible. When `aimlapi` is
selected, the plugin uses the OpenAI-compatible driver with a default endpoint
of `https://api.aimlapi.com/v1/chat/completions`. |
| auth | Yes | Object | Authentication
configuration |
| auth.header | No | Object | Authentication
headers. Key must match pattern `^[a-zA-Z0-9._-]+$`. |
| auth.query | No | Object | Authentication query
parameters. Key must match pattern `^[a-zA-Z0-9._-]+$`. |
diff --git a/docs/zh/latest/plugins/ai-proxy-multi.md
b/docs/zh/latest/plugins/ai-proxy-multi.md
index 3143d9704..40f7157b3 100644
--- a/docs/zh/latest/plugins/ai-proxy-multi.md
+++ b/docs/zh/latest/plugins/ai-proxy-multi.md
@@ -7,7 +7,7 @@ keywords:
- ai-proxy-multi
- AI
- LLM
-description: ai-proxy-multi 插件通过负载均衡、重试、故障转移和健康检查扩展了 ai-proxy 的功能,简化了与
OpenAI、DeepSeek、Azure、AIMLAPI、Anthropic、OpenRouter、Gemini 和其他 OpenAI 兼容 API 的集成。
+description: ai-proxy-multi 插件通过负载均衡、重试、故障转移和健康检查扩展了 ai-proxy 的功能,简化了与
OpenAI、DeepSeek、Azure、AIMLAPI、Anthropic、OpenRouter、Gemini、Vertex AI 和其他 OpenAI
兼容 API 的集成。
---
<!--
@@ -35,7 +35,7 @@ description: ai-proxy-multi 插件通过负载均衡、重试、故障转移和
## 描述
-`ai-proxy-multi` 插件通过将插件配置转换为
OpenAI、DeepSeek、Azure、AIMLAPI、Anthropic、OpenRouter、Gemini 和其他 OpenAI 兼容 API
的指定请求格式,简化了对 LLM 和嵌入模型的访问。它通过负载均衡、重试、故障转移和健康检查扩展了 [`ai-proxy`](./ai-proxy.md)
的功能。
+`ai-proxy-multi` 插件通过将插件配置转换为
OpenAI、DeepSeek、Azure、AIMLAPI、Anthropic、OpenRouter、Gemini、Vertex AI 和其他 OpenAI
兼容 API 的指定请求格式,简化了对 LLM 和嵌入模型的访问。它通过负载均衡、重试、故障转移和健康检查扩展了
[`ai-proxy`](./ai-proxy.md) 的功能。
此外,该插件还支持在访问日志中记录 LLM 请求信息,如令牌使用量、模型、首次响应时间等。
@@ -58,7 +58,7 @@ description: ai-proxy-multi 插件通过负载均衡、重试、故障转移和
| balancer.key | string | 否 |
| | 当 `type` 为 `chash` 时使用。当 `hash_on` 设置为
`header` 或 `cookie` 时,需要 `key`。当 `hash_on` 设置为 `consumer` 时,不需要
`key`,因为消费者名称将自动用作键。 |
| instances | array[object] | 是 |
| | LLM 实例配置。 |
| instances.name | string | 是 |
| | LLM 服务实例的名称。 |
-| instances.provider | string | 是 |
| [openai, deepseek, azure-openai, aimlapi, anthropic,
openrouter, gemini, openai-compatible] | LLM 服务提供商。设置为 `openai` 时,插件将代理请求到
`api.openai.com`。设置为 `deepseek` 时,插件将代理请求到 `api.deepseek.com`。设置为 `aimlapi`
时,插件使用 OpenAI 兼容驱动程序,默认将请求代理到 `api.aimlapi.com`。设置为 `anthropic` 时,插件使用 OpenAI
兼容驱动程序,默认将请求代理到 `api.anthropic.com`。设置为 `openrouter` 时,插件使用 OpenAI
兼容驱动程序,默认将请求代理到 `openrouter.ai`。设置为 `gemini [...]
+| instances.provider | string | 是 |
| [openai, deepseek, azure-openai, aimlapi, anthropic,
openrouter, gemini, vertex-ai, openai-compatible] | LLM 服务提供商。设置为 `openai`
时,插件将代理请求到 `api.openai.com`。设置为 `deepseek` 时,插件将代理请求到 `api.deepseek.com`。设置为
`aimlapi` 时,插件使用 OpenAI 兼容驱动程序,默认将请求代理到 `api.aimlapi.com`。设置为 `anthropic`
时,插件使用 OpenAI 兼容驱动程序,默认将请求代理到 `api.anthropic.com`。设置为 `openrouter` 时,插件使用
OpenAI 兼容驱动程序,默认将请求代理到 `openrouter.ai`。 [...]
| instances.priority | integer | 否 | 0
| | LLM 实例在负载均衡中的优先级。`priority` 优先于 `weight`。 |
| instances.weight | string | 是 | 0
| 大于或等于 0 | LLM 实例在负载均衡中的权重。 |
| instances.auth | object | 是 |
| | 身份验证配置。 |
diff --git a/docs/zh/latest/plugins/ai-proxy.md
b/docs/zh/latest/plugins/ai-proxy.md
index dea580ae2..8035237d7 100644
--- a/docs/zh/latest/plugins/ai-proxy.md
+++ b/docs/zh/latest/plugins/ai-proxy.md
@@ -7,7 +7,7 @@ keywords:
- ai-proxy
- AI
- LLM
-description: ai-proxy 插件通过将插件配置转换为所需的请求格式,简化了对 LLM 和嵌入模型提供商的访问,支持
OpenAI、DeepSeek、Azure、AIMLAPI、Anthropic、OpenRouter、Gemini 和其他 OpenAI 兼容的 API。
+description: ai-proxy 插件通过将插件配置转换为所需的请求格式,简化了对 LLM 和嵌入模型提供商的访问,支持
OpenAI、DeepSeek、Azure、AIMLAPI、Anthropic、OpenRouter、Gemini、Vertex AI 和其他 OpenAI
兼容的 API。
---
<!--
@@ -35,7 +35,7 @@ description: ai-proxy 插件通过将插件配置转换为所需的请求格式
## 描述
-`ai-proxy` 插件通过将插件配置转换为指定的请求格式,简化了对 LLM 和嵌入模型的访问。它支持与
OpenAI、DeepSeek、Azure、AIMLAPI、Anthropic、OpenRouter、Gemini 和其他 OpenAI 兼容的 API 集成。
+`ai-proxy` 插件通过将插件配置转换为指定的请求格式,简化了对 LLM 和嵌入模型的访问。它支持与
OpenAI、DeepSeek、Azure、AIMLAPI、Anthropic、OpenRouter、Gemini、Vertex AI 和其他 OpenAI
兼容的 API 集成。
此外,该插件还支持在访问日志中记录 LLM 请求信息,如令牌使用量、模型、首次响应时间等。
@@ -51,7 +51,7 @@ description: ai-proxy 插件通过将插件配置转换为所需的请求格式
| 名称 | 类型 | 必选项 | 默认值 | 有效值 | 描述
|
|--------------------|--------|----------|---------|------------------------------------------|-------------|
-| provider | string | 是 | | [openai, deepseek,
azure-openai, aimlapi, anthropic, openrouter, gemini, openai-compatible] | LLM
服务提供商。当设置为 `openai` 时,插件将代理请求到 `https://api.openai.com/chat/completions`。当设置为
`deepseek` 时,插件将代理请求到 `https://api.deepseek.com/chat/completions`。当设置为
`aimlapi` 时,插件使用 OpenAI 兼容驱动程序,默认将请求代理到
`https://api.aimlapi.com/v1/chat/completions`。当设置为 `anthropic` 时,插件将代理请求到
`https://api.anthropic.com/v1/chat/completions`。当设置为 `openrouter` 时,插件使用 OpenAI
兼 [...]
+| provider | string | 是 | | [openai, deepseek,
azure-openai, aimlapi, anthropic, openrouter, gemini, vertex-ai,
openai-compatible] | LLM 服务提供商。当设置为 `openai` 时,插件将代理请求到
`https://api.openai.com/chat/completions`。当设置为 `deepseek` 时,插件将代理请求到
`https://api.deepseek.com/chat/completions`。当设置为 `aimlapi` 时,插件使用 OpenAI
兼容驱动程序,默认将请求代理到 `https://api.aimlapi.com/v1/chat/completions`。当设置为 `anthropic`
时,插件将代理请求到 `https://api.anthropic.com/v1/chat/completions`。当设置为 `openrouter`
时,插件 [...]
| auth | object | 是 | |
| 身份验证配置。 |
| auth.header | object | 否 | |
| 身份验证标头。必须配置 `header` 或 `query` 中的至少一个。 |
| auth.query | object | 否 | |
| 身份验证查询参数。必须配置 `header` 或 `query` 中的至少一个。 |
diff --git a/docs/zh/latest/plugins/ai-request-rewrite.md
b/docs/zh/latest/plugins/ai-request-rewrite.md
index 1e8d1cf8a..d98630d93 100644
--- a/docs/zh/latest/plugins/ai-request-rewrite.md
+++ b/docs/zh/latest/plugins/ai-request-rewrite.md
@@ -36,7 +36,7 @@ description: ai-request-rewrite 插件在客户端请求转发到上游服务之
| **字段** | **必选项** | **类型** | **描述**
|
| ------------------------- | ------------ | -------- |
------------------------------------------------------------------------------------
|
| prompt | 是 | String | 发送到 LLM 服务的提示。
|
-| provider | 是 | String | LLM
服务的名称。可用选项:openai、deekseek、azure-openai、aimlapi、anthropic、openrouter、gemini 和
openai-compatible。当选择 `aimlapi` 时,插件使用 OpenAI 兼容驱动程序,默认端点为
`https://api.aimlapi.com/v1/chat/completions`。 |
+| provider | 是 | String | LLM
服务的名称。可用选项:openai、deepseek、azure-openai、aimlapi、anthropic、openrouter、gemini、vertex-ai
和 openai-compatible。当选择 `aimlapi` 时,插件使用 OpenAI 兼容驱动程序,默认端点为
`https://api.aimlapi.com/v1/chat/completions`。 |
| auth | 是 | Object | 身份验证配置
|
| auth.header | 否 | Object | 身份验证头部。键必须匹配模式
`^[a-zA-Z0-9._-]+$`。 |
| auth.query | 否 | Object | 身份验证查询参数。键必须匹配模式
`^[a-zA-Z0-9._-]+$`。 |
diff --git a/t/plugin/ai-proxy-vertex-ai.t b/t/plugin/ai-proxy-vertex-ai.t
new file mode 100644
index 000000000..615c805fd
--- /dev/null
+++ b/t/plugin/ai-proxy-vertex-ai.t
@@ -0,0 +1,572 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+use t::APISIX 'no_plan';
+
+log_level("debug");
+repeat_each(1);
+no_long_string();
+no_root_location();
+
+
+my $resp_file = 't/assets/ai-proxy-response.json';
+open(my $fh, '<', $resp_file) or die "Could not open file '$resp_file' $!";
+my $resp = do { local $/; <$fh> };
+close($fh);
+
+
+add_block_preprocessor(sub {
+ my ($block) = @_;
+
+ if (!defined $block->request) {
+ $block->set_value("request", "GET /t");
+ }
+
+ my $user_yaml_config = <<_EOC_;
+plugins:
+ - ai-proxy-multi
+ - prometheus
+_EOC_
+ $block->set_value("extra_yaml_config", $user_yaml_config);
+
+ my $extra_init_worker_by_lua = <<_EOC_;
+ local gcp_accesstoken = require "apisix.utils.google-cloud-oauth"
+ local ttl = 0
+ gcp_accesstoken.refresh_access_token = function(self)
+ ngx.log(ngx.NOTICE, "[test] mocked gcp_accesstoken called")
+ ttl = ttl + 5
+ self.access_token_ttl = ttl
+ self.access_token = "ya29.c.Kp8B..."
+ end
+_EOC_
+
+ $block->set_value("extra_init_worker_by_lua", $extra_init_worker_by_lua);
+
+
+ my $http_config = $block->http_config // <<_EOC_;
+ server {
+ server_name openai;
+ listen 6724;
+
+ default_type 'application/json';
+
+ location /v1/chat/completions {
+ content_by_lua_block {
+ local json = require("toolkit.json")
+
+ if ngx.req.get_method() ~= "POST" then
+ ngx.status = 400
+ ngx.say("Unsupported request method: ",
ngx.req.get_method())
+ end
+ ngx.req.read_body()
+ local body, err = ngx.req.get_body_data()
+ body, err = json.decode(body)
+
+ if body and body.instances then
+ local vertex_response = {
+ ["predictions"] = {
+ {
+ ["embeddings"] = {
+ ["statistics"] = {
+ ["token_count"] = 7
+ },
+ ["values"] = {
+ 0.0123,
+ -0.0456,
+ 0.0789,
+ 0.0012
+ }
+ }
+ },
+ }
+ }
+ local body = json.encode(vertex_response)
+ ngx.status = 200
+ ngx.say(body)
+ return
+ end
+
+ local test_type = ngx.req.get_headers()["test-type"]
+ if test_type == "options" then
+ if body.foo == "bar" then
+ ngx.status = 200
+ ngx.say("options works")
+ else
+ ngx.status = 500
+ ngx.say("model options feature doesn't work")
+ end
+ return
+ end
+
+ local header_auth = ngx.req.get_headers()["authorization"]
+ local query_auth = ngx.req.get_uri_args()["apikey"]
+
+ if header_auth ~= "Bearer token" and query_auth ~=
"apikey" and header_auth ~= "Bearer ya29.c.Kp8B..." then
+ ngx.status = 401
+ ngx.say("Unauthorized")
+ return
+ end
+
+ if header_auth == "Bearer token" or query_auth == "apikey"
or header_auth == "Bearer ya29.c.Kp8B..." then
+ if header_auth == "Bearer ya29.c.Kp8B..." then
+ ngx.log(ngx.NOTICE, "[test] GCP service account
auth works")
+ end
+ ngx.req.read_body()
+ local body, err = ngx.req.get_body_data()
+ body, err = json.decode(body)
+
+ if not body.messages or #body.messages < 1 then
+ ngx.status = 400
+ ngx.say([[{ "error": "bad request"}]])
+ return
+ end
+ if body.messages[1].content == "write an SQL query to
get all rows from student table" then
+ ngx.print("SELECT * FROM STUDENTS")
+ return
+ end
+
+ ngx.status = 200
+ ngx.say([[$resp]])
+ return
+ end
+
+
+ ngx.status = 503
+ ngx.say("reached the end of the test suite")
+ }
+ }
+
+ location /random {
+ content_by_lua_block {
+ ngx.say("path override works")
+ }
+ }
+
+ location ~ ^/status.* {
+ content_by_lua_block {
+ local test_dict = ngx.shared["test"]
+ local uri = ngx.var.uri
+ local total_key = uri .. "#total"
+ local count_key = uri .. "#count"
+ local total = test_dict:get(total_key)
+ if not total then
+ return
+ end
+
+ local count = test_dict:incr(count_key, 1, 0)
+ ngx.log(ngx.INFO, "uri: ", uri, " total: ", total, "
count: ", count)
+ if count < total then
+ return
+ end
+ ngx.status = 500
+ ngx.say("error")
+ }
+ }
+ }
+_EOC_
+
+ $block->set_value("http_config", $http_config);
+});
+
+run_tests();
+
+__DATA__
+
+=== TEST 1: set route with right auth header
+--- config
+ location /t {
+ content_by_lua_block {
+ local t = require("lib.test_admin").test
+ local code, body = t('/apisix/admin/routes/1',
+ ngx.HTTP_PUT,
+ [[{
+ "uri": "/anything",
+ "plugins": {
+ "ai-proxy-multi": {
+ "instances": [
+ {
+ "name": "vertex-ai",
+ "provider": "vertex-ai",
+ "weight": 1,
+ "auth": {
+ "header": {
+ "Authorization": "Bearer token"
+ }
+ },
+ "options": {
+ "model": "gemini-2.0-flash",
+ "max_tokens": 512,
+ "temperature": 1.0
+ },
+ "override": {
+ "endpoint":
"http://localhost:6724/v1/chat/completions"
+ }
+ }
+ ],
+ "ssl_verify": false
+ }
+ }
+ }]]
+ )
+
+ if code >= 300 then
+ ngx.status = code
+ end
+ ngx.say(body)
+ }
+ }
+--- response_body
+passed
+
+
+
+=== TEST 2: send request
+--- request
+POST /anything
+{ "messages": [ { "role": "system", "content": "You are a mathematician" }, {
"role": "user", "content": "What is 1+1?"} ] }
+--- more_headers
+Authorization: Bearer token
+--- error_code: 200
+--- response_body eval
+qr/"content":"1 \+ 1 = 2\."/
+
+
+
+=== TEST 3: request embeddings, check values field in response
+--- request
+POST /anything
+{"input": "Your text string goes here"}
+--- more_headers
+Authorization: Bearer token
+--- error_code: 200
+--- response_body eval
+qr/"embedding":\[0.0123,-0.0456,0.0789,0.0012\]/
+
+
+
+=== TEST 4: request embeddings, check token_count field in response
+--- request
+POST /anything
+{"input": "Your text string goes here"}
+--- more_headers
+Authorization: Bearer token
+--- error_code: 200
+--- response_body eval
+qr/"total_tokens":7/
+
+
+
+=== TEST 5: set route with right auth gcp service account
+--- config
+ location /t {
+ content_by_lua_block {
+ local t = require("lib.test_admin").test
+ local code, body = t('/apisix/admin/routes/1',
+ ngx.HTTP_PUT,
+ [[{
+ "uri": "/anything",
+ "plugins": {
+ "ai-proxy-multi": {
+ "instances": [
+ {
+ "name": "vertex-ai",
+ "provider": "vertex-ai",
+ "weight": 1,
+ "auth": {
+ "gcp": { "max_ttl": 8 }
+ },
+ "options": {
+ "model": "gemini-2.0-flash",
+ "max_tokens": 512,
+ "temperature": 1.0
+ },
+ "override": {
+ "endpoint":
"http://localhost:6724/v1/chat/completions"
+ }
+ }
+ ],
+ "ssl_verify": false
+ }
+ }
+ }]]
+ )
+
+ if code >= 300 then
+ ngx.status = code
+ end
+ ngx.say(body)
+ }
+ }
+--- response_body
+passed
+
+
+
+=== TEST 6: send request
+--- request
+POST /anything
+{ "messages": [ { "role": "system", "content": "You are a mathematician" }, {
"role": "user", "content": "What is 1+1?"} ] }
+--- error_code: 200
+--- error_log
+[test] GCP service account auth works
+--- response_body eval
+qr/"content":"1 \+ 1 = 2\."/
+
+
+
+=== TEST 7: check gcp access token caching works
+--- config
+ location /t {
+ content_by_lua_block {
+ local t = require("lib.test_admin").test
+ local core = require("apisix.core")
+ local send_request = function()
+ local code, _, body = t("/anything",
+ ngx.HTTP_POST,
+ [[{
+ "messages": [
+ { "role": "system", "content": "You are a
mathematician" },
+ { "role": "user", "content": "What is 1+1?" }
+ ]
+ }]],
+ nil,
+ {
+ ["Content-Type"] = "application/json",
+ }
+ )
+ assert(code == 200, "request should be successful")
+ return body
+ end
+ for i = 1, 6 do
+ send_request()
+ end
+
+ ngx.sleep(5.5)
+ send_request()
+
+ ngx.say("passed")
+ }
+ }
+--- timeout: 7
+--- response_body
+passed
+--- error_log
+[test] mocked gcp_accesstoken called
+[test] mocked gcp_accesstoken called
+set gcp access token in cache with ttl: 5
+set gcp access token in cache with ttl: 8
+
+
+
+=== TEST 8: set route with multiple instances and gcp service account
+--- config
+ location /t {
+ content_by_lua_block {
+ local t = require("lib.test_admin").test
+ local code, body = t('/apisix/admin/routes/1',
+ ngx.HTTP_PUT,
+ [[{
+ "uri": "/anything",
+ "plugins": {
+ "ai-proxy-multi": {
+ "instances": [
+ {
+ "name": "vertex-ai-one",
+ "provider": "vertex-ai",
+ "weight": 1,
+ "auth": {
+ "gcp": {}
+ },
+ "options": {
+ "model": "gemini-2.0-flash",
+ "max_tokens": 512,
+ "temperature": 1.0
+ },
+ "override": {
+ "endpoint":
"http://localhost:6724/v1/chat/completions"
+ }
+ },
+ {
+ "name": "vertex-ai-multi",
+ "provider": "vertex-ai",
+ "weight": 1,
+ "auth": {
+ "gcp": {}
+ },
+ "options": {
+ "model": "gemini-2.0-flash",
+ "max_tokens": 512,
+ "temperature": 1.0
+ },
+ "override": {
+ "endpoint":
"http://localhost:6724/v1/chat/completions"
+ }
+ }
+ ],
+ "ssl_verify": false
+ }
+ }
+ }]]
+ )
+
+ if code >= 300 then
+ ngx.status = code
+ end
+ ngx.say(body)
+ }
+ }
+--- response_body
+passed
+
+
+
+=== TEST 9: check gcp access token caching works
+--- config
+ location /t {
+ content_by_lua_block {
+ local t = require("lib.test_admin").test
+ local core = require("apisix.core")
+ local send_request = function()
+ local code, _, body = t("/anything",
+ ngx.HTTP_POST,
+ [[{
+ "messages": [
+ { "role": "system", "content": "You are a
mathematician" },
+ { "role": "user", "content": "What is 1+1?" }
+ ]
+ }]],
+ nil,
+ {
+ ["Content-Type"] = "application/json",
+ }
+ )
+ assert(code == 200, "request should be successful")
+ return body
+ end
+ for i = 1, 12 do
+ send_request()
+ end
+
+ ngx.say("passed")
+ }
+ }
+--- timeout: 7
+--- response_body
+passed
+--- error_log
+#vertex-ai-one
+#vertex-ai-multi
+
+
+
+=== TEST 10: set ai-proxy-multi with health checks
+--- config
+ location /t {
+ content_by_lua_block {
+ local checks = [[
+ "checks": {
+ "active": {
+ "timeout": 5,
+ "http_path": "/status/gpt4",
+ "host": "foo.com",
+ "healthy": {
+ "interval": 1,
+ "successes": 1
+ },
+ "unhealthy": {
+ "interval": 1,
+ "http_failures": 1
+ },
+ "req_headers": ["User-Agent: curl/7.29.0"]
+ }
+ }]]
+ local t = require("lib.test_admin").test
+ local code, body = t('/apisix/admin/routes/1',
+ ngx.HTTP_PUT,
+ string.format([[{
+ "uri": "/anything",
+ "plugins": {
+ "ai-proxy-multi": {
+ "instances": [
+ {
+ "name": "vertex-ai",
+ "provider": "vertex-ai",
+ "weight": 1,
+ "priority": 2,
+ "auth": {
+ "header": {
+ "Authorization": "Bearer token"
+ }
+ },
+ "options": {
+ "model": "gemini-2.0-flash",
+ "max_tokens": 512,
+ "temperature": 1.0
+ },
+ "override": {
+ "endpoint":
"http://localhost:6724/v1/chat/completions"
+ },
+ %s
+ },
+ {
+ "name": "openai-gpt3",
+ "provider": "openai",
+ "weight": 1,
+ "priority": 1,
+ "auth": {
+ "header": {
+ "Authorization": "Bearer token"
+ }
+ },
+ "options": {
+ "model": "gpt-3"
+ },
+ "override": {
+ "endpoint": "http://localhost:6724"
+ }
+ }
+ ],
+ "ssl_verify": false
+ }
+ }
+ }]], checks)
+ )
+
+ if code >= 300 then
+ ngx.status = code
+ end
+ ngx.say(body)
+ }
+ }
+--- response_body
+passed
+
+
+
+=== TEST 11: check health check works
+--- wait: 5
+--- request
+POST /anything
+{ "messages": [ { "role": "system", "content": "You are a mathematician" }, {
"role": "user", "content": "What is 1+1?"} ] }
+--- more_headers
+Authorization: Bearer token
+--- error_code: 200
+--- response_body eval
+qr/"content":"1 \+ 1 = 2\."/
+--- error_log
+creating healthchecker for upstream
+request head: GET /status/gpt4