This is an automated email from the ASF dual-hosted git repository.
shreemaanabhishek pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/apisix.git
The following commit(s) were added to refs/heads/master by this push:
new 52d8fea3d feat: ai-rate-limiting plugin (#12037)
52d8fea3d is described below
commit 52d8fea3dbc93c4b59ac6682902955c6146af4ad
Author: Shreemaan Abhishek <[email protected]>
AuthorDate: Thu Mar 13 13:56:06 2025 +0545
feat: ai-rate-limiting plugin (#12037)
---
apisix/cli/config.lua | 3 +
apisix/cli/ngx_tpl.lua | 14 +-
apisix/plugins/ai-rate-limiting.lua | 209 +++++++++++
apisix/plugins/limit-count/init.lua | 30 +-
conf/config.yaml.example | 1 +
docs/en/latest/config.json | 3 +-
docs/en/latest/plugins/ai-rate-limiting.md | 117 +++++++
t/APISIX.pm | 2 +
t/admin/plugins.t | 1 +
t/plugin/ai-rate-limiting.t | 539 +++++++++++++++++++++++++++++
10 files changed, 907 insertions(+), 12 deletions(-)
diff --git a/apisix/cli/config.lua b/apisix/cli/config.lua
index 56af978c2..8cca713e0 100644
--- a/apisix/cli/config.lua
+++ b/apisix/cli/config.lua
@@ -160,6 +160,8 @@ local _M = {
["plugin-limit-req-redis-cluster-slot-lock"] = "1m",
["plugin-limit-count-redis-cluster-slot-lock"] = "1m",
["plugin-limit-conn-redis-cluster-slot-lock"] = "1m",
+ ["plugin-ai-rate-limiting"] = "10m",
+ ["plugin-ai-rate-limiting-reset-header"] = "10m",
tracing_buffer = "10m",
["plugin-api-breaker"] = "10m",
["etcd-cluster-health-check"] = "10m",
@@ -219,6 +221,7 @@ local _M = {
"ai-prompt-decorator",
"ai-prompt-guard",
"ai-rag",
+ "ai-rate-limiting",
"ai-proxy-multi",
"ai-proxy",
"ai-aws-content-moderation",
diff --git a/apisix/cli/ngx_tpl.lua b/apisix/cli/ngx_tpl.lua
index 4b7ff4102..dec8f7172 100644
--- a/apisix/cli/ngx_tpl.lua
+++ b/apisix/cli/ngx_tpl.lua
@@ -287,6 +287,19 @@ http {
lua_shared_dict tars {* http.lua_shared_dict["tars"] *};
{% end %}
+
+ {% if http.lua_shared_dict["plugin-ai-rate-limiting"] then %}
+ lua_shared_dict plugin-ai-rate-limiting {*
http.lua_shared_dict["plugin-ai-rate-limiting"] *};
+ {% else %}
+ lua_shared_dict plugin-ai-rate-limiting 10m;
+ {% end %}
+
+ {% if http.lua_shared_dict["plugin-ai-rate-limiting-reset-header"] then %}
+ lua_shared_dict plugin-ai-rate-limiting-reset-header {*
http.lua_shared_dict["plugin-ai-rate-limiting-reset-header"] *};
+ {% else %}
+ lua_shared_dict plugin-ai-rate-limiting-reset-header 10m;
+ {% end %}
+
{% if enabled_plugins["limit-conn"] then %}
lua_shared_dict plugin-limit-conn {*
http.lua_shared_dict["plugin-limit-conn"] *};
lua_shared_dict plugin-limit-conn-redis-cluster-slot-lock {*
http.lua_shared_dict["plugin-limit-conn-redis-cluster-slot-lock"] *};
@@ -418,7 +431,6 @@ http {
{% if ssl.ssl_trusted_certificate ~= nil then %}
lua_ssl_trusted_certificate {* ssl.ssl_trusted_certificate *};
{% end %}
-
# http configuration snippet starts
{% if http_configuration_snippet then %}
{* http_configuration_snippet *}
diff --git a/apisix/plugins/ai-rate-limiting.lua
b/apisix/plugins/ai-rate-limiting.lua
new file mode 100644
index 000000000..374c03997
--- /dev/null
+++ b/apisix/plugins/ai-rate-limiting.lua
@@ -0,0 +1,209 @@
+--
+-- Licensed to the Apache Software Foundation (ASF) under one or more
+-- contributor license agreements. See the NOTICE file distributed with
+-- this work for additional information regarding copyright ownership.
+-- The ASF licenses this file to You under the Apache License, Version 2.0
+-- (the "License"); you may not use this file except in compliance with
+-- the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+local require = require
+local setmetatable = setmetatable
+local ipairs = ipairs
+local type = type
+local core = require("apisix.core")
+local limit_count = require("apisix.plugins.limit-count.init")
+
+local plugin_name = "ai-rate-limiting"
+
+local instance_limit_schema = {
+ type = "object",
+ properties = {
+ name = {type = "string"},
+ limit = {type = "integer", minimum = 1},
+ time_window = {type = "integer", minimum = 1}
+ },
+ required = {"name", "limit", "time_window"}
+}
+
+local schema = {
+ type = "object",
+ properties = {
+ limit = {type = "integer", exclusiveMinimum = 0},
+ time_window = {type = "integer", exclusiveMinimum = 0},
+ show_limit_quota_header = {type = "boolean", default = true},
+ limit_strategy = {
+ type = "string",
+ enum = {"total_tokens", "prompt_tokens", "completion_tokens"},
+ default = "total_tokens",
+ description = "The strategy to limit the tokens"
+ },
+ instances = {
+ type = "array",
+ items = instance_limit_schema
+ },
+ rejected_code = {
+ type = "integer", minimum = 200, maximum = 599, default = 503
+ },
+ rejected_msg = {
+ type = "string", minLength = 1
+ },
+ },
+ required = {"limit", "time_window"},
+}
+
+local _M = {
+ version = 0.1,
+ priority = 1030,
+ name = plugin_name,
+ schema = schema
+}
+
+local limit_conf_cache = core.lrucache.new({
+ ttl = 300, count = 512
+})
+
+
+function _M.check_schema(conf)
+ return core.schema.check(schema, conf)
+end
+
+
+local function transform_limit_conf(plugin_conf, instance_conf, instance_name)
+ local key = plugin_name .. "#global"
+ local limit = plugin_conf.limit
+ local time_window = plugin_conf.time_window
+ local name = instance_name or ""
+ if instance_conf then
+ name = instance_conf.name
+ key = instance_conf.name
+ limit = instance_conf.limit
+ time_window = instance_conf.time_window
+ end
+ return {
+ _vid = key,
+
+ key = key,
+ count = limit,
+ time_window = time_window,
+ rejected_code = plugin_conf.rejected_code,
+ rejected_msg = plugin_conf.rejected_msg,
+ show_limit_quota_header = plugin_conf.show_limit_quota_header,
+ -- limit-count need these fields
+ policy = "local",
+ key_type = "constant",
+ allow_degradation = false,
+ sync_interval = -1,
+
+ limit_header = "X-AI-RateLimit-Limit-" .. name,
+ remaining_header = "X-AI-RateLimit-Remaining-" .. name,
+ reset_header = "X-AI-RateLimit-Reset-" .. name,
+ }
+end
+
+
+local function fetch_limit_conf_kvs(conf)
+ local mt = {
+ __index = function(t, k)
+ local limit_conf = transform_limit_conf(conf, nil, k)
+ t[k] = limit_conf
+ return limit_conf
+ end
+ }
+ local limit_conf_kvs = setmetatable({}, mt)
+ local conf_instances = conf.instances or {}
+ for _, limit_conf in ipairs(conf_instances) do
+ limit_conf_kvs[limit_conf.name] = transform_limit_conf(conf,
limit_conf)
+ end
+ return limit_conf_kvs
+end
+
+
+function _M.access(conf, ctx)
+ local ai_instance_name = ctx.picked_ai_instance_name
+ if not ai_instance_name then
+ return
+ end
+
+ local limit_conf_kvs = limit_conf_cache(conf, nil, fetch_limit_conf_kvs,
conf)
+ local limit_conf = limit_conf_kvs[ai_instance_name]
+ local code, msg = limit_count.rate_limit(limit_conf, ctx, plugin_name, 1,
true)
+ ctx.ai_rate_limiting = code and true or false
+ return code, msg
+end
+
+
+function _M.check_instance_status(conf, ctx, instance_name)
+ if conf == nil then
+ local plugins = ctx.plugins
+ for _, plugin in ipairs(plugins) do
+ if plugin.name == plugin_name then
+ conf = plugin
+ end
+ end
+ end
+ if not conf then
+ return true
+ end
+
+ instance_name = instance_name or ctx.picked_ai_instance_name
+ if not instance_name then
+ return nil, "missing instance_name"
+ end
+
+ if type(instance_name) ~= "string" then
+ return nil, "invalid instance_name"
+ end
+
+ local limit_conf_kvs = limit_conf_cache(conf, nil, fetch_limit_conf_kvs,
conf)
+ local limit_conf = limit_conf_kvs[instance_name]
+ local code, _ = limit_count.rate_limit(limit_conf, ctx, plugin_name, 1,
true)
+ if code then
+ core.log.info("rate limit for instance: ", instance_name, " code: ",
code)
+ return false
+ end
+ return true
+end
+
+
+local function get_token_usage(conf, ctx)
+ local usage = ctx.ai_token_usage
+ if not usage then
+ return
+ end
+ return usage[conf.limit_strategy]
+end
+
+
+function _M.log(conf, ctx)
+ local instance_name = ctx.picked_ai_instance_name
+ if not instance_name then
+ return
+ end
+
+ if ctx.ai_rate_limiting then
+ return
+ end
+
+ local used_tokens = get_token_usage(conf, ctx)
+ if not used_tokens then
+ core.log.error("failed to get token usage for llm service")
+ return
+ end
+
+ core.log.info("instance name: ", instance_name, " used tokens: ",
used_tokens)
+
+ local limit_conf_kvs = limit_conf_cache(conf, nil, fetch_limit_conf_kvs,
conf)
+ local limit_conf = limit_conf_kvs[instance_name]
+ limit_count.rate_limit(limit_conf, ctx, plugin_name, used_tokens)
+end
+
+
+return _M
diff --git a/apisix/plugins/limit-count/init.lua
b/apisix/plugins/limit-count/init.lua
index 08a4c9763..1f37965c4 100644
--- a/apisix/plugins/limit-count/init.lua
+++ b/apisix/plugins/limit-count/init.lua
@@ -21,6 +21,7 @@ local ipairs = ipairs
local pairs = pairs
local redis_schema = require("apisix.utils.redis-schema")
local policy_to_additional_properties = redis_schema.schema
+local get_phase = ngx.get_phase
local limit_redis_cluster_new
local limit_redis_new
@@ -233,8 +234,9 @@ local function gen_limit_obj(conf, ctx, plugin_name)
return core.lrucache.plugin_ctx(lrucache, ctx, extra_key,
create_limit_obj, conf, plugin_name)
end
-function _M.rate_limit(conf, ctx, name, cost)
+function _M.rate_limit(conf, ctx, name, cost, dry_run)
core.log.info("ver: ", ctx.conf_version)
+ core.log.info("conf: ", core.json.delay_encode(conf, true))
local lim, err = gen_limit_obj(conf, ctx, name)
@@ -275,7 +277,7 @@ function _M.rate_limit(conf, ctx, name, cost)
local delay, remaining, reset
if not conf.policy or conf.policy == "local" then
- delay, remaining, reset = lim:incoming(key, true, conf, cost)
+ delay, remaining, reset = lim:incoming(key, not dry_run, conf, cost)
else
delay, remaining, reset = lim:incoming(key, cost)
end
@@ -288,14 +290,22 @@ function _M.rate_limit(conf, ctx, name, cost)
end
core.log.info("limit-count plugin-metadata: ",
core.json.delay_encode(metadata))
+ local set_limit_headers = {
+ limit_header = conf.limit_header or metadata.limit_header,
+ remaining_header = conf.remaining_header or metadata.remaining_header,
+ reset_header = conf.reset_header or metadata.reset_header,
+ }
+ local phase = get_phase()
+ local set_header = phase ~= "log"
+
if not delay then
local err = remaining
if err == "rejected" then
-- show count limit header when rejected
- if conf.show_limit_quota_header then
- core.response.set_header(metadata.limit_header, conf.count,
- metadata.remaining_header, 0,
- metadata.reset_header, reset)
+ if conf.show_limit_quota_header and set_header then
+ core.response.set_header(set_limit_headers.limit_header,
conf.count,
+ set_limit_headers.remaining_header, 0,
+ set_limit_headers.reset_header, reset)
end
if conf.rejected_msg then
@@ -311,10 +321,10 @@ function _M.rate_limit(conf, ctx, name, cost)
return 500, {error_msg = "failed to limit count"}
end
- if conf.show_limit_quota_header then
- core.response.set_header(metadata.limit_header, conf.count,
- metadata.remaining_header, remaining,
- metadata.reset_header, reset)
+ if conf.show_limit_quota_header and set_header then
+ core.response.set_header(set_limit_headers.limit_header, conf.count,
+ set_limit_headers.remaining_header, remaining,
+ set_limit_headers.reset_header, reset)
end
end
diff --git a/conf/config.yaml.example b/conf/config.yaml.example
index c0da9c0bf..22035f8da 100644
--- a/conf/config.yaml.example
+++ b/conf/config.yaml.example
@@ -480,6 +480,7 @@ plugins: # plugin list (sorted by
priority)
- ai-prompt-decorator # priority: 1070
- ai-prompt-guard # priority: 1072
- ai-rag # priority: 1060
+ - ai-rate-limiting # priority: 1030
- ai-aws-content-moderation # priority: 1040 TODO: compare priority
with other ai plugins
- proxy-mirror # priority: 1010
- proxy-rewrite # priority: 1008
diff --git a/docs/en/latest/config.json b/docs/en/latest/config.json
index b540df5f4..7074fbfc2 100644
--- a/docs/en/latest/config.json
+++ b/docs/en/latest/config.json
@@ -158,7 +158,8 @@
"plugins/request-id",
"plugins/proxy-control",
"plugins/client-control",
- "plugins/workflow"
+ "plugins/workflow",
+ "plugins/ai-rate-limiting"
]
},
{
diff --git a/docs/en/latest/plugins/ai-rate-limiting.md
b/docs/en/latest/plugins/ai-rate-limiting.md
new file mode 100644
index 000000000..839818153
--- /dev/null
+++ b/docs/en/latest/plugins/ai-rate-limiting.md
@@ -0,0 +1,117 @@
+---
+title: AI Rate Limiting
+keywords:
+ - Apache APISIX
+ - API Gateway
+ - Plugin
+ - ai-rate-limiting
+description: The ai-rate-limiting plugin enforces token-based rate limiting
for LLM service requests, preventing overuse, optimizing API consumption, and
ensuring efficient resource allocation.
+---
+
+<!--
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+-->
+
+## Description
+
+The `ai-rate-limiting` plugin enforces token-based rate limiting for requests
sent to LLM services. It helps manage API usage by controlling the number of
tokens consumed within a specified time frame, ensuring fair resource
allocation and preventing excessive load on the service. It is often used with
`ai-proxy` or `ai-proxy-multi` plugin.
+
+## Plugin Attributes
+
+| Name | Type | Required | Description
|
+| ------------------------- | ------------- | -------- |
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
+| `limit` | integer | true | The maximum number of
tokens allowed to consume within a given time interval.
|
+| `time_window` | integer | true | The time interval
corresponding to the rate limiting `limit` in seconds.
|
+| `show_limit_quota_header` | boolean | false | If true, include
`X-AI-RateLimit-Limit-*` to show the total quota, `X-AI-RateLimit-Remaining-*`
to show the remaining quota in the response header, and
`X-AI-RateLimit-Reset-*` to show the number of seconds left for the counter to
reset, where `*` is the instance name. Default: `true` |
+| `limit_strategy` | string | false | Type of token to
apply rate limiting. `total_tokens`, `prompt_tokens`, and `completion_tokens`
values are returned in each model response, where `total_tokens` is the sum of
`prompt_tokens` and `completion_tokens`. Default: `total_tokens`
|
+| `instances` | array[object] | false | LLM instance rate
limiting configurations.
|
+| `instances.name` | string | true | Name of the LLM
service instance.
|
+| `instances.limit` | integer | true | The maximum number of
tokens allowed to consume within a given time interval.
|
+| `instances.time_window` | integer | true | The time interval
corresponding to the rate limiting `limit` in seconds.
|
+| `rejected_code` | integer | false | The HTTP status code
returned when a request exceeding the quota is rejected. Default: `503`
|
+| `rejected_msg` | string | false | The response body
returned when a request exceeding the quota is rejected.
|
+
+## Example
+
+Create a route as such and update with your LLM providers, models, API keys,
and endpoints:
+
+```shell
+curl "http://127.0.0.1:9180/apisix/admin/routes" -X PUT \
+ -H "X-API-KEY: ${ADMIN_API_KEY}" \
+ -d '{
+ "id": "ai-rate-limiting-route",
+ "uri": "/anything",
+ "methods": ["POST"],
+ "plugins": {
+ "ai-proxy": {
+ "provider": "openai",
+ "auth": {
+ "header": {
+ "Authorization": "Bearer '"$API_KEY"'"
+ }
+ },
+ "options": {
+ "model": "gpt-35-turbo-instruct",
+ "max_tokens": 512,
+ "temperature": 1.0
+ }
+ },
+ "ai-rate-limiting": {
+ "limit": 300,
+ "time_window": 30,
+ "limit_strategy": "prompt_tokens"
+ }
+ }
+ }'
+```
+
+Send a POST request to the route with a system prompt and a sample user
question in the request body:
+
+```shell
+curl "http://127.0.0.1:9080/anything" -X POST \
+ -H "Content-Type: application/json" \
+ -d '{
+ "messages": [
+ { "role": "system", "content": "You are a mathematician" },
+ { "role": "user", "content": "What is 1+1?" }
+ ]
+ }'
+```
+
+You should receive a response similar to the following:
+
+```json
+{
+ ...
+  "model": "gpt-35-turbo-instruct",
+ "choices": [
+ {
+ "index": 0,
+ "message": {
+ "role": "assistant",
+ "content": "1 + 1 equals 2. This is a fundamental arithmetic operation
where adding one unit to another results in a total of two units."
+ },
+ "logprobs": null,
+ "finish_reason": "stop"
+ }
+ ],
+ ...
+}
+```
+
+If rate limiting quota of 300 tokens has been consumed in a 30-second window,
the additional requests will all be rejected.
diff --git a/t/APISIX.pm b/t/APISIX.pm
index 2e1724a12..f4b9b8055 100644
--- a/t/APISIX.pm
+++ b/t/APISIX.pm
@@ -558,6 +558,8 @@ _EOC_
lua_shared_dict plugin-limit-count 10m;
lua_shared_dict plugin-limit-count-reset-header 10m;
lua_shared_dict plugin-limit-conn 10m;
+ lua_shared_dict plugin-ai-rate-limiting 10m;
+ lua_shared_dict plugin-ai-rate-limiting-reset-header 10m;
lua_shared_dict internal-status 10m;
lua_shared_dict upstream-healthcheck 32m;
lua_shared_dict worker-events 10m;
diff --git a/t/admin/plugins.t b/t/admin/plugins.t
index c43d5ffeb..2a759bbe3 100644
--- a/t/admin/plugins.t
+++ b/t/admin/plugins.t
@@ -101,6 +101,7 @@ ai-rag
ai-aws-content-moderation
ai-proxy-multi
ai-proxy
+ai-rate-limiting
proxy-mirror
proxy-rewrite
workflow
diff --git a/t/plugin/ai-rate-limiting.t b/t/plugin/ai-rate-limiting.t
new file mode 100644
index 000000000..a6396acf0
--- /dev/null
+++ b/t/plugin/ai-rate-limiting.t
@@ -0,0 +1,539 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+use t::APISIX 'no_plan';
+
+log_level("info");
+repeat_each(1);
+no_long_string();
+no_root_location();
+
+
+my $resp_file = 't/assets/ai-proxy-response.json';
+open(my $fh, '<', $resp_file) or die "Could not open file '$resp_file' $!";
+my $resp = do { local $/; <$fh> };
+close($fh);
+
+print "Hello, World!\n";
+print $resp;
+
+
+add_block_preprocessor(sub {
+ my ($block) = @_;
+
+ if (!defined $block->request) {
+ $block->set_value("request", "GET /t");
+ }
+
+ my $http_config = $block->http_config // <<_EOC_;
+ server {
+ server_name openai;
+ listen 16724;
+
+ default_type 'application/json';
+
+ location /anything {
+ content_by_lua_block {
+ local json = require("cjson.safe")
+
+ if ngx.req.get_method() ~= "POST" then
+ ngx.status = 400
+ ngx.say("Unsupported request method: ",
ngx.req.get_method())
+ end
+ ngx.req.read_body()
+ local body = ngx.req.get_body_data()
+
+ if body ~= "SELECT * FROM STUDENTS" then
+ ngx.status = 503
+ ngx.say("passthrough doesn't work")
+ return
+ end
+ ngx.say('{"foo", "bar"}')
+ }
+ }
+
+ location /v1/chat/completions {
+ content_by_lua_block {
+ local json = require("cjson.safe")
+
+ if ngx.req.get_method() ~= "POST" then
+ ngx.status = 400
+ ngx.say("Unsupported request method: ",
ngx.req.get_method())
+ end
+ ngx.req.read_body()
+ local body, err = ngx.req.get_body_data()
+ body, err = json.decode(body)
+
+ local test_type = ngx.req.get_headers()["test-type"]
+ if test_type == "options" then
+ if body.foo == "bar" then
+ ngx.status = 200
+ ngx.say("options works")
+ else
+ ngx.status = 500
+ ngx.say("model options feature doesn't work")
+ end
+ return
+ end
+
+ local header_auth = ngx.req.get_headers()["authorization"]
+ local query_auth = ngx.req.get_uri_args()["apikey"]
+
+ if header_auth ~= "Bearer token" and query_auth ~=
"apikey" then
+ ngx.status = 401
+ ngx.say("Unauthorized")
+ return
+ end
+
+ if header_auth == "Bearer token" or query_auth == "apikey"
then
+ ngx.req.read_body()
+ local body, err = ngx.req.get_body_data()
+ body, err = json.decode(body)
+
+ if not body.messages or #body.messages < 1 then
+ ngx.status = 400
+ ngx.say([[{ "error": "bad request"}]])
+ return
+ end
+
+ if body.messages[1].content == "write an SQL query to
get all rows from student table" then
+ ngx.print("SELECT * FROM STUDENTS")
+ return
+ end
+
+ ngx.status = 200
+ ngx.say([[
+{
+ "choices": [
+ {
+ "finish_reason": "stop",
+ "index": 0,
+ "message": { "content": "1 + 1 = 2.", "role": "assistant" }
+ }
+ ],
+ "created": 1723780938,
+ "id": "chatcmpl-9wiSIg5LYrrpxwsr2PubSQnbtod1P",
+ "model": "gpt-4o-2024-05-13",
+ "object": "chat.completion",
+ "system_fingerprint": "fp_abc28019ad",
+ "usage": { "completion_tokens": 5, "prompt_tokens": 8, "total_tokens": 10 }
+}
+ ]])
+ return
+ end
+
+
+ ngx.status = 503
+ ngx.say("reached the end of the test suite")
+ }
+ }
+
+ location /random {
+ content_by_lua_block {
+ ngx.say("path override works")
+ }
+ }
+ }
+_EOC_
+
+ $block->set_value("http_config", $http_config);
+});
+
+run_tests();
+
+__DATA__
+
+=== TEST 1: sanity
+--- config
+ location /t {
+ content_by_lua_block {
+ local configs = {
+ {
+ time_window = 60,
+ },
+ {
+ limit = 30,
+ },
+ {
+ limit = 30,
+ time_window = 60,
+ rejected_code = 199,
+ },
+ {
+ limit = 30,
+ time_window = 60,
+ limit_strategy = "invalid",
+ },
+ {
+ limit = 30,
+ time_window = 60,
+ instances = {
+ {
+ name = "instance1",
+ limit = 30,
+ time_window = 60,
+ },
+ {
+ limit = 30,
+ time_window = 60,
+ }
+ },
+ },
+ {
+ time_window = 60,
+ instances = {
+ {
+ name = "instance1",
+ limit = 30,
+ time_window = 60,
+ }
+ },
+ },
+ {
+ limit = 30,
+ time_window = 60,
+ rejected_code = 403,
+ rejected_msg = "rate limit exceeded",
+ limit_strategy = "completion_tokens",
+ }
+ }
+ local core = require("apisix.core")
+ local plugin = require("apisix.plugins.ai-rate-limiting")
+ for _, config in ipairs(configs) do
+ local ok, err = plugin.check_schema(config)
+ if not ok then
+ ngx.say(err)
+ else
+ ngx.say("passed")
+ end
+ end
+ ngx.say("done")
+ }
+ }
+--- response_body
+property "limit" is required
+property "time_window" is required
+property "rejected_code" validation failed: expected 199 to be at least 200
+property "limit_strategy" validation failed: matches none of the enum values
+property "instances" validation failed: failed to validate item 2: property
"name" is required
+property "limit" is required
+passed
+done
+
+
+
+=== TEST 2: set route 1, default limit_strategy: total_tokens
+--- config
+ location /t {
+ content_by_lua_block {
+ local t = require("lib.test_admin").test
+ local code, body = t('/apisix/admin/routes/1',
+ ngx.HTTP_PUT,
+ [[{
+ "uri": "/ai",
+ "plugins": {
+ "ai-proxy": {
+ "provider": "openai",
+ "auth": {
+ "header": {
+ "Authorization": "Bearer token"
+ }
+ },
+ "options": {
+ "model": "gpt-35-turbo-instruct",
+ "max_tokens": 512,
+ "temperature": 1.0
+ },
+ "override": {
+ "endpoint": "http://localhost:16724"
+ },
+ "ssl_verify": false
+ },
+ "ai-rate-limiting": {
+ "limit": 30,
+ "time_window": 60
+ }
+ },
+ "upstream": {
+ "type": "roundrobin",
+ "nodes": {
+ "canbeanything.com": 1
+ }
+ }
+ }]]
+ )
+
+ if code >= 300 then
+ ngx.status = code
+ end
+ ngx.say(body)
+ }
+ }
+--- response_body
+passed
+
+
+
+=== TEST 3: reject the 4th request
+--- pipelined_requests eval
+[
+ "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\":
\"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is
1+1?\"} ] }",
+ "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\":
\"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is
1+1?\"} ] }",
+ "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\":
\"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is
1+1?\"} ] }",
+ "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\":
\"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is
1+1?\"} ] }",
+]
+--- more_headers
+Authorization: Bearer token
+--- error_code eval
+[200, 200, 200, 503]
+
+
+
+=== TEST 4: set rejected_code to 403, rejected_msg to "rate limit exceeded"
+--- config
+ location /t {
+ content_by_lua_block {
+ local t = require("lib.test_admin").test
+ local code, body = t('/apisix/admin/routes/1',
+ ngx.HTTP_PUT,
+ [[{
+ "uri": "/ai",
+ "plugins": {
+ "ai-proxy": {
+ "provider": "openai",
+ "auth": {
+ "header": {
+ "Authorization": "Bearer token"
+ }
+ },
+ "options": {
+ "model": "gpt-35-turbo-instruct",
+ "max_tokens": 512,
+ "temperature": 1.0
+ },
+ "override": {
+ "endpoint": "http://localhost:16724"
+ },
+ "ssl_verify": false
+ },
+ "ai-rate-limiting": {
+ "limit": 30,
+ "time_window": 60,
+ "rejected_code": 403,
+ "rejected_msg": "rate limit exceeded"
+ }
+ },
+ "upstream": {
+ "type": "roundrobin",
+ "nodes": {
+ "canbeanything.com": 1
+ }
+ }
+ }]]
+ )
+
+ if code >= 300 then
+ ngx.status = code
+ end
+ ngx.say(body)
+ }
+ }
+--- response_body
+passed
+
+
+
+=== TEST 5: check code and message
+--- pipelined_requests eval
+[
+ "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\":
\"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is
1+1?\"} ] }",
+ "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\":
\"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is
1+1?\"} ] }",
+ "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\":
\"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is
1+1?\"} ] }",
+ "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\":
\"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is
1+1?\"} ] }",
+]
+--- more_headers
+Authorization: Bearer token
+--- error_code eval
+[200, 200, 200, 403]
+--- response_body eval
+[
+ qr/\{ "content": "1 \+ 1 = 2\.", "role": "assistant" \}/,
+ qr/\{ "content": "1 \+ 1 = 2\.", "role": "assistant" \}/,
+ qr/\{ "content": "1 \+ 1 = 2\.", "role": "assistant" \}/,
+ qr/\{"error_msg":"rate limit exceeded"\}/,
+]
+
+
+
+=== TEST 6: check rate limit headers
+--- request
+POST /ai
+{ "messages": [ { "role": "system", "content": "You are a mathematician" }, {
"role": "user", "content": "What is 1+1?"} ] }
+--- more_headers
+Authorization: Bearer token
+--- response_headers
+X-AI-RateLimit-Limit-ai-proxy: 30
+X-AI-RateLimit-Remaining-ai-proxy: 29
+X-AI-RateLimit-Reset-ai-proxy: 60
+
+
+
+=== TEST 7: check rate limit headers after 4 requests
+--- pipelined_requests eval
+[
+ "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\":
\"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is
1+1?\"} ] }",
+ "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\":
\"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is
1+1?\"} ] }",
+ "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\":
\"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is
1+1?\"} ] }",
+ "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\":
\"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is
1+1?\"} ] }",
+]
+--- more_headers
+Authorization: Bearer token
+--- error_code eval
+[200, 200, 200, 403]
+--- response_headers eval
+[
+ "X-AI-RateLimit-Remaining-ai-proxy: 29",
+ "X-AI-RateLimit-Remaining-ai-proxy: 19",
+ "X-AI-RateLimit-Remaining-ai-proxy: 9",
+ "X-AI-RateLimit-Remaining-ai-proxy: 0",
+]
+
+
+
+=== TEST 8: set route2 with limit_strategy: completion_tokens
+--- config
+ location /t {
+ content_by_lua_block {
+ local t = require("lib.test_admin").test
+ local code, body = t('/apisix/admin/routes/2',
+ ngx.HTTP_PUT,
+ [[{
+ "uri": "/ai2",
+ "plugins": {
+ "ai-proxy": {
+ "provider": "openai",
+ "auth": {
+ "header": {
+ "Authorization": "Bearer token"
+ }
+ },
+ "options": {
+ "model": "gpt-35-turbo-instruct",
+ "max_tokens": 512,
+ "temperature": 1.0
+ },
+ "override": {
+ "endpoint": "http://localhost:16724"
+ },
+ "ssl_verify": false
+ },
+ "ai-rate-limiting": {
+ "limit": 20,
+ "time_window": 45,
+ "limit_strategy": "completion_tokens"
+ }
+ },
+ "upstream": {
+ "type": "roundrobin",
+ "nodes": {
+ "canbeanything.com": 1
+ }
+ }
+ }]]
+ )
+
+ if code >= 300 then
+ ngx.status = code
+ end
+ ngx.say(body)
+ }
+ }
+--- response_body
+passed
+
+
+
+=== TEST 9: reject the 5th request
+--- pipelined_requests eval
+[
+ "POST /ai2\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\":
\"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is
1+1?\"} ] }",
+ "POST /ai2\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\":
\"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is
1+1?\"} ] }",
+ "POST /ai2\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\":
\"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is
1+1?\"} ] }",
+ "POST /ai2\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\":
\"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is
1+1?\"} ] }",
+ "POST /ai2\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\":
\"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is
1+1?\"} ] }",
+]
+--- more_headers
+Authorization: Bearer token
+--- error_code eval
+[200, 200, 200, 200, 503]
+
+
+
+=== TEST 10: check rate limit headers
+--- request
+POST /ai2
+{ "messages": [ { "role": "system", "content": "You are a mathematician" }, {
"role": "user", "content": "What is 1+1?"} ] }
+--- more_headers
+Authorization: Bearer token
+--- response_headers
+X-AI-RateLimit-Limit-ai-proxy: 20
+X-AI-RateLimit-Remaining-ai-proxy: 19
+X-AI-RateLimit-Reset-ai-proxy: 45
+
+
+
+=== TEST 11: multi-request
+--- pipelined_requests eval
+[
+ "POST /ai2\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\":
\"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is
1+1?\"} ] }",
+ "POST /ai2\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\":
\"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is
1+1?\"} ] }",
+ "POST /ai2\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\":
\"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is
1+1?\"} ] }",
+ "POST /ai2\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\":
\"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is
1+1?\"} ] }",
+ "POST /ai2\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\":
\"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is
1+1?\"} ] }",
+]
+--- more_headers
+Authorization: Bearer token
+--- error_code eval
+[200, 200, 200, 200, 503]
+--- response_headers eval
+[
+ "X-AI-RateLimit-Remaining-ai-proxy: 19",
+ "X-AI-RateLimit-Remaining-ai-proxy: 14",
+ "X-AI-RateLimit-Remaining-ai-proxy: 9",
+ "X-AI-RateLimit-Remaining-ai-proxy: 4",
+ "X-AI-RateLimit-Remaining-ai-proxy: 0",
+]
+
+
+
+=== TEST 12: request route 1 and route 2
+--- pipelined_requests eval
+[
+ "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\":
\"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is
1+1?\"} ] }",
+ "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\":
\"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is
1+1?\"} ] }",
+ "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\":
\"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is
1+1?\"} ] }",
+ "POST /ai2\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\":
\"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is
1+1?\"} ] }",
+ "POST /ai2\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\":
\"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is
1+1?\"} ] }",
+ "POST /ai2\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\":
\"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is
1+1?\"} ] }",
+ "POST /ai2\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\":
\"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is
1+1?\"} ] }",
+ "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\":
\"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is
1+1?\"} ] }",
+ "POST /ai2\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\":
\"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is
1+1?\"} ] }",
+]
+--- more_headers
+Authorization: Bearer token
+--- error_code eval
+[200, 200, 200, 200, 200, 200, 200, 403, 503]