This is an automated email from the ASF dual-hosted git repository.
shreemaanabhishek pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/apisix.git
The following commit(s) were added to refs/heads/master by this push:
new 695ea3c29 feat: ai-content-moderation plugin (#11541)
695ea3c29 is described below
commit 695ea3c29d067d3a445872974e59dff99b505499
Author: Shreemaan Abhishek <[email protected]>
AuthorDate: Thu Oct 10 21:26:20 2024 +0545
feat: ai-content-moderation plugin (#11541)
---
Makefile | 4 +
apisix-master-0.rockspec | 2 +-
apisix/cli/config.lua | 1 +
apisix/plugins/ai-content-moderation.lua | 179 ++++++++++++++
apisix/plugins/ai/openai.lua | 33 +++
conf/config.yaml.example | 1 +
docs/en/latest/config.json | 3 +-
docs/en/latest/plugins/ai-content-moderation.md | 253 ++++++++++++++++++++
t/admin/plugins.t | 1 +
t/assets/content-moderation-responses.json | 224 +++++++++++++++++
t/plugin/ai-content-moderation-secrets.t | 213 +++++++++++++++++
t/plugin/ai-content-moderation.t | 304 ++++++++++++++++++++++++
12 files changed, 1216 insertions(+), 2 deletions(-)
diff --git a/Makefile b/Makefile
index 2082a0cf0..c72a12423 100644
--- a/Makefile
+++ b/Makefile
@@ -377,6 +377,10 @@ install: runtime
$(ENV_INSTALL) -d $(ENV_INST_LUADIR)/apisix/plugins/ai-proxy/drivers
$(ENV_INSTALL) apisix/plugins/ai-proxy/drivers/*.lua $(ENV_INST_LUADIR)/apisix/plugins/ai-proxy/drivers
+ # ai-content-moderation plugin
+ $(ENV_INSTALL) -d $(ENV_INST_LUADIR)/apisix/plugins/ai
+ $(ENV_INSTALL) apisix/plugins/ai/*.lua $(ENV_INST_LUADIR)/apisix/plugins/ai
+
$(ENV_INSTALL) bin/apisix $(ENV_INST_BINDIR)/apisix
diff --git a/apisix-master-0.rockspec b/apisix-master-0.rockspec
index 829b40f0f..9851045ef 100644
--- a/apisix-master-0.rockspec
+++ b/apisix-master-0.rockspec
@@ -82,7 +82,7 @@ dependencies = {
"lua-resty-t1k = 1.1.5",
"brotli-ffi = 0.3-1",
"lua-ffi-zlib = 0.6-0",
- "api7-lua-resty-aws == 2.0.1-1",
+ "api7-lua-resty-aws == 2.0.2-1",
}
build = {
diff --git a/apisix/cli/config.lua b/apisix/cli/config.lua
index 067e69d4c..57b4aa9d1 100644
--- a/apisix/cli/config.lua
+++ b/apisix/cli/config.lua
@@ -216,6 +216,7 @@ local _M = {
"body-transformer",
"ai-prompt-template",
"ai-prompt-decorator",
+ "ai-content-moderation",
"proxy-mirror",
"proxy-rewrite",
"workflow",
diff --git a/apisix/plugins/ai-content-moderation.lua b/apisix/plugins/ai-content-moderation.lua
new file mode 100644
index 000000000..19029a653
--- /dev/null
+++ b/apisix/plugins/ai-content-moderation.lua
@@ -0,0 +1,179 @@
+--
+-- Licensed to the Apache Software Foundation (ASF) under one or more
+-- contributor license agreements. See the NOTICE file distributed with
+-- this work for additional information regarding copyright ownership.
+-- The ASF licenses this file to You under the Apache License, Version 2.0
+-- (the "License"); you may not use this file except in compliance with
+-- the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+local core = require("apisix.core")
+local aws_instance = require("resty.aws")()
+local http = require("resty.http")
+local fetch_secrets = require("apisix.secret").fetch_secrets
+
+local next = next
+local pairs = pairs
+local unpack = unpack
+local type = type
+local ipairs = ipairs
+local require = require
+local HTTP_INTERNAL_SERVER_ERROR = ngx.HTTP_INTERNAL_SERVER_ERROR
+local HTTP_BAD_REQUEST = ngx.HTTP_BAD_REQUEST
+
+
+local aws_comprehend_schema = {
+ type = "object",
+ properties = {
+ access_key_id = { type = "string" },
+ secret_access_key = { type = "string" },
+ region = { type = "string" },
+ endpoint = {
+ type = "string",
+ pattern = [[^https?://]]
+ },
+ ssl_verify = {
+ type = "boolean",
+ default = true
+ }
+ },
+ required = { "access_key_id", "secret_access_key", "region", }
+}
+
+local moderation_categories_pattern = "^(PROFANITY|HATE_SPEECH|INSULT|"..
+ "HARASSMENT_OR_ABUSE|SEXUAL|VIOLENCE_OR_THREAT)$"
+local schema = {
+ type = "object",
+ properties = {
+ provider = {
+ type = "object",
+ properties = {
+ aws_comprehend = aws_comprehend_schema
+ },
+ maxProperties = 1,
+ -- ensure only one provider can be configured while implementing support for
+ -- other providers
+ required = { "aws_comprehend" }
+ },
+ moderation_categories = {
+ type = "object",
+ patternProperties = {
+ [moderation_categories_pattern] = {
+ type = "number",
+ minimum = 0,
+ maximum = 1
+ }
+ },
+ additionalProperties = false
+ },
+ moderation_threshold = {
+ type = "number",
+ minimum = 0,
+ maximum = 1,
+ default = 0.5
+ },
+ llm_provider = {
+ type = "string",
+ enum = { "openai" },
+ }
+ },
+ required = { "provider", "llm_provider" },
+}
+
+
+local _M = {
+ version = 0.1,
+ priority = 1040, -- TODO: might change
+ name = "ai-content-moderation",
+ schema = schema,
+}
+
+
+function _M.check_schema(conf)
+ return core.schema.check(schema, conf)
+end
+
+
+function _M.rewrite(conf, ctx)
+ conf = fetch_secrets(conf, true, conf, "")
+ if not conf then
+ return HTTP_INTERNAL_SERVER_ERROR, "failed to retrieve secrets from conf"
+ end
+
+ local body, err = core.request.get_json_request_body_table()
+ if not body then
+ return HTTP_BAD_REQUEST, err
+ end
+
+ local msgs = body.messages
+ if type(msgs) ~= "table" or #msgs < 1 then
+ return HTTP_BAD_REQUEST, "messages not found in request body"
+ end
+
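+ -- the schema allows exactly one provider, so pick the single configured entry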
+ local provider = conf.provider[next(conf.provider)]
+
+ local credentials = aws_instance:Credentials({
+ accessKeyId = provider.access_key_id,
+ secretAccessKey = provider.secret_access_key,
+ sessionToken = provider.session_token,
+ })
+
+ local default_endpoint = "https://comprehend." .. provider.region .. ".amazonaws.com"
+ local scheme, host, port = unpack(http:parse_uri(provider.endpoint or default_endpoint))
+ local endpoint = scheme .. "://" .. host
+ aws_instance.config.endpoint = endpoint
+ aws_instance.config.ssl_verify = provider.ssl_verify
+
+ local comprehend = aws_instance:Comprehend({
+ credentials = credentials,
+ endpoint = endpoint,
+ region = provider.region,
+ port = port,
+ })
+
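+ -- build Comprehend text segments from the chat messages using the llm_provider-specific helper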
+ local ai_module = require("apisix.plugins.ai." .. conf.llm_provider)
+ local create_request_text_segments = ai_module.create_request_text_segments
+
+ local text_segments = create_request_text_segments(msgs)
+ local res, err = comprehend:detectToxicContent({
+ LanguageCode = "en",
+ TextSegments = text_segments,
+ })
+
+ if not res then
+ core.log.error("failed to send request to ", provider, ": ", err)
+ return HTTP_INTERNAL_SERVER_ERROR, err
+ end
+
+ local results = res.body and res.body.ResultList
+ if type(results) ~= "table" or core.table.isempty(results) then
+ return HTTP_INTERNAL_SERVER_ERROR, "failed to get moderation results from response"
+ end
+
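+ -- reject the request if any configured category score, or the overall toxicity score, exceeds its threshold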
+ for _, result in ipairs(results) do
+ if conf.moderation_categories then
+ for _, item in pairs(result.Labels) do
+ if not conf.moderation_categories[item.Name] then
+ goto continue
+ end
+ if item.Score > conf.moderation_categories[item.Name] then
+ return HTTP_BAD_REQUEST, "request body exceeds " .. item.Name .. " threshold"
+ end
+ ::continue::
+ end
+ end
+
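+ -- overall toxicity check, applied regardless of moderation_categories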
+ if result.Toxicity > conf.moderation_threshold then
+ return HTTP_BAD_REQUEST, "request body exceeds toxicity threshold"
+ end
+ end
+end
+
+return _M
diff --git a/apisix/plugins/ai/openai.lua b/apisix/plugins/ai/openai.lua
new file mode 100644
index 000000000..203debb7e
--- /dev/null
+++ b/apisix/plugins/ai/openai.lua
@@ -0,0 +1,33 @@
+--
+-- Licensed to the Apache Software Foundation (ASF) under one or more
+-- contributor license agreements. See the NOTICE file distributed with
+-- this work for additional information regarding copyright ownership.
+-- The ASF licenses this file to You under the Apache License, Version 2.0
+-- (the "License"); you may not use this file except in compliance with
+-- the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+local core = require("apisix.core")
+local ipairs = ipairs
+
+local _M = {}
+
+
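+-- map each chat message's content to an AWS Comprehend text segment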
+function _M.create_request_text_segments(msgs)
+ local text_segments = {}
+ for _, msg in ipairs(msgs) do
+ core.table.insert_tail(text_segments, {
+ Text = msg.content
+ })
+ end
+ return text_segments
+end
+
+return _M
diff --git a/conf/config.yaml.example b/conf/config.yaml.example
index e6d10118f..44005ffd0 100644
--- a/conf/config.yaml.example
+++ b/conf/config.yaml.example
@@ -479,6 +479,7 @@ plugins: # plugin list (sorted by priority)
- body-transformer # priority: 1080
- ai-prompt-template # priority: 1071
- ai-prompt-decorator # priority: 1070
+ - ai-content-moderation # priority: 1040 TODO: compare priority with other ai plugins
- proxy-mirror # priority: 1010
- proxy-rewrite # priority: 1008
- workflow # priority: 1006
diff --git a/docs/en/latest/config.json b/docs/en/latest/config.json
index ac2403cfd..c2d8996ee 100644
--- a/docs/en/latest/config.json
+++ b/docs/en/latest/config.json
@@ -81,7 +81,8 @@
"plugins/ext-plugin-post-req",
"plugins/ext-plugin-post-resp",
"plugins/inspect",
- "plugins/ocsp-stapling"
+ "plugins/ocsp-stapling",
+ "plugins/ai-content-moderation"
]
},
{
diff --git a/docs/en/latest/plugins/ai-content-moderation.md b/docs/en/latest/plugins/ai-content-moderation.md
new file mode 100644
index 000000000..781b203d9
--- /dev/null
+++ b/docs/en/latest/plugins/ai-content-moderation.md
@@ -0,0 +1,253 @@
+---
+title: ai-content-moderation
+keywords:
+ - Apache APISIX
+ - API Gateway
+ - Plugin
+ - ai-content-moderation
+description: This document contains information about the Apache APISIX ai-content-moderation Plugin.
+---
+
+<!--
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+-->
+
+## Description
+
+The `ai-content-moderation` plugin examines the request body for toxic content and rejects the request if the content exceeds the configured threshold.
+
+**_This plugin should only be used in routes that proxy requests to LLMs._**
+
+**_As of now, the plugin only supports integration with [AWS Comprehend](https://aws.amazon.com/comprehend/) for content moderation. PRs that introduce support for other service providers are welcome._**
+
+## Plugin Attributes
+
+| **Field**                                 | **Required** | **Type** | **Description**                                                                                                                                                                                                                                                                  |
+| ----------------------------------------- | ------------ | -------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| provider.aws_comprehend.access_key_id     | Yes          | String   | AWS access key ID                                                                                                                                                                                                                                                                |
+| provider.aws_comprehend.secret_access_key | Yes          | String   | AWS secret access key                                                                                                                                                                                                                                                            |
+| provider.aws_comprehend.region            | Yes          | String   | AWS region                                                                                                                                                                                                                                                                       |
+| provider.aws_comprehend.endpoint          | No           | String   | AWS Comprehend service endpoint. Must match the pattern `^https?://`                                                                                                                                                                                                            |
+| provider.aws_comprehend.ssl_verify        | No           | Boolean  | Whether to verify the TLS certificate of the AWS Comprehend endpoint. Default: true                                                                                                                                                                                             |
+| moderation_categories                     | No           | Object   | Key-value pairs of moderation category and score. The key must be one of `PROFANITY`, `HATE_SPEECH`, `INSULT`, `HARASSMENT_OR_ABUSE`, `SEXUAL`, or `VIOLENCE_OR_THREAT`; the value must be between 0 and 1 (inclusive). A request is rejected if its score for a configured category exceeds that value. |
+| moderation_threshold                      | No           | Number   | Overall toxicity score above which a request is rejected. A higher value allows more toxic content through. Range: 0 - 1. Default: 0.5                                                                                                                                          |
+| llm_provider                              | Yes          | String   | Name of the LLM provider that this route proxies requests to.                                                                                                                                                                                                                   |
+
+## Example usage
+
+First initialise these shell variables:
+
+```shell
+ADMIN_API_KEY=edd1c9f034335f136f87ad84b625c8f1
+ACCESS_KEY_ID=aws-comprehend-access-key-id-here
+SECRET_ACCESS_KEY=aws-comprehend-secret-access-key-here
+OPENAI_KEY=open-ai-key-here
+```
+
+Create a route with the `ai-content-moderation` and `ai-proxy` plugins like so:
+
+```shell
+curl "http://127.0.0.1:9180/apisix/admin/routes/1" -X PUT \
+ -H "X-API-KEY: ${ADMIN_API_KEY}" \
+ -d '{
+ "uri": "/post",
+ "plugins": {
+ "ai-content-moderation": {
+ "provider": {
+ "aws_comprehend": {
+ "access_key_id": "'"$ACCESS_KEY_ID"'",
+ "secret_access_key": "'"$SECRET_ACCESS_KEY"'",
+ "region": "us-east-1"
+ }
+ },
+ "moderation_categories": {
+ "PROFANITY": 0.5
+ },
+ "llm_provider": "openai"
+ },
+ "ai-proxy": {
+ "auth": {
+ "header": {
+ "api-key": "'"$OPENAI_KEY"'"
+ }
+ },
+ "model": {
+ "provider": "openai",
+ "name": "gpt-4",
+ "options": {
+ "max_tokens": 512,
+ "temperature": 1.0
+ }
+ }
+ }
+ },
+ "upstream": {
+ "type": "roundrobin",
+ "nodes": {
+ "httpbin.org:80": 1
+ }
+ }
+ }'
+```
+
+The `ai-proxy` plugin is used here because it simplifies access to LLMs. Alternatively, you can configure the LLM in the route's upstream configuration, as shown in the sketch below.
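+
+For illustration only, here is a sketch that drops `ai-proxy` and points the route's upstream directly at the LLM. It assumes OpenAI's public endpoint (`api.openai.com`) and uses the `proxy-rewrite` plugin to set the chat completions path and the `Authorization` header; adjust these for your provider:
+
+```shell
+curl "http://127.0.0.1:9180/apisix/admin/routes/1" -X PUT \
+  -H "X-API-KEY: ${ADMIN_API_KEY}" \
+  -d '{
+    "uri": "/post",
+    "plugins": {
+      "ai-content-moderation": {
+        "provider": {
+          "aws_comprehend": {
+            "access_key_id": "'"$ACCESS_KEY_ID"'",
+            "secret_access_key": "'"$SECRET_ACCESS_KEY"'",
+            "region": "us-east-1"
+          }
+        },
+        "llm_provider": "openai"
+      },
+      "proxy-rewrite": {
+        "uri": "/v1/chat/completions",
+        "headers": {
+          "set": {
+            "Authorization": "Bearer '"$OPENAI_KEY"'"
+          }
+        }
+      }
+    },
+    "upstream": {
+      "type": "roundrobin",
+      "scheme": "https",
+      "pass_host": "node",
+      "nodes": {
+        "api.openai.com:443": 1
+      }
+    }
+  }'
+```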
+
+Now send a request:
+
+```shell
+curl http://127.0.0.1:9080/post -i -XPOST -H 'Content-Type: application/json' -d '{
+ "messages": [
+ {
+ "role": "user",
+ "content": "<very profane message here>"
+ }
+ ]
+}'
+```
+
+The request will then be blocked with an error like this:
+
+```text
+HTTP/1.1 400 Bad Request
+Date: Thu, 03 Oct 2024 11:53:15 GMT
+Content-Type: text/plain; charset=utf-8
+Transfer-Encoding: chunked
+Connection: keep-alive
+Server: APISIX/3.10.0
+
+request body exceeds PROFANITY threshold
+```
+
+Send a request with compliant content in the request body:
+
+```shell
+curl http://127.0.0.1:9080/post -i -XPOST -H 'Content-Type: application/json' -d '{
+ "messages": [
+ {
+ "role": "system",
+ "content": "You are a mathematician"
+ },
+ { "role": "user", "content": "What is 1+1?" }
+ ]
+}'
+```
+
+This request will be proxied normally to the configured LLM.
+
+```text
+HTTP/1.1 200 OK
+Date: Thu, 03 Oct 2024 11:53:00 GMT
+Content-Type: text/plain; charset=utf-8
+Transfer-Encoding: chunked
+Connection: keep-alive
+Server: APISIX/3.10.0
+
+{"choices":[{"finish_reason":"stop","index":0,"message":{"content":"1+1 equals
2.","role":"assistant"}}],"created":1727956380,"id":"chatcmpl-AEEg8Pe5BAW5Sw3C1gdwXnuyulIkY","model":"gpt-4o-2024-05-13","object":"chat.completion","system_fingerprint":"fp_67802d9a6d","usage":{"completion_tokens":7,"prompt_tokens":23,"total_tokens":30}}
+```
+
+You can also configure filters on other moderation categories like so:
+
+```shell
+curl "http://127.0.0.1:9180/apisix/admin/routes/1" -X PUT \
+ -H "X-API-KEY: ${ADMIN_API_KEY}" \
+ -d '{
+ "uri": "/post",
+ "plugins": {
+ "ai-content-moderation": {
+ "provider": {
+ "aws_comprehend": {
+ "access_key_id": "'"$ACCESS_KEY_ID"'",
+ "secret_access_key": "'"$SECRET_ACCESS_KEY"'",
+ "region": "us-east-1"
+ }
+ },
+ "llm_provider": "openai",
+ "moderation_categories": {
+ "PROFANITY": 0.5,
+ "HARASSMENT_OR_ABUSE": 0.7,
+ "SEXUAL": 0.2
+ }
+ },
+ "ai-proxy": {
+ "auth": {
+ "header": {
+ "api-key": "'"$OPENAI_KEY"'"
+ }
+ },
+ "model": {
+ "provider": "openai",
+ "name": "gpt-4",
+ "options": {
+ "max_tokens": 512,
+ "temperature": 1.0
+ }
+ }
+ }
+ },
+ "upstream": {
+ "type": "roundrobin",
+ "nodes": {
+ "httpbin.org:80": 1
+ }
+ }
+ }'
+```
+
+If none of the `moderation_categories` are configured, request bodies will be moderated on the basis of their overall toxicity.
+The default `moderation_threshold` is 0.5. It can be configured like so:
+
+```shell
+curl "http://127.0.0.1:9180/apisix/admin/routes/1" -X PUT \
+ -H "X-API-KEY: ${ADMIN_API_KEY}" \
+ -d '{
+ "uri": "/post",
+ "plugins": {
+ "ai-content-moderation": {
+ "provider": {
+ "aws_comprehend": {
+ "access_key_id": "'"$ACCESS_KEY_ID"'",
+ "secret_access_key": "'"$SECRET_ACCESS_KEY"'",
+ "region": "us-east-1"
+ }
+ },
+ "moderation_threshold": 0.7,
+ "llm_provider": "openai"
+ },
+ "ai-proxy": {
+ "auth": {
+ "header": {
+ "api-key": "'"$OPENAI_KEY"'"
+ }
+ },
+ "model": {
+ "provider": "openai",
+ "name": "gpt-4",
+ "options": {
+ "max_tokens": 512,
+ "temperature": 1.0
+ }
+ }
+ }
+ },
+ "upstream": {
+ "type": "roundrobin",
+ "nodes": {
+ "httpbin.org:80": 1
+ }
+ }
+}'
+```
diff --git a/t/admin/plugins.t b/t/admin/plugins.t
index e66662c91..6541bf764 100644
--- a/t/admin/plugins.t
+++ b/t/admin/plugins.t
@@ -96,6 +96,7 @@ proxy-cache
body-transformer
ai-prompt-template
ai-prompt-decorator
+ai-content-moderation
proxy-mirror
proxy-rewrite
workflow
diff --git a/t/assets/content-moderation-responses.json b/t/assets/content-moderation-responses.json
new file mode 100644
index 000000000..e10c3d030
--- /dev/null
+++ b/t/assets/content-moderation-responses.json
@@ -0,0 +1,224 @@
+{
+ "good_request": {
+ "ResultList": [
+ {
+ "Toxicity": 0.02150000333786,
+ "Labels": [
+ {
+ "Name": "PROFANITY",
+ "Score": 0.00589999556541
+ },
+ {
+ "Name": "HATE_SPEECH",
+ "Score": 0.01729999780655
+ },
+ {
+ "Name": "INSULT",
+ "Score": 0.00519999861717
+ },
+ {
+ "Name": "GRAPHIC",
+ "Score": 0.00520000338554
+ },
+ {
+ "Name": "HARASSMENT_OR_ABUSE",
+ "Score": 0.00090001106262
+ },
+ {
+ "Name": "SEXUAL",
+ "Score": 0.00810000061989
+ },
+ {
+ "Name": "VIOLENCE_OR_THREAT",
+ "Score": 0.00570000290871
+ }
+ ]
+ }
+ ]
+ },
+ "profane": {
+ "ResultList": [
+ {
+ "Toxicity": 0.62150000333786,
+ "Labels": [
+ {
+ "Name": "PROFANITY",
+ "Score": 0.55589999556541
+ },
+ {
+ "Name": "HATE_SPEECH",
+ "Score": 0.21729999780655
+ },
+ {
+ "Name": "INSULT",
+ "Score": 0.25519999861717
+ },
+ {
+ "Name": "GRAPHIC",
+ "Score": 0.12520000338554
+ },
+ {
+ "Name": "HARASSMENT_OR_ABUSE",
+ "Score": 0.27090001106262
+ },
+ {
+ "Name": "SEXUAL",
+ "Score": 0.44810000061989
+ },
+ {
+ "Name": "VIOLENCE_OR_THREAT",
+ "Score": 0.27570000290871
+ }
+ ]
+ }
+ ]
+ },
+ "profane_but_not_toxic": {
+ "ResultList": [
+ {
+ "Toxicity": 0.12150000333786,
+ "Labels": [
+ {
+ "Name": "PROFANITY",
+ "Score": 0.55589999556541
+ },
+ {
+ "Name": "HATE_SPEECH",
+ "Score": 0.21729999780655
+ },
+ {
+ "Name": "INSULT",
+ "Score": 0.25519999861717
+ },
+ {
+ "Name": "GRAPHIC",
+ "Score": 0.12520000338554
+ },
+ {
+ "Name": "HARASSMENT_OR_ABUSE",
+ "Score": 0.27090001106262
+ },
+ {
+ "Name": "SEXUAL",
+ "Score": 0.44810000061989
+ },
+ {
+ "Name": "VIOLENCE_OR_THREAT",
+ "Score": 0.27570000290871
+ }
+ ]
+ }
+ ]
+ },
+ "very_profane": {
+ "ResultList": [
+ {
+ "Toxicity": 0.72150000333786,
+ "Labels": [
+ {
+ "Name": "PROFANITY",
+ "Score": 0.85589999556541
+ },
+ {
+ "Name": "HATE_SPEECH",
+ "Score": 0.21729999780655
+ },
+ {
+ "Name": "INSULT",
+ "Score": 0.25519999861717
+ },
+ {
+ "Name": "GRAPHIC",
+ "Score": 0.12520000338554
+ },
+ {
+ "Name": "HARASSMENT_OR_ABUSE",
+ "Score": 0.27090001106262
+ },
+ {
+ "Name": "SEXUAL",
+ "Score": 0.94810000061989
+ },
+ {
+ "Name": "VIOLENCE_OR_THREAT",
+ "Score": 0.27570000290871
+ }
+ ]
+ }
+ ]
+ },
+ "toxic": {
+ "ResultList": [
+ {
+ "Toxicity": 0.72150000333786,
+ "Labels": [
+ {
+ "Name": "PROFANITY",
+ "Score": 0.25589999556541
+ },
+ {
+ "Name": "HATE_SPEECH",
+ "Score": 0.21729999780655
+ },
+ {
+ "Name": "INSULT",
+ "Score": 0.75519999861717
+ },
+ {
+ "Name": "GRAPHIC",
+ "Score": 0.12520000338554
+ },
+ {
+ "Name": "HARASSMENT_OR_ABUSE",
+ "Score": 0.27090001106262
+ },
+ {
+ "Name": "SEXUAL",
+ "Score": 0.64810000061989
+ },
+ {
+ "Name": "VIOLENCE_OR_THREAT",
+ "Score": 0.27570000290871
+ }
+ ]
+ }
+ ]
+ },
+ "very_toxic": {
+ "ResultList": [
+ {
+ "Toxicity": 0.92150000333786,
+ "Labels": [
+ {
+ "Name": "PROFANITY",
+ "Score": 0.25589999556541
+ },
+ {
+ "Name": "HATE_SPEECH",
+ "Score": 0.21729999780655
+ },
+ {
+ "Name": "INSULT",
+ "Score": 0.25519999861717
+ },
+ {
+ "Name": "GRAPHIC",
+ "Score": 0.12520000338554
+ },
+ {
+ "Name": "HARASSMENT_OR_ABUSE",
+ "Score": 0.27090001106262
+ },
+ {
+ "Name": "SEXUAL",
+ "Score": 0.44810000061989
+ },
+ {
+ "Name": "VIOLENCE_OR_THREAT",
+ "Score": 0.27570000290871
+ }
+ ]
+ }
+ ]
+ }
+}
diff --git a/t/plugin/ai-content-moderation-secrets.t b/t/plugin/ai-content-moderation-secrets.t
new file mode 100644
index 000000000..06d7941f7
--- /dev/null
+++ b/t/plugin/ai-content-moderation-secrets.t
@@ -0,0 +1,213 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+BEGIN {
+ $ENV{VAULT_TOKEN} = "root";
+ $ENV{SECRET_ACCESS_KEY} = "super-secret";
+ $ENV{ACCESS_KEY_ID} = "access-key-id";
+}
+
+use t::APISIX 'no_plan';
+
+repeat_each(1);
+no_long_string();
+no_root_location();
+
+add_block_preprocessor(sub {
+ my ($block) = @_;
+
+ if (!defined $block->request) {
+ $block->set_value("request", "GET /t");
+ }
+
+ my $http_config = $block->http_config // <<_EOC_;
+ server {
+ listen 2668;
+
+ default_type 'application/json';
+
+ location / {
+ content_by_lua_block {
+ local json = require("cjson.safe")
+ local core = require("apisix.core")
+ local open = io.open
+
+ local f = open('t/assets/content-moderation-responses.json', "r")
+ local resp = f:read("*a")
+ f:close()
+
+ if not resp then
+ ngx.status = 503
+ ngx.say("[INTERNAL FAILURE]: failed to open response.json file")
+ end
+
+ local responses = json.decode(resp)
+ if not responses then
+ ngx.status = 503
+ ngx.say("[INTERNAL FAILURE]: failed to decode response.json contents")
+ end
+
+ local headers = ngx.req.get_headers()
+ local auth_header = headers["Authorization"]
+ if core.string.find(auth_header, "access-key-id") then
+ ngx.say(json.encode(responses["good_request"]))
+ return
+ end
+ ngx.status = 403
+ ngx.say("unauthorized")
+ }
+ }
+ }
+_EOC_
+
+ $block->set_value("http_config", $http_config);
+});
+
+run_tests;
+
+__DATA__
+
+=== TEST 1: store secret into vault
+--- exec
+VAULT_TOKEN='root' VAULT_ADDR='http://0.0.0.0:8200' vault kv put kv/apisix/foo secret_access_key=super-secret
+VAULT_TOKEN='root' VAULT_ADDR='http://0.0.0.0:8200' vault kv put kv/apisix/foo access_key_id=access-key-id
+--- response_body
+Success! Data written to: kv/apisix/foo
+Success! Data written to: kv/apisix/foo
+
+
+
+=== TEST 2: set secret_access_key and access_key_id as references to the vault secret
+--- config
+ location /t {
+ content_by_lua_block {
+ local t = require("lib.test_admin").test
+ -- put secret vault config
+ local code, body = t('/apisix/admin/secrets/vault/test1',
+ ngx.HTTP_PUT,
+ [[{
+ "uri": "http://127.0.0.1:8200",
+ "prefix" : "kv/apisix",
+ "token" : "root"
+ }]]
+ )
+
+ if code >= 300 then
+ ngx.status = code
+ return ngx.say(body)
+ end
+ local code, body = t('/apisix/admin/routes/1',
+ ngx.HTTP_PUT,
+ [[{
+ "uri": "/echo",
+ "plugins": {
+ "ai-content-moderation": {
+ "provider": {
+ "aws_comprehend": {
+ "access_key_id":
"$secret://vault/test1/foo/access_key_id",
+ "secret_access_key":
"$secret://vault/test1/foo/secret_access_key",
+ "region": "us-east-1",
+ "endpoint": "http://localhost:2668"
+ }
+ },
+ "llm_provider": "openai"
+ }
+ },
+ "upstream": {
+ "type": "roundrobin",
+ "nodes": {
+ "127.0.0.1:1980": 1
+ }
+ }
+ }]]
+ )
+
+ if code >= 300 then
+ ngx.status = code
+ return ngx.say(body)
+ end
+ ngx.say("success")
+ }
+ }
+--- request
+GET /t
+--- response_body
+success
+
+
+
+=== TEST 3: good request should pass
+--- request
+POST /echo
+{"model":"gpt-4o-mini","messages":[{"role":"user","content":"good_request"}]}
+--- error_code: 200
+--- response_body chomp
+{"model":"gpt-4o-mini","messages":[{"role":"user","content":"good_request"}]}
+
+
+
+=== TEST 4: set secret_access_key as a reference to an env variable
+--- config
+ location /t {
+ content_by_lua_block {
+ local t = require("lib.test_admin").test
+ local code, body = t('/apisix/admin/routes/1',
+ ngx.HTTP_PUT,
+ [[{
+ "uri": "/echo",
+ "plugins": {
+ "ai-content-moderation": {
+ "provider": {
+ "aws_comprehend": {
+ "access_key_id": "$env://ACCESS_KEY_ID",
+ "secret_access_key":
"$env://SECRET_ACCESS_KEY",
+ "region": "us-east-1",
+ "endpoint": "http://localhost:2668"
+ }
+ },
+ "llm_provider": "openai"
+ }
+ },
+ "upstream": {
+ "type": "roundrobin",
+ "nodes": {
+ "127.0.0.1:1980": 1
+ }
+ }
+ }]]
+ )
+
+ if code >= 300 then
+ ngx.status = code
+ return
+ end
+ ngx.say("success")
+ }
+ }
+--- request
+GET /t
+--- response_body
+success
+
+
+
+=== TEST 5: good request should pass
+--- request
+POST /echo
+{"model":"gpt-4o-mini","messages":[{"role":"user","content":"good_request"}]}
+--- error_code: 200
+--- response_body chomp
+{"model":"gpt-4o-mini","messages":[{"role":"user","content":"good_request"}]}
diff --git a/t/plugin/ai-content-moderation.t b/t/plugin/ai-content-moderation.t
new file mode 100644
index 000000000..66393ef98
--- /dev/null
+++ b/t/plugin/ai-content-moderation.t
@@ -0,0 +1,304 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+use t::APISIX 'no_plan';
+
+log_level("info");
+repeat_each(1);
+no_long_string();
+no_root_location();
+
+
+add_block_preprocessor(sub {
+ my ($block) = @_;
+
+ if (!defined $block->request) {
+ $block->set_value("request", "GET /t");
+ }
+
+ my $http_config = $block->http_config // <<_EOC_;
+ server {
+ listen 2668;
+
+ default_type 'application/json';
+
+ location / {
+ content_by_lua_block {
+ local json = require("cjson.safe")
+ local open = io.open
+ local f = open('t/assets/content-moderation-responses.json', "r")
+ local resp = f:read("*a")
+ f:close()
+
+ if not resp then
+ ngx.status = 503
+ ngx.say("[INTERNAL FAILURE]: failed to open response.json file")
+ end
+
+ local responses = json.decode(resp)
+ if not responses then
+ ngx.status = 503
+ ngx.say("[INTERNAL FAILURE]: failed to decode response.json contents")
+ end
+
+ if ngx.req.get_method() ~= "POST" then
+ ngx.status = 400
+ ngx.say("Unsupported request method: ",
ngx.req.get_method())
+ end
+
+ ngx.req.read_body()
+ local body, err = ngx.req.get_body_data()
+ if not body then
+ ngx.status = 503
+ ngx.say("[INTERNAL FAILURE]: failed to get request body: ", err)
+ end
+
+ body, err = json.decode(body)
+ if not body then
+ ngx.status = 503
+ ngx.say("[INTERNAL FAILURE]: failed to decode request body: ", err)
+ end
+ local result = body.TextSegments[1].Text
+ local final_response = responses[result] or "invalid"
+
+ if final_response == "invalid" then
+ ngx.status = 500
+ end
+ ngx.say(json.encode(final_response))
+ }
+ }
+ }
+_EOC_
+
+ $block->set_value("http_config", $http_config);
+});
+
+run_tests();
+
+__DATA__
+
+=== TEST 1: sanity
+--- config
+ location /t {
+ content_by_lua_block {
+ local t = require("lib.test_admin").test
+ local code, body = t('/apisix/admin/routes/1',
+ ngx.HTTP_PUT,
+ [[{
+ "uri": "/echo",
+ "plugins": {
+ "ai-content-moderation": {
+ "provider": {
+ "aws_comprehend": {
+ "access_key_id": "access",
+ "secret_access_key": "ea+secret",
+ "region": "us-east-1",
+ "endpoint": "http://localhost:2668"
+ }
+ },
+ "llm_provider": "openai"
+ }
+ },
+ "upstream": {
+ "type": "roundrobin",
+ "nodes": {
+ "127.0.0.1:1980": 1
+ }
+ }
+ }]]
+ )
+
+ if code >= 300 then
+ ngx.status = code
+ end
+ ngx.say(body)
+ }
+ }
+--- response_body
+passed
+
+
+
+=== TEST 2: toxic request should fail
+--- request
+POST /echo
+{"model":"gpt-4o-mini","messages":[{"role":"user","content":"toxic"}]}
+--- error_code: 400
+--- response_body chomp
+request body exceeds toxicity threshold
+
+
+
+=== TEST 3: good request should pass
+--- request
+POST /echo
+{"model":"gpt-4o-mini","messages":[{"role":"user","content":"good_request"}]}
+--- error_code: 200
+
+
+
+=== TEST 4: profanity filter
+--- config
+ location /t {
+ content_by_lua_block {
+ local t = require("lib.test_admin").test
+ local code, body = t('/apisix/admin/routes/1',
+ ngx.HTTP_PUT,
+ [[{
+ "uri": "/echo",
+ "plugins": {
+ "ai-content-moderation": {
+ "provider": {
+ "aws_comprehend": {
+ "access_key_id": "access",
+ "secret_access_key": "ea+secret",
+ "region": "us-east-1",
+ "endpoint": "http://localhost:2668"
+ }
+ },
+ "moderation_categories": {
+ "PROFANITY": 0.5
+ },
+ "llm_provider": "openai"
+ }
+ },
+ "upstream": {
+ "type": "roundrobin",
+ "nodes": {
+ "127.0.0.1:1980": 1
+ }
+ }
+ }]]
+ )
+
+ if code >= 300 then
+ ngx.status = code
+ end
+ ngx.say(body)
+ }
+ }
+--- response_body
+passed
+
+
+
+=== TEST 5: profane request should fail
+--- request
+POST /echo
+{"model":"gpt-4o-mini","messages":[{"role":"user","content":"profane"}]}
+--- error_code: 400
+--- response_body chomp
+request body exceeds PROFANITY threshold
+
+
+
+=== TEST 6: very profane request should also fail
+--- request
+POST /echo
+{"model":"gpt-4o-mini","messages":[{"role":"user","content":"very_profane"}]}
+--- error_code: 400
+--- response_body chomp
+request body exceeds PROFANITY threshold
+
+
+
+=== TEST 7: good_request should pass
+--- request
+POST /echo
+{"model":"gpt-4o-mini","messages":[{"role":"user","content":"good_request"}]}
+--- error_code: 200
+
+
+
+=== TEST 8: set profanity = 0.7 (allow profane request but disallow very_profane)
+--- config
+ location /t {
+ content_by_lua_block {
+ local t = require("lib.test_admin").test
+ local code, body = t('/apisix/admin/routes/1',
+ ngx.HTTP_PUT,
+ [[{
+ "uri": "/echo",
+ "plugins": {
+ "ai-content-moderation": {
+ "provider": {
+ "aws_comprehend": {
+ "access_key_id": "access",
+ "secret_access_key": "ea+secret",
+ "region": "us-east-1",
+ "endpoint": "http://localhost:2668"
+ }
+ },
+ "moderation_categories": {
+ "PROFANITY": 0.7
+ },
+ "llm_provider": "openai"
+ }
+ },
+ "upstream": {
+ "type": "roundrobin",
+ "nodes": {
+ "127.0.0.1:1980": 1
+ }
+ }
+ }]]
+ )
+
+ if code >= 300 then
+ ngx.status = code
+ end
+ ngx.say(body)
+ }
+ }
+--- response_body
+passed
+
+
+
+=== TEST 9: profane request should pass profanity check but fail toxicity check
+--- request
+POST /echo
+{"model":"gpt-4o-mini","messages":[{"role":"user","content":"profane"}]}
+--- error_code: 400
+--- response_body chomp
+request body exceeds toxicity threshold
+
+
+
+=== TEST 10: profane_but_not_toxic request should pass
+--- request
+POST /echo
+{"model":"gpt-4o-mini","messages":[{"role":"user","content":"profane_but_not_toxic"}]}
+--- error_code: 200
+
+
+
+=== TEST 11: but very profane request will fail
+--- request
+POST /echo
+{"model":"gpt-4o-mini","messages":[{"role":"user","content":"very_profane"}]}
+--- error_code: 400
+--- response_body chomp
+request body exceeds PROFANITY threshold
+
+
+
+=== TEST 12: good_request should pass
+--- request
+POST /echo
+{"model":"gpt-4o-mini","messages":[{"role":"user","content":"good_request"}]}
+--- error_code: 200