On Tue, Nov 11, 2025 at 21:05, Daniel Baumann <[email protected]> wrote:
> retitle 1117591 add support for nginx
> severity 1117591 wishlist
> thanks
>
> Hi Jeremy,
>
> thank you for your offer, please attach a patch to this bug report
> based on https://forgejo.debian.net/web/ai.robots.txt

Here it is!
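To make the moving parts easier to review: debian/rules fills the @AI_BOTS@ placeholder in block-ai-bots.conf.in with a regex alternation of all user agents from robots.json. Assuming robots.json contains, say, GPTBot, ClaudeBot and Bytespider (an illustrative subset, not the real list), the generated /etc/nginx/conf.d/block-ai-bots.conf would look roughly like this:

```
# Generated map: sets $block_ai_bots to 1 for any matching user agent.
map $http_user_agent $block_ai_bots {
	default 0;
	# case-insensitive match against the blocklist
	~*(GPTBot|ClaudeBot|Bytespider) 1;
}
```

The map only sets a variable; nothing is blocked until a server or location block includes the snippet that checks it.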
From f5b6ae3a04a29ede01b146069d907f614ae9017a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A9r=C3=A9my=20Lal?= <[email protected]>
Date: Sun, 30 Nov 2025 22:30:21 +0100
Subject: [PATCH] add nginx-ai-bots package

---
 debian/control                           | 17 +++++++++++++++++
 debian/local/nginx/block-ai-bots.conf    |  5 +++++
 debian/local/nginx/block-ai-bots.conf.in |  6 ++++++
 debian/nginx-ai-bots.README.Debian       | 31 +++++++++++++++++++++++++++++++
 debian/nginx-ai-bots.postinst            | 28 ++++++++++++++++++++++++++++
 debian/rules                             | 23 ++++++++++++++++++-----
 6 files changed, 106 insertions(+), 4 deletions(-)
 create mode 100644 debian/local/nginx/block-ai-bots.conf
 create mode 100644 debian/local/nginx/block-ai-bots.conf.in
 create mode 100644 debian/nginx-ai-bots.README.Debian
 create mode 100755 debian/nginx-ai-bots.postinst

diff --git a/debian/control b/debian/control
index 61ca8f0..c3a9a7b 100644
--- a/debian/control
+++ b/debian/control
@@ -25,3 +25,20 @@ Description: list of AI agents and robots to block (apache2)
  .
  This package contains the apache2 integration,
  please see /usr/share/doc/apache2-ai-bots/README.Debian on how to enable it.
+
+Package: nginx-ai-bots
+Section: web
+Architecture: all
+Depends:
+ nginx,
+ ${misc:Depends},
+Description: list of AI agents and robots to block (nginx)
+ ai.robots.txt is a list containing AI-related crawlers of all types, regardless
+ of purpose.
+ .
+ Blocking access based on the user agent does not block all crawlers, but it is
+ a simple and low-overhead way of blocking most crawlers.
+ .
+ This package contains the nginx integration,
+ please see /usr/share/doc/nginx-ai-bots/README.Debian on how to enable it.
+
diff --git a/debian/local/nginx/block-ai-bots.conf b/debian/local/nginx/block-ai-bots.conf
new file mode 100644
index 0000000..bc6146c
--- /dev/null
+++ b/debian/local/nginx/block-ai-bots.conf
@@ -0,0 +1,5 @@
+# Requires the map defined in /etc/nginx/conf.d/block-ai-bots.conf
+
+if ($block_ai_bots) {
+	return 403;
+}
diff --git a/debian/local/nginx/block-ai-bots.conf.in b/debian/local/nginx/block-ai-bots.conf.in
new file mode 100644
index 0000000..c21bf18
--- /dev/null
+++ b/debian/local/nginx/block-ai-bots.conf.in
@@ -0,0 +1,6 @@
+# To use this, include /etc/nginx/snippets/block-ai-bots.conf in a server or location block
+
+map $http_user_agent $block_ai_bots {
+	default 0;
+	~*(@AI_BOTS@) 1;
+}
diff --git a/debian/nginx-ai-bots.README.Debian b/debian/nginx-ai-bots.README.Debian
new file mode 100644
index 0000000..245d0df
--- /dev/null
+++ b/debian/nginx-ai-bots.README.Debian
@@ -0,0 +1,31 @@
+ai.robots.txt for nginx
+=======================
+
+Bots are reported to make up half of the web's traffic by now. There are
+several ways of blocking them.
+
+While there are more advanced tools like Anubis[0], for smaller sites the
+simpler approach of blocking access by user agent is often still enough.
+
+Here are the different ways to enable this blocklist on your website.
+
+
+Usage
+-----
+
+In a server or location block, do:
+
+```
+include /etc/nginx/snippets/block-ai-bots.conf;
+```
+
+Then reload nginx.
+
+Robots.txt
+----------
+
+An example RFC 9309 robots.txt file can be found at
+
+/usr/share/nginx-ai-bots/robots.txt
+
+It is meant to be dynamically appended to the server's pre-existing robots.txt.
diff --git a/debian/nginx-ai-bots.postinst b/debian/nginx-ai-bots.postinst
new file mode 100755
index 0000000..e1efe3e
--- /dev/null
+++ b/debian/nginx-ai-bots.postinst
@@ -0,0 +1,28 @@
+#!/bin/sh
+
+set -e
+
+case "${1}" in
+	configure)
+		if [ -d /run/systemd/system ]
+		then
+			if [ -n "${2}" ]
+			then
+				deb-systemd-invoke reload 'nginx.service' >/dev/null || true
+			fi
+		fi
+		;;
+
+	abort-upgrade|abort-remove|abort-deconfigure)
+
+		;;
+
+	*)
+		echo "postinst called with unknown argument \`${1}'" >&2
+		exit 1
+		;;
+esac
+
+#DEBHELPER#
+
+exit 0
diff --git a/debian/rules b/debian/rules
index c6f1fe6..3d10835 100755
--- a/debian/rules
+++ b/debian/rules
@@ -1,5 +1,8 @@
 #!/usr/bin/make -f
 
+apache_conf = debian/apache2-ai-bots/etc/apache2/conf-available/block-ai-bots.conf
+nginx_conf = debian/nginx-ai-bots/etc/nginx/conf.d/block-ai-bots.conf
+
 %:
 	dh ${@}
 
@@ -9,16 +12,28 @@ override_dh_auto_install:
 	cp .htaccess debian/apache2-ai-bots/usr/share/apache2-ai-bots/htaccess
 	cp robots.txt debian/apache2-ai-bots//usr/share/apache2-ai-bots
 
+	mkdir -p debian/nginx-ai-bots/etc/nginx/snippets/
+	cp debian/local/nginx/block-ai-bots.conf debian/nginx-ai-bots/etc/nginx/snippets/block-ai-bots.conf
+	mkdir -p debian/nginx-ai-bots/usr/share/nginx-ai-bots
+	cp robots.txt debian/nginx-ai-bots/usr/share/nginx-ai-bots
+
 	# apache2
-	mkdir -p debian/apache2-ai-bots/etc/apache2/conf-available
-	cp debian/local/apache2/block-ai-bots.conf.in debian/apache2-ai-bots/etc/apache2/conf-available/block-ai-bots.conf
+	mkdir -p $(dir $(apache_conf))
+	cp debian/local/apache2/block-ai-bots.conf.in $(apache_conf)
+
+	# nginx
+	mkdir -p $(dir $(nginx_conf))
+	cp debian/local/nginx/block-ai-bots.conf.in $(nginx_conf)
 
 	for BOT in $(shell jq 'keys[]' robots.json); \
 	do \
-		sed -i -e "s|@AI_BOTS@|BrowserMatchNoCase $${BOT} block_ai_bot=true\n@AI_BOTS@|" debian/apache2-ai-bots/etc/apache2/conf-available/block-ai-bots.conf; \
+		sed -i -e "s|@AI_BOTS@|BrowserMatchNoCase $${BOT} block_ai_bot=true\n@AI_BOTS@|" $(apache_conf); \
+		escaBOT=$$(echo $${BOT} | sed -e 's/"//g' -e 's/ /\\\\s/g' -e 's/\./\\\\./g'); \
+		sed -i -e "s|@AI_BOTS@|$${escaBOT}\\|@AI_BOTS@|" $(nginx_conf); \
 	done
-	sed -i -e '/@AI_BOTS@/d' debian/apache2-ai-bots/etc/apache2/conf-available/block-ai-bots.conf
+	sed -i -e '/@AI_BOTS@/d' $(apache_conf)
+	sed -i -e 's/|@AI_BOTS@//' $(nginx_conf)
 
 upstream:
 	rm -rf assets/images/noai-logo.png
 
-- 
2.51.0
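To try the patch, here is a minimal sketch of a vhost using the snippet (example.org and the document root are placeholders):

```
server {
	listen 80;
	server_name example.org;
	root /var/www/html;

	# returns 403 whenever the map set $block_ai_bots
	include /etc/nginx/snippets/block-ai-bots.conf;
}
```

After reloading nginx, `curl -I -A "GPTBot" http://example.org/` should get a 403, while a regular browser user agent is served normally.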

