Add robots.txt to our list servers

We've noticed that our uwsgi queues are filling up and a lot of requests
are being made to robots.txt, which end up erroring with 500/503
responses. Add a robots.txt file which allows crawling of our lists and
archives with a crawl delay value, in the hope that this will cause bots
to cache results and not fill up the queue with repetitive requests.

Change-Id: I660d8d43f6b2d96663212d93ec48e67d86e9e761
Clark Boylan 2024-04-19 13:46:50 -07:00
parent 481f128257
commit f4dde583ed
3 changed files with 28 additions and 0 deletions


@@ -0,0 +1,7 @@
User-agent: *
Disallow: /accounts/*
Allow: /archives/*
Allow: /mailman3/lists/*
Crawl-delay: 2
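
As a quick sanity check, the crawl delay these rules advertise can be read
back with Python's standard-library robots.txt parser; a minimal sketch,
feeding the rules in verbatim (note that urllib.robotparser matches paths as
plain prefixes, so the trailing * wildcards above are taken literally rather
than expanded):

import urllib.robotparser

# Rules copied from the robots.txt above, parsed inline so this runs
# without a live server.
rules = """\
User-agent: *
Disallow: /accounts/*
Allow: /archives/*
Allow: /mailman3/lists/*
Crawl-delay: 2
"""

rp = urllib.robotparser.RobotFileParser()
rp.parse(rules.splitlines())

# A well-behaved bot reads this delay and waits between requests, which is
# what should keep repeat robots.txt fetches from filling the uwsgi queue.
print(rp.crawl_delay("*"))  # -> 2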


@@ -141,6 +141,22 @@
  shell:
    cmd: docker image prune -f
- name: Create robots.txt location dir
  file:
    path: /var/www/robots
    state: directory
    owner: root
    group: root
    mode: '0755'
- name: Copy the robots.txt
  copy:
    src: robots.txt
    dest: /var/www/robots/robots.txt
    owner: root
    group: root
    mode: '0644'
- name: Install apache2
  package:
    name:

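The two tasks above can be verified with a small host-level test; a sketch in
testinfra style (the use of testinfra and the test name are assumptions, not
part of this change), asserting the directory and file exist with the
ownership and modes set by the tasks:

def test_robots_txt_deployed(host):
    # Directory created by the "Create robots.txt location dir" task.
    robots_dir = host.file("/var/www/robots")
    assert robots_dir.is_directory
    assert robots_dir.user == "root"
    assert robots_dir.group == "root"
    assert robots_dir.mode == 0o755

    # File installed by the "Copy the robots.txt" task.
    robots_txt = host.file("/var/www/robots/robots.txt")
    assert robots_txt.is_file
    assert robots_txt.mode == 0o644
    assert robots_txt.contains("Crawl-delay: 2")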

@@ -46,6 +46,7 @@
  </Location>
  RewriteEngine On
  RewriteRule ^/robots.txt$ /var/www/robots/robots.txt [L]
  RewriteRule "/pipermail/(.*)" "/var/lib/mailman/web-data/mm2archives/%{HTTP_HOST}/public/$1"
  RewriteRule "/cgi-bin/mailman/listinfo/(.*)" "https://%{HTTP_HOST}/mailman3/lists/$1.%{HTTP_HOST}/"
  RewriteRule "/cgi-bin/mailman/listinfo" "https://%{HTTP_HOST}/mailman3/lists/"
@@ -66,4 +67,8 @@
    Allow from all
    Require all granted
  </Directory>
  <Directory "/var/www/robots">
    Require all granted
  </Directory>
</VirtualHost>
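
With the vhost reloaded, the rewrite can be exercised end to end; a minimal
sketch with Python's urllib against a placeholder hostname, confirming
/robots.txt now comes back as a static 200 instead of hitting the WSGI app
that was returning 500/503:

import urllib.request

# Placeholder hostname; substitute one of the list servers.
URL = "https://lists.example.org/robots.txt"

with urllib.request.urlopen(URL, timeout=10) as resp:
    body = resp.read().decode()
    # The RewriteRule should serve the file straight from /var/www/robots.
    assert resp.status == 200
    assert "Crawl-delay: 2" in body

print(body)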