Support disallowing robots

Add a disallow_robots parameter which can be used to disallow
indexing of all site content by robots. This is particularly useful
for test deployments where you don't want extra (often stale) copies
of your content to show up in search engines.
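
For example, a test deployment could opt in by setting the new parameter
when declaring the class (a minimal sketch; the node name and the role
value are illustrative, not part of this change):

    node 'wiki-test.example.org' {
      class { '::mediawiki':
        role            => 'all',
        disallow_robots => true,
      }
    }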

Change-Id: Ic62a72555315bd344db338809920a3605f17c8c6
Jeremy Stanley 2016-09-07 20:45:25 +00:00
parent 43b131e06d
commit d79c672d41
3 changed files with 19 additions and 0 deletions


@@ -0,0 +1,2 @@
User-agent: *
Disallow: /


@@ -24,6 +24,7 @@ class mediawiki(
  $wg_sitename = undef,
  $wg_logo = undef,
  $wg_openidforcedprovider = 'https://login.launchpad.net/+openid',
  $disallow_robots = false,
) {
  if ($role == 'app' or $role == 'all') {
@@ -46,6 +47,17 @@ class mediawiki(
      require => File['/srv/mediawiki'],
    }
    if $disallow_robots == true {
      file { '/srv/mediawiki/robots.txt':
        ensure  => file,
        group   => 'root',
        mode    => '0444',
        owner   => 'root',
        source  => 'puppet:///modules/mediawiki/disallow_robots.txt',
        require => File['/srv/mediawiki'],
      }
    }
    include ::httpd
    include ::mediawiki::php
    include ::mediawiki::app
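
One consequence of guarding the resource with an if block: flipping
disallow_robots back to false leaves a previously deployed robots.txt
unmanaged on disk. A hypothetical variant (not part of this change) would
toggle the file's ensure instead:

    # Hypothetical alternative, not in this change: removing robots.txt
    # again when disallow_robots is set back to false.
    file { '/srv/mediawiki/robots.txt':
      ensure  => $disallow_robots ? {
        true    => file,
        default => absent,
      },
      group   => 'root',
      mode    => '0444',
      owner   => 'root',
      source  => 'puppet:///modules/mediawiki/disallow_robots.txt',
      require => File['/srv/mediawiki'],
    }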


@@ -95,6 +95,11 @@
Alias /w <%= scope['mediawiki::mediawiki_location'] %>
Alias /wiki <%= scope['mediawiki::mediawiki_location'] %>/index.php
<% if scope['mediawiki::disallow_robots'] == true %>
# Request that search engines not index this site
Alias /robots.txt /srv/mediawiki/robots.txt
<% end %>
# Redirect old /Article_Name urls
RewriteEngine on
RewriteCond %{REQUEST_URI} !^/w/
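
Once applied with disallow_robots set to true, the new alias can be
spot-checked over HTTP (hostname is illustrative); the response should
match the two-line disallow file added above:

    $ curl -s http://wiki-test.example.org/robots.txt
    User-agent: *
    Disallow: /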