From d79c672d4179ee57b4c7d741dc68d88e9430149f Mon Sep 17 00:00:00 2001
From: Jeremy Stanley
Date: Wed, 7 Sep 2016 20:45:25 +0000
Subject: [PATCH] Support disallowing robots

Add a disallow_robots parameter which can be used to disallow
indexing of all site content by robots. This is particularly useful
for test deployments where you don't want extra (often stale) copies
of your content to show up in search engines.

Change-Id: Ic62a72555315bd344db338809920a3605f17c8c6
---
Reviewer notes (placed after the "---" separator, so they are not part
of the commit message):

* files/disallow_robots.txt: new two-line robots.txt that disallows "/"
  for every user agent.
* manifests/init.pp: adds the $disallow_robots class parameter (default
  false). Only when it compares equal to true is the file above
  installed as /srv/mediawiki/robots.txt (owner/group root, mode 0444),
  ordered after File['/srv/mediawiki'].
* templates/apache/mediawiki.erb: under the same condition, emits an
  Alias mapping /robots.txt to /srv/mediawiki/robots.txt.
* NOTE(review): switching the parameter back to false removes the Alias,
  but nothing in this change removes an already-installed
  /srv/mediawiki/robots.txt -- confirm that is intended.
* NOTE(review): the line structure of this patch was restored from a
  whitespace-collapsed copy. Hunk line counts match the @@ headers, but
  indentation depth and any column alignment inside the hunks are
  reconstructed (two-space Puppet/Apache convention) -- verify the
  context lines against the target files before applying. The From:
  header carries no address in the copy this was restored from.

 files/disallow_robots.txt      |  2 ++
 manifests/init.pp              | 12 ++++++++++++
 templates/apache/mediawiki.erb |  5 +++++
 3 files changed, 19 insertions(+)
 create mode 100644 files/disallow_robots.txt

diff --git a/files/disallow_robots.txt b/files/disallow_robots.txt
new file mode 100644
index 0000000..1f53798
--- /dev/null
+++ b/files/disallow_robots.txt
@@ -0,0 +1,2 @@
+User-agent: *
+Disallow: /
diff --git a/manifests/init.pp b/manifests/init.pp
index a34c980..243b062 100644
--- a/manifests/init.pp
+++ b/manifests/init.pp
@@ -24,6 +24,7 @@ class mediawiki(
   $wg_sitename = undef,
   $wg_logo = undef,
   $wg_openidforcedprovider = 'https://login.launchpad.net/+openid',
+  $disallow_robots = false,
 ) {
 
   if ($role == 'app' or $role == 'all') {
@@ -46,6 +47,17 @@ class mediawiki(
       require => File['/srv/mediawiki'],
     }
 
+    if $disallow_robots == true {
+      file { '/srv/mediawiki/robots.txt':
+        ensure => file,
+        group => 'root',
+        mode => '0444',
+        owner => 'root',
+        source => 'puppet:///modules/mediawiki/disallow_robots.txt',
+        require => File['/srv/mediawiki'],
+      }
+    }
+
     include ::httpd
     include ::mediawiki::php
     include ::mediawiki::app
diff --git a/templates/apache/mediawiki.erb b/templates/apache/mediawiki.erb
index 792ce6c..3905794 100644
--- a/templates/apache/mediawiki.erb
+++ b/templates/apache/mediawiki.erb
@@ -95,6 +95,11 @@
   Alias /w <%= scope['mediawiki::mediawiki_location'] %>
   Alias /wiki <%= scope['mediawiki::mediawiki_location'] %>/index.php
 
+<% if scope['mediawiki::disallow_robots'] == true %>
+  # Request that search engines not index this site
+  Alias /robots.txt /srv/mediawiki/robots.txt
+<% end %>
+
   # Redirect old /Article_Name urls
   RewriteEngine on
   RewriteCond %{REQUEST_URI} !^/w/