Merge "Add a sqlite middleware to load logged sqlite databases on-the-fly"
This commit is contained in:
commit
f137e8fb1f
|
@ -0,0 +1,134 @@
|
|||
# Copyright (c) 2017 Red Hat, Inc.
|
||||
#
|
||||
# This file is part of ARA: Ansible Run Analysis.
|
||||
#
|
||||
# ARA is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# ARA is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with ARA. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
# A WSGI script to load the ARA web application against a variable database
|
||||
# location requested over HTTP.
|
||||
# Can be configured using environment variables (e.g., Apache SetEnv) with the
|
||||
# following variables:
|
||||
#
|
||||
# ARA_WSGI_USE_VIRTUALENV
|
||||
# Enable virtual environment usage if ARA is installed in a virtual
|
||||
# environment.
|
||||
# Defaults to '0', set to '1' to enable.
|
||||
# ARA_WSGI_VIRTUALENV_PATH
|
||||
# When using a virtual environment, where the virtualenv is located.
|
||||
# Defaults to None, set to the absolute path of your virtualenv.
|
||||
# ARA_WSGI_TMPDIR_MAX_AGE
|
||||
# This WSGI middleware creates temporary directories which should be
|
||||
# discarded on a regular basis to avoid them accumulating.
|
||||
# This is a duration, in seconds, before cleaning directories up.
|
||||
# Defaults to 3600.
|
||||
# ARA_WSGI_LOG_ROOT
|
||||
# Absolute path on the filesystem that matches the DocumentRoot of your
|
||||
# webserver vhost.
|
||||
# Defaults to '/srv/static/logs'.
|
||||
# ARA_WSGI_DATABASE_DIRECTORY
|
||||
# Subdirectory in which ARA sqlite databases are expected to reside.
|
||||
# For example, 'ara-report' would expect:
|
||||
# http://logserver/some/path/ara-report/ansible.sqlite
|
||||
# This variable should match the 'WSGIScriptAliasMatch' pattern of your
|
||||
# webserver vhost.
|
||||
# Defaults to 'ara-report'
|
||||
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import six
|
||||
import time
|
||||
|
||||
# Optionally activate the virtual environment ARA is installed in. This must
# happen before anything from ARA is imported.
if (int(os.getenv('ARA_WSGI_USE_VIRTUALENV', 0)) == 1 and
        os.getenv('ARA_WSGI_VIRTUALENV_PATH')):
    activate_this = os.getenv('ARA_WSGI_VIRTUALENV_PATH')
    # ARA_WSGI_VIRTUALENV_PATH is documented as the virtualenv directory;
    # a direct path to an activate_this.py script is accepted as well.
    if os.path.isdir(activate_this):
        activate_this = os.path.join(activate_this, 'bin', 'activate_this.py')
    with open(activate_this) as activation_script:
        activation_code = compile(activation_script.read(),
                                  activate_this, 'exec')
    # activate_this.py locates the virtualenv through __file__, so it has to
    # be provided explicitly. This exec() form works on python2 and python3.
    exec(activation_code, dict(__file__=activate_this))  # nosec

# Duration, in seconds, before the temporary directory gets cleaned up
TMPDIR_MAX_AGE = int(os.getenv('ARA_WSGI_TMPDIR_MAX_AGE', 3600))
# Filesystem path expected to match the DocumentRoot of the vhost
LOG_ROOT = os.getenv('ARA_WSGI_LOG_ROOT', '/srv/static/logs')
# Name of the subdirectory in which the sqlite databases are looked up
DATABASE_DIRECTORY = os.getenv('ARA_WSGI_DATABASE_DIRECTORY', 'ara-report')

logger = logging.getLogger('ara.wsgi_sqlite')
if not logger.handlers:
    logging.basicConfig(format='%(name)s:%(levelname)s:%(message)s')
|
||||
|
||||
|
||||
def bad_request(environ, start_response, message):
    """Log ``message`` and answer the request with an HTTP 400 page.

    :param environ: WSGI environment of the request (unused)
    :param start_response: WSGI ``start_response`` callable
    :param message: explanation logged and embedded in the error page
    :returns: the response body as a single-item list of bytes
    """
    logger.error('HTTP 400: %s', message)
    body = """
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
<title>400 Bad Request</title>
<h1>Bad Request</h1>
<p>%s</p>""" % message
    # WSGI response bodies must be bytes: this is already the case for a
    # python2 str, python3 text (or python2 unicode) must be encoded.
    if not isinstance(body, bytes):
        body = body.encode('utf-8')

    status = '400 Bad Request'
    response_headers = [('Content-Type', 'text/html')]
    start_response(status, response_headers)
    return [body]
|
||||
|
||||
|
||||
def _is_within(root, candidate):
    """Return True if ``candidate`` is ``root`` itself or located below it."""
    root = os.path.abspath(root)
    # Compare on a path separator boundary: a bare prefix comparison would
    # let '/srv/static/logs-other' pass for a root of '/srv/static/logs'.
    return (candidate == root or
            candidate.startswith(root.rstrip(os.sep) + os.sep))


def application(environ, start_response):
    """Serve the ARA web application against the database matching the URL.

    The requested URL is mapped to a directory under the log root. If that
    directory holds an ``ansible.sqlite`` file, ARA is configured through
    environment variables to read from it and the request is handed over to
    the ARA web application.

    The ``ARA_WSGI_LOG_ROOT``, ``ARA_WSGI_DATABASE_DIRECTORY`` and
    ``ARA_WSGI_TMPDIR_MAX_AGE`` settings are looked up in the WSGI ``environ``
    first and default to the module-level values otherwise.

    :param environ: WSGI environment, must provide ``REQUEST_URI``
    :param start_response: WSGI ``start_response`` callable
    :returns: the response of the ARA web application, or an HTTP 400 page
              when the URL does not map to a database or ARA fails to load
    """
    # Apache's SetEnv ends up in the per-request WSGI environ rather than in
    # os.environ: look there first, then fall back to the module settings.
    log_root = os.path.abspath(environ.get('ARA_WSGI_LOG_ROOT', LOG_ROOT))
    database_directory = environ.get('ARA_WSGI_DATABASE_DIRECTORY',
                                     DATABASE_DIRECTORY)
    tmpdir_max_age = int(environ.get('ARA_WSGI_TMPDIR_MAX_AGE',
                                     TMPDIR_MAX_AGE))

    request = environ['REQUEST_URI']
    # The directory name is a literal, not a pattern: escape it
    pattern = r'/(?P<path>.*/{}/)'.format(re.escape(database_directory))
    match = re.search(pattern, request)
    if not match:
        return bad_request(environ, start_response,
                           'No "/{}/" in URL.'.format(database_directory))

    path = os.path.abspath(os.path.join(log_root, match.group('path')))

    # Ensure we don't escape outside the log root and we are looking at a
    # valid directory
    if not _is_within(log_root, path) or not os.path.isdir(path):
        logger.error('Directory access violation: %s', path)
        return bad_request(environ, start_response, 'No directory found.')

    database = os.path.join(path, 'ansible.sqlite')
    if not os.path.isfile(database):
        return bad_request(environ, start_response, 'No ARA database found.')

    # ARA and Ansible (when loading configuration) both expect a directory
    # they are able to write to, this can be safely discarded.
    # Nothing is read from here and there is therefore no security risk.
    # It needs to be at a known location in order to be able to clean it up
    # so it doesn't accumulate needless directories and files.
    # TODO: ARA 1.0 no longer requires temporary directories, clean this up.
    tmpdir = '/tmp/ara_wsgi_sqlite'  # nosec
    if os.path.exists(tmpdir):
        # Periodically delete this directory to avoid accumulating directories
        # and files endlessly
        now = time.time()
        if now - tmpdir_max_age > os.path.getmtime(tmpdir):
            shutil.rmtree(tmpdir, ignore_errors=True)
    os.environ['ANSIBLE_LOCAL_TEMP'] = os.path.join(tmpdir, '.ansible')
    os.environ['ARA_DIR'] = os.path.join(tmpdir, '.ara')

    # Path to the ARA database
    os.environ['ARA_DATABASE'] = 'sqlite:///{}'.format(database)

    # Imported here, after the environment variables above have been set
    from ara.webapp import create_app
    try:
        app = create_app()
        app.config['APPLICATION_ROOT'] = match.group('path')
        return app(environ, start_response)
    except Exception as e:
        # We're staying relatively vague on purpose to avoid disclosure
        logger.error('ARA bootstrap failure for %s: %s', database, e)
        return bad_request(environ, start_response, 'ARA bootstrap failure.')
|
||||
|
||||
|
||||
def main():
    """Return the WSGI callable exposed by this script."""
    wsgi_callable = application
    return wsgi_callable
|
|
@ -0,0 +1,124 @@
|
|||
.. _advanced_configuration:
|
||||
|
||||
Serving ARA sqlite databases over http
|
||||
======================================
|
||||
|
||||
Hosting statically generated reports is not very efficient at a large scale.
|
||||
The reports are relatively small in size but can contain thousands of files if
|
||||
you are generating a report that contains thousands of tasks.
|
||||
|
||||
However, using a centralized database (such as MySQL) might not be optimal
either, perhaps due to latency or because of the concurrency of the runs.
|
||||
It is also possible you are not interested in aggregating data in the first
|
||||
place and would rather keep individual reports.
|
||||
|
||||
ARA ships a bundled WSGI middleware, ``wsgi_sqlite.py``.
|
||||
|
||||
This middleware allows you to store your ``ansible.sqlite`` databases on a
|
||||
web server (for example, a logserver for your CI jobs) and load these databases
|
||||
on the fly without needing to generate static reports.
|
||||
|
||||
It works by matching a requested URL
|
||||
(ex: ``http://logserver/some/path/ara-report``) against the filesystem location
|
||||
(ex: ``/srv/static/logs/some/path/ara-report/ansible.sqlite``) and loading
|
||||
ARA's web application so that it reads from the database directly.
|
||||
|
||||
To put this use case into perspective, it was "benchmarked" against a single
|
||||
job from the OpenStack-Ansible_ project:
|
||||
|
||||
- 4 playbooks
|
||||
- 4647 tasks
|
||||
- 4760 results
|
||||
- 53 hosts, of which 39 had gathered host facts
|
||||
- 416 saved files
|
||||
|
||||
Generating a static report from that database takes ~1min30s on an average
|
||||
machine. It weighs 63MB (27MB recursively gzipped), contains 5321 files and
|
||||
5243 directories.
|
||||
|
||||
This middleware allows you to host the exact same report on your web server
|
||||
just by storing the sqlite database which is just one file and weighs 5.6MB.
|
||||
|
||||
.. _OpenStack-Ansible: https://github.com/openstack/openstack-ansible
|
||||
|
||||
wsgi_sqlite configuration
|
||||
-------------------------
|
||||
|
||||
Configuration for the ``wsgi_sqlite.py`` script can be done through environment
|
||||
variables, for example with Apache's ``SetEnv`` directive.
|
||||
|
||||
ARA_WSGI_USE_VIRTUALENV
|
||||
~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Enable virtual environment usage if ARA is installed in a virtual
|
||||
environment. You will need to set ``ARA_WSGI_VIRTUALENV_PATH`` if enabling
|
||||
this.
|
||||
|
||||
Defaults to ``0``, set to ``1`` to enable.
|
||||
|
||||
ARA_WSGI_VIRTUALENV_PATH
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
When using a virtual environment, where the virtualenv is located.
|
||||
Defaults to ``None``, set to the absolute path of your virtualenv.
|
||||
|
||||
ARA_WSGI_TMPDIR_MAX_AGE
|
||||
~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
This WSGI middleware creates temporary directories which should be
|
||||
discarded on a regular basis to avoid them accumulating.
|
||||
This is a duration, in seconds, before cleaning directories up.
|
||||
|
||||
Defaults to ``3600``.
|
||||
|
||||
ARA_WSGI_LOG_ROOT
|
||||
~~~~~~~~~~~~~~~~~
|
||||
|
||||
Absolute path on the filesystem that matches the ``DocumentRoot`` of your
|
||||
webserver vhost.
|
||||
|
||||
For a ``DocumentRoot`` of ``/srv/static/logs``, this value should be
|
||||
``/srv/static/logs``.
|
||||
|
||||
Defaults to ``/srv/static/logs``.
|
||||
|
||||
ARA_WSGI_DATABASE_DIRECTORY
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Subdirectory in which ARA sqlite databases are expected to reside.
|
||||
For example, ``ara-report`` would expect:
|
||||
``http://logserver/some/path/ara-report/ansible.sqlite``.
|
||||
|
||||
This variable should match the ``WSGIScriptAliasMatch`` pattern of your
|
||||
webserver vhost.
|
||||
|
||||
Defaults to ``ara-report``.
|
||||
|
||||
Using wsgi_sqlite with Apache's mod_wsgi
|
||||
----------------------------------------
|
||||
|
||||
The vhost requires you to redirect requests for ``*/ara-report/*`` to the WSGI
|
||||
middleware. In order to do so, the vhost must look like the following::
|
||||
|
||||
<VirtualHost *:80>
|
||||
# Remember that DocumentRoot and ARA_WSGI_LOG_ROOT must match
|
||||
DocumentRoot /srv/static/logs
|
||||
ServerName logs.domain.tld
|
||||
|
||||
ErrorLog /var/log/httpd/logs.domain.tld-error.log
|
||||
LogLevel warn
|
||||
CustomLog /var/log/httpd/logs.domain.tld-access.log combined
|
||||
|
||||
SetEnv ARA_WSGI_TMPDIR_MAX_AGE 3600
|
||||
SetEnv ARA_WSGI_LOG_ROOT /srv/static/logs
|
||||
SetEnv ARA_WSGI_DATABASE_DIRECTORY ara-report
|
||||
WSGIDaemonProcess ara user=apache group=apache processes=4 threads=1
|
||||
WSGIScriptAliasMatch ^.*/ara-report /var/www/cgi-bin/ara-wsgi-sqlite
|
||||
</VirtualHost>
|
||||
|
||||
You'll notice the ``WSGIScriptAliasMatch`` directive pointing to the WSGI
|
||||
script. This is bundled when installing ARA and can be copied to the location
|
||||
of your choice by doing::
|
||||
|
||||
cp -p $(which ara-wsgi-sqlite) /var/www/cgi-bin/
|
|
@ -12,6 +12,7 @@ Table of Contents
|
|||
Installation <installation>
|
||||
Configuration <configuration>
|
||||
Web Server configuration <webserver>
|
||||
Advanced use cases <advanced>
|
||||
Usage <usage>
|
||||
Contributing <contributing>
|
||||
Manifesto: Project core values <manifesto>
|
||||
|
|
Loading…
Reference in New Issue