Allow the config to set the filter and view

Currently os-loganalyze detects which filter and view to use based on
the file type, name and other headers. Instead, allow the config to
define exactly which filter and view to use. If neither is set, the
legacy detection is still applied.

The next change will allow the config to define filters and views
based on file match conditions.

Change-Id: I8955577c100b13ce20609426025a68fbbd052423
This commit is contained in:
Joshua Hesketh 2015-04-14 22:14:25 +10:00
parent 0a4c50260d
commit d4ac63b711
9 changed files with 169 additions and 75 deletions

View File

@ -1,3 +1,7 @@
[general]
filter = SevFilter
view = HTMLView
[swift]
authurl=https://keystone.example.org/v2.0/
user=example

View File

@ -17,6 +17,8 @@
import re
import os_loganalyze.util as util
# which logs support severity
SUPPORTS_SEV = re.compile(
r'/' # this uses an re.search so anchor the string
@ -94,7 +96,7 @@ class LogLine(object):
self.line = line.rstrip()
class Filter(object):
class SevFilter(object):
def __init__(self, file_generator, minsev="NONE", limit=None):
self.minsev = minsev
@ -136,3 +138,41 @@ class Filter(object):
"""
minsev = self.minsev
return SEVS.get(sev, 0) < SEVS.get(minsev, 0)
class Line(object):
    """Minimal wrapper around one raw line of a file.

    Exposes ``line`` (the text exactly as given) and an always-empty
    ``date`` so that consumers which concatenate ``date + line`` keep
    working when the content was not parsed.
    """

    # No timestamp is extracted for unfiltered content.
    date = ''

    def __init__(self, line):
        # Stored untouched: no stripping or escaping happens here.
        self.line = line
class NoFilter(object):
    """Filter that performs no filtering at all.

    Every item produced by the wrapped file generator is handed on,
    wrapped in a ``Line`` object.
    """

    # Severity filtering is not offered by this filter.
    supports_sev = False

    def __init__(self, file_generator):
        self.file_generator = file_generator

    def __iter__(self):
        """Yield a ``Line`` for each item of the underlying generator."""
        for raw_line in self.file_generator:
            yield Line(raw_line)
def get_filter_generator(file_generator, environ, root_path, config):
    """Return the filter to use as per the config.

    The ``filter`` option of the ``[general]`` config section wins when it
    names a known filter ('sevfilter'/'sev' or 'nofilter'/'no', matched
    case-insensitively). Otherwise the legacy detection is used: files
    that ``util.use_passthrough_view`` flags get ``NoFilter``, everything
    else gets ``SevFilter``.

    :param file_generator: generator for the requested file; its
        ``file_headers`` attribute drives the legacy detection.
    :param environ: WSGI environ; supplies the ``level`` and ``limit``
        request parameters and the ``OS_LOGANALYZE_STRIP`` flag.
    :param root_path: accepted for signature parity; not used here.
    :param config: parsed config object (ConfigParser interface).
    :returns: a ``SevFilter`` or ``NoFilter`` wrapping ``file_generator``.
    """
    minsev = util.parse_param(environ, 'level', default="NONE")
    limit = util.parse_param(environ, 'limit')

    def _build_sev_filter():
        sev_filter = SevFilter(file_generator, minsev, limit)
        # The WSGI entry point used to set this flag right after building
        # the severity filter; keep honouring it so OS_LOGANALYZE_STRIP is
        # not silently ignored now that construction lives here.
        # NOTE(review): assumes SevFilter still reads ``strip_control``.
        if environ.get('OS_LOGANALYZE_STRIP', None):
            sev_filter.strip_control = True
        return sev_filter

    if (config.has_section('general') and
            config.has_option('general', 'filter')):
        set_filter = config.get('general', 'filter').lower()
        if set_filter in ('sevfilter', 'sev'):
            return _build_sev_filter()
        if set_filter in ('nofilter', 'no'):
            return NoFilter(file_generator)
        # Unknown names fall through to the legacy detection below.

    if util.use_passthrough_view(file_generator.file_headers):
        return NoFilter(file_generator)
    return _build_sev_filter()

View File

@ -1,3 +1,6 @@
[general]
# Don't override the filter or view default detection
[swift]
authurl=https://keystone.example.org/v2.0/
user=example

View File

@ -0,0 +1,12 @@
[general]
filter = nofilter
view = passthrough
[swift]
authurl=https://keystone.example.org/v2.0/
user=example
password=example
container=logs
region=EXP
tenant=
chunk_size=64

View File

@ -30,8 +30,8 @@ class TestViews(base.TestCase):
kwargs = {'PATH_INFO': '/htmlify/%s' % fname}
file_generator = osgen.get_file_generator(self.fake_env(**kwargs),
root_path)
flines_generator = osfilter.Filter(file_generator)
return flines_generator
filter_generator = osfilter.SevFilter(file_generator)
return filter_generator
def test_html_detection(self):
gen = self.get_generator('sample.html')

View File

@ -171,6 +171,30 @@ class TestWsgiDisk(base.TestCase):
with open(base.samples_path('samples') + 'openstack_logo.png') as f:
self.assertEqual(first, f.readline())
def test_config_no_filter(self):
    """With the no-filter config, the ``limit`` parameter is ignored."""
    self.wsgi_config_file = (base.samples_path('samples') +
                             'wsgi_plain.conf')
    # Ask for only 10 lines; with no filter in place the request
    # should still return the whole file.
    gen = self.get_generator('devstacklog.txt.gz', limit=10)
    lines = sum(1 for _ in gen)
    # A limited HTML response would be the limit plus a header and a
    # footer (12 lines), so anything else means the limit was ignored.
    self.assertNotEqual(12, lines)
def test_config_passthrough_view(self):
    """With the passthrough config, no HTML wrapper is emitted."""
    self.wsgi_config_file = (base.samples_path('samples') +
                             'wsgi_plain.conf')
    # Check there is no HTML on a file that should otherwise have it
    gen = self.get_generator('devstacklog.txt.gz')
    # Use the next() builtin rather than the Python 2-only .next()
    # method; it behaves the same here and also works on Python 3.
    first = next(gen)
    self.assertNotIn('<html>', first)
class TestWsgiSwift(TestWsgiDisk):
"""Test loading files from swift."""

View File

@ -60,3 +60,47 @@ def get_headers_for_file(file_path):
resp['date'] = time.strftime("%a, %d %b %Y %H:%M:%S GMT")
resp['content-type'] = get_file_mime(file_path)
return resp
def should_be_html(environ):
    """Simple content negotiation.

    If the client supports content negotiation, and asks for text/html,
    we give it to them, unless they also specifically want to override
    by passing ?content-type=text/plain in the query.

    This should be able to handle the case of dumb clients defaulting to
    html, but also let devs override the text format when 35 MB html
    log files kill their browser (as per a nova-api log).

    :param environ: WSGI environ mapping; only ``HTTP_ACCEPT`` and
        ``QUERY_STRING`` are consulted.
    :returns: True when the response should be rendered as HTML.
    """
    # cgi.parse_qs is only a deprecated alias of urlparse.parse_qs, so
    # call the real function. Imported locally so the block works on
    # both Python 2 and Python 3 without touching module imports.
    try:
        from urllib.parse import parse_qs
    except ImportError:
        from urlparse import parse_qs

    accepts_html = 'text/html' in environ.get('HTTP_ACCEPT', '')
    if not accepts_html:
        return False

    parameters = parse_qs(environ.get('QUERY_STRING', ''))
    # HTML-escaping the value before comparing it with 'text/plain'
    # could never change the outcome, so compare the raw value.
    requested_type = parameters.get('content-type', [None])[0]
    return requested_type != 'text/plain'
def use_passthrough_view(file_headers):
    """Guess if we need to use the passthrough filter.

    :param file_headers: mapping of header name to value for the file;
        'content-type' is read, and 'filename' too for gzip content.
    :returns: False when the file should be formatted, True when it
        should be passed through untouched.
    """
    # For legacy we'll try and format. This shouldn't occur though.
    if 'content-type' not in file_headers:
        return False

    content_type = file_headers['content-type']

    # Plain text and HTML are always formatted.
    if content_type in ('text/plain', 'text/html'):
        return False

    if content_type in ('application/x-gzip', 'application/gzip'):
        # Guess from the name under the .gz suffix whether to render
        # the output or offer a download.
        name = file_headers['filename']
        if name.endswith('.gz'):
            name = name[:-3]
        extension = os.path.splitext(name)[1]
        if extension in ('.txt', '.html'):
            return False

    return True

View File

@ -17,6 +17,8 @@ import cgi
import collections
import re
import os_loganalyze.util as util
HTML_HEADER = """<html>
<head>
<style>
@ -121,8 +123,8 @@ class HTMLView(collections.Iterable):
is_html = False
no_escape_count = 0
def __init__(self, gen):
self.gen = gen
def __init__(self, filter_generator):
self.filter_generator = filter_generator
def _discover_html(self, line):
self.is_html = HTML_RE.match(line)
@ -161,13 +163,13 @@ class HTMLView(collections.Iterable):
return newline
def __iter__(self):
igen = (x for x in self.gen)
igen = (x for x in self.filter_generator)
first_line = next(igen)
self._discover_html(first_line.line)
if not self.is_html:
header = HTML_HEADER
if self.gen.supports_sev:
if self.filter_generator.supports_sev:
header += HTML_HEADER_SEV
header += HTML_HEADER_BODY
yield header
@ -188,22 +190,42 @@ class HTMLView(collections.Iterable):
class TextView(collections.Iterable):
headers = [('Content-type', 'text/plain')]
def __init__(self, gen):
self.gen = gen
def __init__(self, filter_generator):
self.filter_generator = filter_generator
def __iter__(self):
for line in self.gen:
for line in self.filter_generator:
yield line.date + line.line + "\n"
class PassthroughView(collections.Iterable):
    """View that returns the file content without any formatting.

    The response headers are copied from the headers of the underlying
    file, and each yielded item is the raw ``line`` attribute of the
    objects produced by the filter generator.
    """

    # Class-level default only, so ``PassthroughView.headers`` still
    # exists. It is never mutated: each instance gets its own list.
    headers = []

    def __init__(self, filter_generator):
        self.filter_generator = filter_generator
        file_headers = self.filter_generator.file_generator.file_headers
        # Build a fresh list per instance. Appending to the shared
        # class-level list made the headers of every earlier request
        # pile up on all later responses.
        self.headers = list(file_headers.items())

    def __iter__(self):
        for line in self.filter_generator:
            yield line.line
def get_view_generator(filter_generator, environ, root_path, config):
    """Return the view to use as per the config.

    A recognised ``view`` option in the ``[general]`` section takes
    priority; names are matched case-insensitively. Without one (or
    with an unrecognised name) the view is guessed from the file
    headers and the request.
    """
    if config.has_section('general') and config.has_option('general', 'view'):
        requested = config.get('general', 'view').lower()
        if requested in ['htmlview', 'html']:
            return HTMLView(filter_generator)
        if requested in ['textview', 'text']:
            return TextView(filter_generator)
        if requested in ['passthroughview', 'passthrough']:
            return PassthroughView(filter_generator)

    # Nothing usable in the config: fall back to detection.
    file_headers = filter_generator.file_generator.file_headers
    if util.use_passthrough_view(file_headers):
        return PassthroughView(filter_generator)
    if util.should_be_html(environ):
        return HTMLView(filter_generator)
    return TextView(filter_generator)

View File

@ -15,7 +15,6 @@
# under the License.
import cgi
import ConfigParser
import fileinput
import os.path
@ -23,7 +22,6 @@ import sys
import os_loganalyze.filter as osfilter
import os_loganalyze.generator as osgen
import os_loganalyze.util as util
import os_loganalyze.view as osview
@ -34,57 +32,12 @@ def htmlify_stdin():
out.write(line)
def should_be_html(environ):
    """Simple content negotiation.

    If the client supports content negotiation, and asks for text/html,
    we give it to them, unless they also specifically want to override
    by passing ?content-type=text/plain in the query.

    This should be able to handle the case of dumb clients defaulting to
    html, but also let devs override the text format when 35 MB html
    log files kill their browser (as per a nova-api log).

    :param environ: WSGI environ mapping; only ``HTTP_ACCEPT`` and
        ``QUERY_STRING`` are consulted.
    :returns: True when the response should be rendered as HTML.
    """
    # cgi.parse_qs is only a deprecated alias of urlparse.parse_qs, so
    # call the real function. Imported locally so the block works on
    # both Python 2 and Python 3 without touching module imports.
    try:
        from urllib.parse import parse_qs
    except ImportError:
        from urlparse import parse_qs

    accepts_html = 'text/html' in environ.get('HTTP_ACCEPT', '')
    if not accepts_html:
        return False

    parameters = parse_qs(environ.get('QUERY_STRING', ''))
    # HTML-escaping the value before comparing it with 'text/plain'
    # could never change the outcome, so compare the raw value.
    requested_type = parameters.get('content-type', [None])[0]
    return requested_type != 'text/plain'
def get_config(wsgi_config):
    """Load the WSGI config file and return the populated parser.

    :param wsgi_config: path to the config file; a leading ``~`` is
        expanded to the user's home directory.
    :returns: a ``ConfigParser.ConfigParser`` that has read that path.
    """
    config_path = os.path.expanduser(wsgi_config)
    parser = ConfigParser.ConfigParser()
    parser.read(config_path)
    return parser
def use_passthrough_view(file_headers):
    """Determine if we need to use the passthrough filter.

    :param file_headers: mapping of header name to value for the file;
        'content-type' is read, and 'filename' too for gzip content.
    :returns: False when the file should be formatted, True when it
        should be passed through untouched.
    """
    # For legacy we'll try and format. This shouldn't occur though.
    if 'content-type' not in file_headers:
        return False

    content_type = file_headers['content-type']

    # Plain text and HTML are always formatted.
    if content_type in ('text/plain', 'text/html'):
        return False

    if content_type in ('application/x-gzip', 'application/gzip'):
        # Guess from the name under the .gz suffix whether to render
        # the output or offer a download.
        name = file_headers['filename']
        if name.endswith('.gz'):
            name = name[:-3]
        extension = os.path.splitext(name)[1]
        if extension in ('.txt', '.html'):
            return False

    return True
def application(environ, start_response, root_path=None,
wsgi_config='/etc/os_loganalyze/wsgi.conf'):
if root_path is None:
@ -111,18 +64,10 @@ def application(environ, start_response, root_path=None,
start_response(status, response_headers)
return ['File Not Found']
if use_passthrough_view(file_generator.file_headers):
view_generator = osview.PassthroughView(file_generator)
else:
minsev = util.parse_param(environ, 'level', default="NONE")
limit = util.parse_param(environ, 'limit')
flines_generator = osfilter.Filter(file_generator, minsev, limit)
if environ.get('OS_LOGANALYZE_STRIP', None):
flines_generator.strip_control = True
if should_be_html(environ):
view_generator = osview.HTMLView(flines_generator)
else:
view_generator = osview.TextView(flines_generator)
filter_generator = osfilter.get_filter_generator(file_generator, environ,
root_path, config)
view_generator = osview.get_view_generator(filter_generator, environ,
root_path, config)
start_response(status, view_generator.headers)
return view_generator