Allow the config to set the filter and view

Currently os-loganalyze detects which filter and view to use based
on the file type, name and other headers. Instead, allow the config
to define exactly which filter and view to use. If none is set, the
legacy detection is still applied.

The next change will allow the config to define filters and views
based on file match conditions.

Change-Id: I8955577c100b13ce20609426025a68fbbd052423
This commit is contained in:
Joshua Hesketh 2015-04-14 22:14:25 +10:00
parent 0a4c50260d
commit d4ac63b711
9 changed files with 169 additions and 75 deletions

View File

@ -1,3 +1,7 @@
[general]
filter = SevFilter
view = HTMLView
[swift] [swift]
authurl=https://keystone.example.org/v2.0/ authurl=https://keystone.example.org/v2.0/
user=example user=example

View File

@ -17,6 +17,8 @@
import re import re
import os_loganalyze.util as util
# which logs support severity # which logs support severity
SUPPORTS_SEV = re.compile( SUPPORTS_SEV = re.compile(
r'/' # this uses an re.search so anchor the string r'/' # this uses an re.search so anchor the string
@ -94,7 +96,7 @@ class LogLine(object):
self.line = line.rstrip() self.line = line.rstrip()
class Filter(object): class SevFilter(object):
def __init__(self, file_generator, minsev="NONE", limit=None): def __init__(self, file_generator, minsev="NONE", limit=None):
self.minsev = minsev self.minsev = minsev
@ -136,3 +138,41 @@ class Filter(object):
""" """
minsev = self.minsev minsev = self.minsev
return SEVS.get(sev, 0) < SEVS.get(minsev, 0) return SEVS.get(sev, 0) < SEVS.get(minsev, 0)
class Line(object):
    """Bare wrapper around one line of text.

    Exposes the ``line`` and ``date`` attributes; ``date`` is always the
    empty string because nothing is parsed out of the text here.
    """

    # Class-level default: instances never override it.
    date = ''

    def __init__(self, line):
        """Store ``line`` exactly as given."""
        self.line = line
class NoFilter(object):
    """Filter that passes every line of the file generator through.

    Takes no severity or limit arguments; each item from the underlying
    generator is wrapped in a ``Line`` and yielded unchanged.
    """

    # Views read this flag to decide whether to emit severity controls.
    supports_sev = False

    def __init__(self, file_generator):
        """Remember the generator whose items will be wrapped."""
        self.file_generator = file_generator

    def __iter__(self):
        """Yield a ``Line`` for every item of ``file_generator``."""
        for raw_line in self.file_generator:
            yield Line(raw_line)
def get_filter_generator(file_generator, environ, root_path, config):
    """Return the filter to use as per the config.

    A ``filter`` option in the ``[general]`` section selects the filter
    explicitly (``sevfilter``/``sev`` or ``nofilter``/``no``, matched
    case-insensitively). Without a recognised option the legacy detection
    applies: files that need the passthrough view get ``NoFilter`` and
    everything else gets ``SevFilter``.

    ``root_path`` is accepted but not used by this function.
    """
    minsev = util.parse_param(environ, 'level', default="NONE")
    limit = util.parse_param(environ, 'limit')

    # Normalise the configured name once; None means "nothing configured".
    configured = None
    if (config.has_section('general') and
            config.has_option('general', 'filter')):
        configured = config.get('general', 'filter').lower()

    if configured in ('sevfilter', 'sev'):
        return SevFilter(file_generator, minsev, limit)
    if configured in ('nofilter', 'no'):
        return NoFilter(file_generator)

    # Legacy detection based on the file headers.
    if util.use_passthrough_view(file_generator.file_headers):
        return NoFilter(file_generator)
    return SevFilter(file_generator, minsev, limit)

View File

@ -1,3 +1,6 @@
[general]
# Don't override the filter or view default detection
[swift] [swift]
authurl=https://keystone.example.org/v2.0/ authurl=https://keystone.example.org/v2.0/
user=example user=example

View File

@ -0,0 +1,12 @@
[general]
filter = nofilter
view = passthrough
[swift]
authurl=https://keystone.example.org/v2.0/
user=example
password=example
container=logs
region=EXP
tenant=
chunk_size=64

View File

@ -30,8 +30,8 @@ class TestViews(base.TestCase):
kwargs = {'PATH_INFO': '/htmlify/%s' % fname} kwargs = {'PATH_INFO': '/htmlify/%s' % fname}
file_generator = osgen.get_file_generator(self.fake_env(**kwargs), file_generator = osgen.get_file_generator(self.fake_env(**kwargs),
root_path) root_path)
flines_generator = osfilter.Filter(file_generator) filter_generator = osfilter.SevFilter(file_generator)
return flines_generator return filter_generator
def test_html_detection(self): def test_html_detection(self):
gen = self.get_generator('sample.html') gen = self.get_generator('sample.html')

View File

@ -171,6 +171,30 @@ class TestWsgiDisk(base.TestCase):
with open(base.samples_path('samples') + 'openstack_logo.png') as f: with open(base.samples_path('samples') + 'openstack_logo.png') as f:
self.assertEqual(first, f.readline()) self.assertEqual(first, f.readline())
def test_config_no_filter(self):
    """A ``limit`` parameter must not truncate output under wsgi_plain.conf.

    NOTE(review): wsgi_plain.conf is presumably the sample config that
    sets ``filter = nofilter`` -- confirm against the samples directory.
    """
    samples_dir = base.samples_path('samples')
    self.wsgi_config_file = samples_dir + 'wsgi_plain.conf'

    # Ask for only 10 lines; the whole file is expected back regardless.
    gen = self.get_generator('devstacklog.txt.gz', limit=10)
    line_count = sum(1 for _ in gen)

    # A limit that was honoured would give 12 lines (the 10 requested plus
    # the header and footer), so any other count means it was not applied.
    self.assertNotEqual(12, line_count)
def test_config_passthrough_view(self):
    """The first chunk served under wsgi_plain.conf contains no ``<html>``.

    NOTE(review): wsgi_plain.conf is presumably the sample config that
    sets ``view = passthrough`` -- confirm against the samples directory.
    """
    self.wsgi_config_file = (base.samples_path('samples') +
                             'wsgi_plain.conf')
    # Check there is no HTML on a file that should otherwise have it
    gen = self.get_generator('devstacklog.txt.gz')
    # Use the next() builtin rather than the Python-2-only .next() method;
    # it behaves identically on Python 2.6+ and also works on Python 3.
    first = next(gen)
    self.assertNotIn('<html>', first)
class TestWsgiSwift(TestWsgiDisk): class TestWsgiSwift(TestWsgiDisk):
"""Test loading files from swift.""" """Test loading files from swift."""

View File

@ -60,3 +60,47 @@ def get_headers_for_file(file_path):
resp['date'] = time.strftime("%a, %d %b %Y %H:%M:%S GMT") resp['date'] = time.strftime("%a, %d %b %Y %H:%M:%S GMT")
resp['content-type'] = get_file_mime(file_path) resp['content-type'] = get_file_mime(file_path)
return resp return resp
def should_be_html(environ):
    """Simple content negotiation.

    If the client supports content negotiation, and asks for text/html,
    we give it to them, unless they also specifically want to override
    by passing ?content-type=text/plain in the query.

    This should be able to handle the case of dumb clients defaulting to
    html, but also let devs override the text format when 35 MB html
    log files kill their browser (as per a nova-api log).

    :param environ: WSGI environ dict; ``HTTP_ACCEPT`` and ``QUERY_STRING``
        are the only keys read.
    :returns: True when HTML was requested and not overridden, else False.
    """
    # cgi.parse_qs has been deprecated since Python 2.6 in favour of the
    # urlparse/urllib.parse implementation, so import that one directly.
    try:
        from urllib.parse import parse_qs  # Python 3
    except ImportError:
        from urlparse import parse_qs  # Python 2

    accepts_html = ('HTTP_ACCEPT' in environ and
                    'text/html' in environ['HTTP_ACCEPT'])

    parameters = parse_qs(environ.get('QUERY_STRING', ''))
    # No HTML-escaping is needed before the comparison: escaping leaves
    # 'text/plain' untouched, and any value it would alter cannot equal
    # 'text/plain' anyway.
    text_override = ('content-type' in parameters and
                     parameters['content-type'][0] == 'text/plain')

    return accepts_html and not text_override
def use_passthrough_view(file_headers):
    """Guess if the file should be passed through without formatting.

    :param file_headers: mapping of header names to values; only
        ``content-type`` and ``filename`` are read.
    :returns: False when the content should be formatted (no content-type
        header, plain text or HTML, or a gzip file whose inner name ends
        in ``.txt`` or ``.html``); True otherwise.
    """
    if 'content-type' not in file_headers:
        # For legacy we'll try and format. This shouldn't occur though.
        return False

    content_type = file_headers['content-type']
    if content_type in ('text/plain', 'text/html'):
        # We want to format these files
        return False

    if content_type in ('application/x-gzip', 'application/gzip'):
        # We'll need to guess if we should render the output or offer a
        # download. A missing filename gives us nothing to guess from, so
        # fall through to passthrough instead of raising KeyError.
        filename = file_headers.get('filename', '')
        if filename.endswith('.gz'):
            filename = filename[:-3]
        if os.path.splitext(filename)[1] in ('.txt', '.html'):
            return False

    return True

View File

@ -17,6 +17,8 @@ import cgi
import collections import collections
import re import re
import os_loganalyze.util as util
HTML_HEADER = """<html> HTML_HEADER = """<html>
<head> <head>
<style> <style>
@ -121,8 +123,8 @@ class HTMLView(collections.Iterable):
is_html = False is_html = False
no_escape_count = 0 no_escape_count = 0
def __init__(self, gen): def __init__(self, filter_generator):
self.gen = gen self.filter_generator = filter_generator
def _discover_html(self, line): def _discover_html(self, line):
self.is_html = HTML_RE.match(line) self.is_html = HTML_RE.match(line)
@ -161,13 +163,13 @@ class HTMLView(collections.Iterable):
return newline return newline
def __iter__(self): def __iter__(self):
igen = (x for x in self.gen) igen = (x for x in self.filter_generator)
first_line = next(igen) first_line = next(igen)
self._discover_html(first_line.line) self._discover_html(first_line.line)
if not self.is_html: if not self.is_html:
header = HTML_HEADER header = HTML_HEADER
if self.gen.supports_sev: if self.filter_generator.supports_sev:
header += HTML_HEADER_SEV header += HTML_HEADER_SEV
header += HTML_HEADER_BODY header += HTML_HEADER_BODY
yield header yield header
@ -188,22 +190,42 @@ class HTMLView(collections.Iterable):
class TextView(collections.Iterable):
    """Plain-text view: one output string per filtered line."""

    headers = [('Content-type', 'text/plain')]

    def __init__(self, filter_generator):
        """Keep the filter whose lines will be rendered."""
        self.filter_generator = filter_generator

    def __iter__(self):
        """Yield each line's ``date`` and ``line`` joined, newline-ended."""
        for entry in self.filter_generator:
            rendered = entry.date + entry.line + "\n"
            yield rendered
class PassthroughView(collections.Iterable):
    """View that emits the filtered lines untouched.

    The response headers are copied from the underlying file generator's
    ``file_headers`` rather than being fixed by the view.
    """

    # Class-level default only; every instance builds its own list.
    headers = []

    def __init__(self, filter_generator):
        """Store the filter and copy the file's headers for the response."""
        self.filter_generator = filter_generator
        # Build a fresh per-instance list. Appending to the class-level
        # list would share it between all instances, so headers from
        # earlier files would pile up on every later response.
        file_headers = self.filter_generator.file_generator.file_headers
        self.headers = list(file_headers.items())

    def __iter__(self):
        """Yield the ``line`` attribute of every filtered line as-is."""
        for line in self.filter_generator:
            yield line.line
def get_view_generator(filter_generator, environ, root_path, config):
    """Return the view to use as per the config.

    A ``view`` option in the ``[general]`` section selects the view
    explicitly (``htmlview``/``html``, ``textview``/``text`` or
    ``passthroughview``/``passthrough``, matched case-insensitively).
    Without a recognised option the legacy detection applies: passthrough
    when the file headers call for it, otherwise HTML or plain text
    depending on the request.

    ``root_path`` is accepted but not used by this function.
    """
    # Normalise the configured name once; None means "nothing configured".
    configured = None
    if (config.has_section('general') and
            config.has_option('general', 'view')):
        configured = config.get('general', 'view').lower()

    if configured in ('htmlview', 'html'):
        return HTMLView(filter_generator)
    if configured in ('textview', 'text'):
        return TextView(filter_generator)
    if configured in ('passthroughview', 'passthrough'):
        return PassthroughView(filter_generator)

    # Legacy detection based on the file headers and the request.
    file_headers = filter_generator.file_generator.file_headers
    if util.use_passthrough_view(file_headers):
        return PassthroughView(filter_generator)
    if util.should_be_html(environ):
        return HTMLView(filter_generator)
    return TextView(filter_generator)

View File

@ -15,7 +15,6 @@
# under the License. # under the License.
import cgi
import ConfigParser import ConfigParser
import fileinput import fileinput
import os.path import os.path
@ -23,7 +22,6 @@ import sys
import os_loganalyze.filter as osfilter import os_loganalyze.filter as osfilter
import os_loganalyze.generator as osgen import os_loganalyze.generator as osgen
import os_loganalyze.util as util
import os_loganalyze.view as osview import os_loganalyze.view as osview
@ -34,57 +32,12 @@ def htmlify_stdin():
out.write(line) out.write(line)
def should_be_html(environ):
    """Simple content negotiation.

    If the client supports content negotiation, and asks for text/html,
    we give it to them, unless they also specifically want to override
    by passing ?content-type=text/plain in the query.

    This should be able to handle the case of dumb clients defaulting to
    html, but also let devs override the text format when 35 MB html
    log files kill their browser (as per a nova-api log).

    :param environ: WSGI environ dict; ``HTTP_ACCEPT`` and ``QUERY_STRING``
        are the only keys read.
    :returns: True when HTML was requested and not overridden, else False.
    """
    # cgi.parse_qs has been deprecated since Python 2.6 in favour of the
    # urlparse/urllib.parse implementation, so import that one directly.
    try:
        from urllib.parse import parse_qs  # Python 3
    except ImportError:
        from urlparse import parse_qs  # Python 2

    accepts_html = ('HTTP_ACCEPT' in environ and
                    'text/html' in environ['HTTP_ACCEPT'])

    parameters = parse_qs(environ.get('QUERY_STRING', ''))
    # No HTML-escaping is needed before the comparison: escaping leaves
    # 'text/plain' untouched, and any value it would alter cannot equal
    # 'text/plain' anyway.
    text_override = ('content-type' in parameters and
                     parameters['content-type'][0] == 'text/plain')

    return accepts_html and not text_override
def get_config(wsgi_config):
    """Load the WSGI config file and return the parser holding it.

    ``~`` in ``wsgi_config`` is expanded before the file is read.
    """
    parser = ConfigParser.ConfigParser()
    config_path = os.path.expanduser(wsgi_config)
    parser.read(config_path)
    return parser
def use_passthrough_view(file_headers):
    """Determine if the file should be passed through without formatting.

    :param file_headers: mapping of header names to values; only
        ``content-type`` and ``filename`` are read.
    :returns: False when the content should be formatted (no content-type
        header, plain text or HTML, or a gzip file whose inner name ends
        in ``.txt`` or ``.html``); True otherwise.
    """
    if 'content-type' not in file_headers:
        # For legacy we'll try and format. This shouldn't occur though.
        return False

    content_type = file_headers['content-type']
    if content_type in ('text/plain', 'text/html'):
        # We want to format these files
        return False

    if content_type in ('application/x-gzip', 'application/gzip'):
        # We'll need to guess if we should render the output or offer a
        # download. A missing filename gives us nothing to guess from, so
        # fall through to passthrough instead of raising KeyError.
        filename = file_headers.get('filename', '')
        if filename.endswith('.gz'):
            filename = filename[:-3]
        if os.path.splitext(filename)[1] in ('.txt', '.html'):
            return False

    return True
def application(environ, start_response, root_path=None, def application(environ, start_response, root_path=None,
wsgi_config='/etc/os_loganalyze/wsgi.conf'): wsgi_config='/etc/os_loganalyze/wsgi.conf'):
if root_path is None: if root_path is None:
@ -111,18 +64,10 @@ def application(environ, start_response, root_path=None,
start_response(status, response_headers) start_response(status, response_headers)
return ['File Not Found'] return ['File Not Found']
if use_passthrough_view(file_generator.file_headers): filter_generator = osfilter.get_filter_generator(file_generator, environ,
view_generator = osview.PassthroughView(file_generator) root_path, config)
else: view_generator = osview.get_view_generator(filter_generator, environ,
minsev = util.parse_param(environ, 'level', default="NONE") root_path, config)
limit = util.parse_param(environ, 'limit')
flines_generator = osfilter.Filter(file_generator, minsev, limit)
if environ.get('OS_LOGANALYZE_STRIP', None):
flines_generator.strip_control = True
if should_be_html(environ):
view_generator = osview.HTMLView(flines_generator)
else:
view_generator = osview.TextView(flines_generator)
start_response(status, view_generator.headers) start_response(status, view_generator.headers)
return view_generator return view_generator