diff --git a/os_loganalyze/filter.py b/os_loganalyze/filter.py index a397763..8c94b9a 100644 --- a/os_loganalyze/filter.py +++ b/os_loganalyze/filter.py @@ -96,11 +96,11 @@ class LogLine(object): class Filter(object): - def __init__(self, fname, generator, minsev="NONE", limit=None): + def __init__(self, file_generator, minsev="NONE", limit=None): self.minsev = minsev - self.gen = generator - self.supports_sev = SUPPORTS_SEV.search(fname) is not None - self.fname = fname + self.file_generator = file_generator + self.supports_sev = \ + SUPPORTS_SEV.search(file_generator.logname) is not None self.limit = limit self.strip_control = False @@ -110,9 +110,9 @@ class Filter(object): def __iter__(self): old_sev = "NONE" lineno = 1 - for line in self.gen: + for line in self.file_generator: # bail early for limits - if self.limit and lineno >= int(self.limit): + if self.limit and lineno > int(self.limit): raise StopIteration() # strip control chars in case the console is ascii colored if self.strip_control: diff --git a/os_loganalyze/generator.py b/os_loganalyze/generator.py index f238ead..37ff3f7 100644 --- a/os_loganalyze/generator.py +++ b/os_loganalyze/generator.py @@ -16,6 +16,7 @@ # License for the specific language governing permissions and limitations # under the License. +import collections import fileinput import os.path import re @@ -104,73 +105,99 @@ def _get_swift_connection(swift_config): _get_swift_connection.con = None -def get_swift_line_generator(logname, config): - resp_headers = {} - if not config.has_section('swift'): - sys.stderr.write('Not configured to use swift..\n') - sys.stderr.write('logname: %s\n' % logname) - return resp_headers, None +class SwiftIterableBuffer(collections.Iterable): + file_headers = {} - try: - swift_config = dict(config.items('swift')) - con = _get_swift_connection(swift_config) + def __init__(self, logname, config): + self.logname = logname + self.resp_headers = {} + self.obj = None + self.file_headers['filename'] = logname - chunk_size = int(swift_config.get('chunk_size', 64)) - if chunk_size < 1: - chunk_size = None + if not config.has_section('swift'): + sys.stderr.write('Not configured to use swift..\n') + sys.stderr.write('logname: %s\n' % logname) + else: + try: + swift_config = dict(config.items('swift')) + # NOTE(jhesketh): While _get_siwft_connection seems like it + # should be part of this class we actually still need it + # outside to maintain the connection across multiple objects. + # Each SwiftIterableBuffer is a new object request, not + # necessarily a new swift connection (hopefully we can reuse + # connections). I think the place to put the get connection + # in the future would be in the server.py (todo). + con = _get_swift_connection(swift_config) - resp_headers, obj = con.get_object( - swift_config['container'], logname, - resp_chunk_size=chunk_size) + chunk_size = int(swift_config.get('chunk_size', 64)) + if chunk_size < 1: + chunk_size = None - def line_generator(): - ext = os.path.splitext(logname)[1] - if ext == '.gz': - # Set up a decompression object assuming the deflate - # compression algorithm was used - d = zlib.decompressobj(16 + zlib.MAX_WBITS) + self.resp_headers, self.obj = con.get_object( + swift_config['container'], logname, + resp_chunk_size=chunk_size) + self.file_headers.update(self.resp_headers) + except Exception: + import traceback + sys.stderr.write("Error fetching from swift.\n") + sys.stderr.write('logname: %s\n' % logname) + traceback.print_exc() - if isinstance(obj, types.GeneratorType): - buf = next(obj) - partial = '' - while buf: - if ext == '.gz': - string = partial + d.decompress(buf) - else: - string = partial + buf - split = string.split('\n') - for line in split[:-1]: - yield line + '\n' - partial = split[-1] - try: - buf = next(obj) - except StopIteration: - break - if partial != '': - yield partial - else: - output = obj + def __iter__(self): + ext = os.path.splitext(self.logname)[1] + if ext == '.gz': + # Set up a decompression object assuming the deflate + # compression algorithm was used + d = zlib.decompressobj(16 + zlib.MAX_WBITS) + + if isinstance(self.obj, types.GeneratorType): + buf = next(self.obj) + partial = '' + while buf: if ext == '.gz': - output = d.decompress(output) - - split = output.split('\n') + string = partial + d.decompress(buf) + else: + string = partial + buf + split = string.split('\n') for line in split[:-1]: yield line + '\n' partial = split[-1] - if partial != '': - yield partial + try: + buf = next(self.obj) + except StopIteration: + break + if partial != '': + yield partial + else: + output = self.obj + if ext == '.gz': + output = d.decompress(output) - return resp_headers, line_generator() - - except Exception: - import traceback - sys.stderr.write("Error fetching from swift.\n") - sys.stderr.write('logname: %s\n' % logname) - traceback.print_exc() - return resp_headers, None + split = output.split('\n') + for line in split[:-1]: + yield line + '\n' + partial = split[-1] + if partial != '': + yield partial -def get(environ, root_path, config=None): +class DiskIterableBuffer(collections.Iterable): + file_headers = {} + + def __init__(self, logname, logpath, config): + self.logname = logname + self.logpath = logpath + self.resp_headers = {} + self.obj = fileinput.FileInput(self.logpath, + openhook=fileinput.hook_compressed) + self.file_headers['filename'] = logname + self.file_headers.update(util.get_headers_for_file(logpath)) + + def __iter__(self): + return self.obj + + +def get_file_generator(environ, root_path, config=None): logname = log_name(environ) logpath = safe_path(root_path, logname) file_headers = {} @@ -178,24 +205,19 @@ def get(environ, root_path, config=None): raise UnsafePath() file_headers['filename'] = os.path.basename(logpath) - flines_generator = None + file_generator = None # if we want swift only, we'll skip processing files use_files = (util.parse_param(environ, 'source', default='all') != 'swift') if use_files and does_file_exist(logpath): - flines_generator = fileinput.FileInput( - logpath, openhook=fileinput.hook_compressed) - file_headers.update(util.get_headers_for_file(logpath)) + file_generator = DiskIterableBuffer(logname, logpath, config) else: - resp_headers, flines_generator = get_swift_line_generator(logname, - config) - if not flines_generator: + file_generator = SwiftIterableBuffer(logname, config) + if not file_generator.obj: logname = os.path.join(logname, 'index.html') - resp_headers, flines_generator = get_swift_line_generator(logname, - config) - file_headers.update(resp_headers) + file_generator = SwiftIterableBuffer(logname, config) - if not flines_generator: + if not file_generator.obj: raise NoSuchFile() - return logname, flines_generator, file_headers + return file_generator diff --git a/os_loganalyze/tests/test_views.py b/os_loganalyze/tests/test_views.py index a682683..96fa9af 100644 --- a/os_loganalyze/tests/test_views.py +++ b/os_loganalyze/tests/test_views.py @@ -28,8 +28,9 @@ class TestViews(base.TestCase): # wsgi application. We just need the generator to give to Views. root_path = base.samples_path(self.samples_directory) kwargs = {'PATH_INFO': '/htmlify/%s' % fname} - logname, gen, headers = osgen.get(self.fake_env(**kwargs), root_path) - flines_generator = osfilter.Filter(logname, gen) + file_generator = osgen.get_file_generator(self.fake_env(**kwargs), + root_path) + flines_generator = osfilter.Filter(file_generator) return flines_generator def test_html_detection(self): diff --git a/os_loganalyze/view.py b/os_loganalyze/view.py index fddcff4..af24757 100644 --- a/os_loganalyze/view.py +++ b/os_loganalyze/view.py @@ -161,7 +161,8 @@ class HTMLView(collections.Iterable): return newline def __iter__(self): - first_line = next(x for x in self.gen) + igen = (x for x in self.gen) + first_line = next(igen) self._discover_html(first_line.line) if not self.is_html: @@ -175,7 +176,7 @@ class HTMLView(collections.Iterable): if first: yield first - for line in self.gen: + for line in igen: newline = self._process_line(line) if newline: yield newline @@ -198,9 +199,9 @@ class TextView(collections.Iterable): class PassthroughView(collections.Iterable): headers = [] - def __init__(self, gen, file_headers): + def __init__(self, gen): self.gen = gen - for hn, hv in file_headers.items(): + for hn, hv in self.gen.file_headers.items(): self.headers.append((hn, hv)) def __iter__(self): diff --git a/os_loganalyze/wsgi.py b/os_loganalyze/wsgi.py index b77f1a8..3bd2d5d 100755 --- a/os_loganalyze/wsgi.py +++ b/os_loganalyze/wsgi.py @@ -99,8 +99,7 @@ def application(environ, start_response, root_path=None, status = '200 OK' try: - logname, flines_generator, file_headers = osgen.get(environ, root_path, - config) + file_generator = osgen.get_file_generator(environ, root_path, config) except osgen.UnsafePath: status = '400 Bad Request' response_headers = [('Content-type', 'text/plain')] @@ -112,23 +111,21 @@ def application(environ, start_response, root_path=None, start_response(status, response_headers) return ['File Not Found'] - if use_passthrough_view(file_headers): - generator = osview.PassthroughView(flines_generator, - file_headers) + if use_passthrough_view(file_generator.file_headers): + view_generator = osview.PassthroughView(file_generator) else: minsev = util.parse_param(environ, 'level', default="NONE") limit = util.parse_param(environ, 'limit') - flines_generator = osfilter.Filter( - logname, flines_generator, minsev, limit) + flines_generator = osfilter.Filter(file_generator, minsev, limit) if environ.get('OS_LOGANALYZE_STRIP', None): flines_generator.strip_control = True if should_be_html(environ): - generator = osview.HTMLView(flines_generator) + view_generator = osview.HTMLView(flines_generator) else: - generator = osview.TextView(flines_generator) + view_generator = osview.TextView(flines_generator) - start_response(status, generator.headers) - return generator + start_response(status, view_generator.headers) + return view_generator # for development purposes, makes it easy to test the filter output