os-loganalyze/os_loganalyze/generator.py

#!/usr/bin/env python
#
# Copyright (c) 2013 IBM Corp.
# Copyright (c) 2014 Hewlett-Packard Development Company, L.P.
# Copyright (c) 2014 Rackspace Australia
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

import collections
import datetime
import fileinput
import os.path
import re

import jinja2

import os_loganalyze.util as util


class UnsafePath(Exception):
    pass


class NoSuchFile(Exception):
    pass


def does_file_exist(fname):
    """Figure out if we'll be able to read this file.

    Because we are handling the file streams as generators, we actually raise
    an exception too late for us to be able to handle it before apache has
    complete control. This attempts to do the same open outside of the
    generator to trigger the IOError early enough for us to catch it, without
    completely changing the logic flow, as we really want the generator
    pipeline for performance reasons.

    This does open us up to a small chance for a race where the file comes
    or goes between this call and the next, however that is a vanishingly
    small possibility.
    """
    try:
        f = open(fname)
        f.close()
        return True
    except IOError:
        return False


def log_name(environ):
    path = environ['PATH_INFO']
    if path[0] == '/':
        path = path[1:]

    match = re.search('htmlify/(.*)', path)
    if match:
        raw = match.groups(1)[0]
        return raw

    return path
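
# Example: for a request with, e.g., PATH_INFO '/htmlify/job/console.html'
# the leading '/' and the 'htmlify/' prefix are stripped, returning
# 'job/console.html'; paths without the prefix lose only the leading '/'.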


def safe_path(root, log_name):
    """Pull out a safe path from a url.

    Basically we need to ensure that the final computed path
    remains under the root path. If not, we return None to indicate
    that we are very sad.
    """
    if log_name is not None:
        newpath = os.path.abspath(os.path.join(root, log_name))
        if newpath.find(root) == 0:
            return newpath

    return None
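
# Example (with an illustrative root of '/srv/static/logs'): a traversal
# attempt such as '../../etc/passwd' resolves to '/srv/etc/passwd', which is
# outside the root, so None is returned; 'job/console.html' resolves to
# '/srv/static/logs/job/console.html' and is returned.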


def sizeof_fmt(num, suffix='B'):
    # From http://stackoverflow.com/questions/1094841/
    # reusable-library-to-get-human-readable-version-of-file-size
    for unit in ['', 'K', 'M', 'G', 'T', 'P', 'E', 'Z']:
        if abs(num) < 1024.0:
            return "%3.1f%s%s" % (num, unit, suffix)
        num /= 1024.0
    return "%.1f%s%s" % (num, 'Y', suffix)


class DiskIterableBuffer(collections.Iterable):
    def __init__(self, logname, logpath, config):
        self.logname = logname
        self.logpath = logpath
        self.resp_headers = {}
        self.obj = fileinput.FileInput(self.logpath,
                                       openhook=fileinput.hook_compressed)
        self.file_headers = {}
        self.file_headers['filename'] = logname
        self.file_headers.update(util.get_headers_for_file(logpath))

    def __iter__(self):
        return self.obj
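
# Note: fileinput.hook_compressed transparently opens '.gz' and '.bz2' files,
# so compressed logs on disk are streamed the same way as plain-text ones.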


class IndexIterableBuffer(collections.Iterable):
    def __init__(self, logname, logpath, config):
        self.logname = logname
        self.logpath = logpath
        self.config = config
        self.resp_headers = {}
        self.file_headers = {}
        self.file_headers['Content-type'] = 'text/html'

        # Use sets here to dedup. We can have duplicates
        # if disk and swift based paths have overlap.
        file_set = self.disk_list()

        # file_list is a list of tuples (relpath, name, mtime, size)
        self.file_list = sorted(file_set, key=lambda tup: tup[0])

    def disk_list(self):
        file_set = set()
        if os.path.isdir(self.logpath):
            for f in os.listdir(self.logpath):
                full_path = os.path.join(self.logpath, f)
                stat_info = os.stat(full_path)
                size = sizeof_fmt(stat_info.st_size)
                mtime = datetime.datetime.utcfromtimestamp(
                    stat_info.st_mtime).isoformat()
                if os.path.isdir(full_path):
                    f = f + '/' if f[-1] != '/' else f
                file_set.add((
                    os.path.join('/', self.logname, f),
                    f,
                    mtime,
                    size
                ))
        return file_set

    def __iter__(self):
        env = jinja2.Environment(
            loader=jinja2.PackageLoader('os_loganalyze', 'templates'))
        template = env.get_template('file_index.html')
        gen = template.generate(logname=self.logname,
                                file_list=self.file_list)
        for l in gen:
            yield l.encode("utf-8")
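
# Note: template.generate() renders the index as a stream of chunks, which
# __iter__ encodes to UTF-8 bytes so the WSGI layer can write them directly.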


def get_file_generator(environ, root_path, config=None):
    logname = log_name(environ)
    logpath = safe_path(root_path, logname)

    if logpath is None:
        raise UnsafePath()

    file_generator = None
    if does_file_exist(logpath):
        file_generator = DiskIterableBuffer(logname, logpath, config)

    if not file_generator or not file_generator.obj:
        if (config.has_section('general') and
                config.has_option('general', 'generate_folder_index') and
                config.getboolean('general', 'generate_folder_index')):
            index_generator = IndexIterableBuffer(logname, logpath,
                                                  config)
            if len(index_generator.file_list) > 0:
                return index_generator
        raise NoSuchFile()

    return file_generator
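
# Minimal usage sketch (illustrative; the root path, PATH_INFO and config
# values below are assumptions rather than anything this module requires):
#
#     import ConfigParser  # configparser on Python 3
#
#     config = ConfigParser.ConfigParser()
#     config.add_section('general')
#     config.set('general', 'generate_folder_index', 'true')
#     environ = {'PATH_INFO': '/htmlify/devstack/logs/'}
#     for chunk in get_file_generator(environ, '/srv/static', config):
#         pass  # stream each chunk out as the WSGI response body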