Merge pull request #25 from Akrog/issue7/other_log_formats

Add support for other log formats
Miguel Ángel Ajo 2016-03-29 12:21:01 +02:00
commit 561b4310f7
3 changed files with 257 additions and 44 deletions


@@ -10,6 +10,9 @@ Changelog
- Add base log path option: `-b` `--log-base`.
- Log postfix option: `-p` `--log-postfix`.
- Auto alias generation: `-a` `--alias-level`.
- Add support for default /var/log/messages datetime format files with
`-ml [FILE [FILE]]`
- Add support for timestamped log files with `-tl [FILE [FILE]]`
**Bugfixes:**


@@ -93,6 +93,50 @@ Example for Cinder:
$ os-log-merger -b /var/log/cinder/ -p .log api:api scheduler:sch volume:vol
/var/log/messages
~~~~~~~~~~~~~~~~~
os-log-merger also supports /var/log/messages style files via the `-ml`
and `--msg-logs` options.
Since the datetime format of those files carries no year information
(e.g. Mar 24 14:11:19), the year of the file's last modification is used;
a sketch of this inference follows the example below.
These files can also be specified with globs, and they support alias
definitions as well.
Note that OpenStack log files must be listed before the `-ml` files.
Example for Cinder:
.. code:: bash
$ os-log-merger -b /var/log/ cinder/api.log:API -ml messages:MSG *.log
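The year inference can be sketched as follows (illustrative snippet; the
`infer_year` helper is not part of the tool, it just mirrors what the merger
does internally):
.. code:: python

    import os
    from datetime import datetime

    def infer_year(path):
        # The messages format carries no year, so borrow it from the
        # file's last modification time.
        return datetime.fromtimestamp(os.stat(path).st_mtime).year

    line = 'Mar 24 14:11:19 node1 sshd[123]: Accepted publickey ...'
    year = infer_year('/var/log/messages')
    date = datetime.strptime('%d %s' % (year, line[:15]),
                             '%Y %b %d %H:%M:%S')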
Timestamped logs
~~~~~~~~~~~~~~~~
os-log-merger also supports timestamped logs (e.g. [ 0.003036]) via the
`-tl` and `--timestamp-logs` options.
Since these timestamps usually count from system boot rather than from the
epoch, the initial datetime is calculated by subtracting the last timestamp
in the file from the file's modification datetime; a sketch of this
calculation follows the example below.
These files can also be specified with globs, and they support alias
definitions as well.
Note that OpenStack log files must be listed before the `-tl` files.
Example for Cinder:
.. code:: bash
$ os-log-merger -b /var/log/ cinder/api.log:API -tl dmesg:DMSG
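The initial datetime calculation can be sketched like this (illustrative
snippet; `start_datetime` is not part of the tool, it just mirrors the
internal logic):
.. code:: python

    import os
    from datetime import datetime, timedelta

    def start_datetime(path, last_timestamp):
        # Timestamps count seconds since boot, so anchor the last
        # timestamp found in the file to the file's modification time.
        mtime = datetime.fromtimestamp(os.stat(path).st_mtime)
        return mtime - timedelta(seconds=last_timestamp)

    # A line near the end of dmesg such as "[275514.814982] ..." gives
    # last_timestamp = 275514.814982; each entry's absolute time is then
    # start_datetime('dmesg', 275514.814982) + timedelta(seconds=<its timestamp>)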
Auto Alias
~~~~~~~~~~


@@ -1,10 +1,11 @@
from __future__ import print_function
import argparse
from datetime import datetime
from datetime import datetime, timedelta
import hashlib
import os
import sys
import tempfile
import time
import urllib2
@@ -74,15 +75,94 @@ FILE_MAP = {
}
class OpenStackLog:
def __init__(self, filename):
self._open(filename)
class LogEntry(object):
separator = ' '
date_format = None
_date_parse_msg = 'unconverted data remains: '
def _open(self, filename):
def __init__(self, **kwargs):
self._date_length = None
self.__dict__.update(**kwargs)
@classmethod
def get_init_args(cls, filename):
return {}
def prepare_line(self, line):
return line
def parse_date(self, line):
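# Explanatory note: datetime.strptime() raises
# ValueError('unconverted data remains: <trailing text>') when the line
# holds more than just the date; the length of that trailing text tells
# where the date ends, so only that prefix is re-parsed below.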
try:
dt = datetime.strptime(line, self.date_format)
except ValueError as e:
if not e.args[0].startswith(self._date_parse_msg):
raise
prepared_date_length = (len(line) - len(e.args[0]) +
len(self._date_parse_msg))
dt = datetime.strptime(line[:prepared_date_length],
self.date_format)
return dt
def _calculate_date_length(self):
return len(self.date.strftime(self.date_format))
@property
def date_length(self):
if not self._date_length:
self._date_length = self._calculate_date_length()
return self._date_length
@classmethod
def factory(cls, filename, line, **kwargs):
self = cls(**kwargs)
self.filename = filename
if not line:
raise ValueError
# Prepare the line for date parsing
prepared_line = self.prepare_line(line)
# Extract the datetime
self.date = self.parse_date(prepared_line)
if (len(line) == self.date_length or
line[self.date_length] != self.separator):
raise ValueError
self.date_str = line[:self.date_length]
# +1 to remove the separator so we don't have 2 spaces on output
self.data = line[self.date_length + 1:]
return self
def append_line(self, line):
self.data += EXTRALINES_PADDING + line
def __cmp__(self, other):
return cmp(self.date, other.date)
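# Explanatory note: __cmp__ (and the cmp() builtin used here) exist only
# in Python 2; they make entries sort by date, which is what lets the
# plain sorted(all_entries) call further down work without a key.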
class LogFile(object):
log_entry_class = LogEntry
@staticmethod
def factory(cls, filename):
instance = LogFile(filename)
instance.log_entry_class = cls
instance.entry_kwargs = cls.get_init_args(filename)
return instance
def __init__(self, filename):
self.open(filename)
def open(self, filename):
self._filename = filename
if filename.startswith("http://"):
filename = self._cached_download(filename)
self._file = open(filename, 'r')
stat = os.stat(filename)
self.mtime = datetime.fromtimestamp(stat.st_mtime)
def _url_cache_path(self, url):
md5 = hashlib.md5()
@@ -104,29 +184,16 @@ class OpenStackLog:
file_out = open(path, 'w')
file_out.write(http_in.read())
file_out.close()
# Set the file time to the one from the URL
info = http_in.info()
m_date = info.getdate('date')
mtime = time.mktime(m_date)
os.utime(path, (mtime, mtime))
http_in.close()
return path
def _extract_with_date(self, line):
try:
# TODO(mangelajo): We support the default log format
# so far, but we may need to discover
# different ones.
chunks = line.split(" ")
datetime_str = ' '.join(chunks[:2])
# this is likely to be not necessary, we can just compare
# strings, and that's going to be faster than parsing
# and regenerating later, but, could be useful when mixing
# log and date formats.
date_object = datetime.strptime(
datetime_str, "%Y-%m-%d %H:%M:%S.%f")
pid, level = chunks[2], chunks[3]
rest = ' '.join(chunks[4:])
return (date_object, datetime_str, self._filename, pid, level,
rest)
except IndexError:
return None
def __iter__(self):
self.entry = None
self.next_entry = None
@@ -139,7 +206,9 @@ class OpenStackLog:
return entry, None
try:
new_entry = self._extract_with_date(line)
new_entry = self.log_entry_class.factory(self._filename,
line,
**self.entry_kwargs)
if new_entry is None:
continue
if entry:
@@ -150,9 +219,7 @@ class OpenStackLog:
# it's a non-dated line, just append to the entry
# extra info
if entry:
(date_object, date_str, filename, pid, level, rest) = entry
entry = (date_object, date_str, filename, pid, level,
rest + EXTRALINES_PADDING + line)
entry.append_line(line)
def __next__(self):
return self.next()
@@ -174,7 +241,80 @@ class OpenStackLog:
if (other.peek() or self.peek()) is None:
return 0 if self.peek() is None else -1
return cmp(self.peek()[0], other.peek()[0])
return cmp(self.peek(), other.peek())
class MsgLogEntry(LogEntry):
"""Message format: Oct 15 14:11:19"""
date_format = '%Y%b %d %H:%M:%S'
@classmethod
def get_init_args(cls, filename):
kwargs = super(MsgLogEntry, cls).get_init_args(filename)
stat = os.stat(filename)
kwargs['file_year'] = datetime.fromtimestamp(stat.st_mtime).year
return kwargs
def prepare_line(self, line):
# TODO: If the year of file creation and the year of last modification
# differ, we should start with the creation year and then switch to the
# next year once the month rolls over.
return '%s%s' % (self.file_year, line)
def _calculate_date_length(self):
return super(MsgLogEntry, self)._calculate_date_length() - 4
class OSLogEntry(LogEntry):
"""OpenStack default log: 2016-02-01 10:22:59.239"""
date_format = '%Y-%m-%d %H:%M:%S.%f'
def _calculate_date_length(self):
return super(OSLogEntry, self)._calculate_date_length() - 3
class TSLogEntry(LogEntry):
"""Timestamped log: [275514.814982]"""
@classmethod
def get_init_args(cls, filename):
kwargs = super(TSLogEntry, cls).get_init_args(filename)
stat = os.stat(filename)
mtime = datetime.fromtimestamp(stat.st_mtime)
timestamp = cls._get_last_timestamp(filename)
kwargs['start_date'] = mtime - timedelta(seconds=timestamp)
return kwargs
@classmethod
def _get_last_timestamp(cls, filename):
result = None
with open(filename, 'r') as f:
file_size = os.fstat(f.fileno()).st_size
# Jump to the last KB of the file so we don't have to read it all
offset = max(0, file_size - 1024)
f.seek(offset)
for line in f:
try:
__, result = cls._read_timestamp(line)
except ValueError:
continue
return result
@staticmethod
def _read_timestamp(line):
start = line.index('[') + 1
end = line.index(']')
if end < start:
raise ValueError
return end, float(line[start:end])
def parse_date(self, date_str):
end, timestamp = self._read_timestamp(date_str)
self._date_length = end + 1
return self.start_date + timedelta(seconds=timestamp)
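# Illustrative example: for a dmesg line such as
#   "[    0.003036] Calibrating delay loop..."
# _read_timestamp() returns the index of ']' plus the value 0.003036, and
# parse_date() yields start_date + timedelta(seconds=0.003036), where
# start_date was computed above as the file's mtime minus the last
# timestamp found in the file.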
def process_logs_limit_memory_usage(logs):
@@ -207,20 +347,28 @@ def process_logs_memory_hog(logs):
for entry in log:
all_entries.append(entry)
sorted_entries = sorted(all_entries, key=lambda log_entry: log_entry[0])
sorted_entries = sorted(all_entries)
for entry in sorted_entries:
yield entry
LOG_TYPES = [
('logfiles', OSLogEntry),
('logfiles_m', MsgLogEntry),
('logfiles_t', TSLogEntry),
]
def process_logs(cfg):
filename_alias = {}
logs = []
for filename in cfg.logfiles:
path, alias, is_url = get_path_and_alias(filename,
cfg.log_base,
cfg.log_postfix)
filename_alias[path] = (filename, alias, is_url)
logs.append(OpenStackLog(path))
for arg_name, entry_cls in LOG_TYPES:
for filename in getattr(cfg, arg_name):
path, alias, is_url = get_path_and_alias(filename,
cfg.log_base,
cfg.log_postfix)
filename_alias[path] = (filename, alias, is_url)
logs.append(LogFile.factory(entry_cls, path))
alias = generate_aliases(filename_alias, cfg)
@@ -230,9 +378,8 @@ def process_logs(cfg):
method = process_logs_memory_hog
for entry in method(logs):
(date_object, date_str, filename, pid, level, rest) = entry
print (' '.join([date_str, '[%s]' % alias[filename], pid,
level, rest]).rstrip('\n'))
print('%s [%s] %s' % (entry.date_str, alias[entry.filename],
entry.data.rstrip('\n')))
def get_path_and_alias(filename, log_base, log_postfix):
@@ -414,9 +561,22 @@ alias. Use the aliases if you want shorter line lengths.
Logs are expected to contain lines in the following format:
Y-m-d H:M:S.mmm PID LOG-LEVEL ............
Y-m-d H:M:S.mmm PID LOG-LEVEL ............
Y-m-d H:M:S.mmm ............
Y-m-d H:M:S.mmm ............
[ extra line info ..... ]
Logs with the default /var/log/messages datetime format (Oct 15 14:11:19)
can optionally be merged as well using the "--msg-logs" or "-ml"
options. The year will be taken from the file's last modification time.
Logs with timestamp format ([ 0.003036]) are also supported with the
"--timestamp-logs" or "-tl" options. Since these timestamps usually count
from system boot rather than from the epoch, the initial datetime is
calculated by subtracting the last timestamp in the file from the file's
modification datetime.
These log files are also affected by the log base directory and log
postfix options.
"""
general_epilog = """
@@ -463,14 +623,20 @@ one has not been provided:'
help='Base path for all the log files')
parser.add_argument('--log-postfix', '-p', dest='log_postfix',
help='Append to all the log files path')
parser.add_argument('logfiles', nargs='+', metavar='log_file',
help='File in the format of log_file[:ALIAS]')
parser.add_argument('logfiles', nargs='+', metavar='log_file[:ALIAS]',
help='OpenStack log file.')
parser.add_argument('--alias-level', '-a', type=int, default=0,
dest='alias_level',
help='Level of smart alias naming (0-3)')
parser.add_argument('--min-memory', '-m', default=False,
action='store_true', dest='limit_memory',
help='Limit memory usage')
parser.add_argument('--msg-logs', '-ml', default=[], nargs='+',
dest='logfiles_m', metavar='file[:ALIAS]',
help='Message log files with format: Oct 15 14:11:19')
parser.add_argument('--timestamp-logs', '-tl', default=[], nargs='+',
dest='logfiles_t', metavar='file[:ALIAS]',
help='Log files with timestamp: [ 0.003036]')
return parser.parse_args()