Merge pull request #25 from Akrog/issue7/other_log_formats
Add support for other log formats
This commit is contained in:
commit
561b4310f7
|
@ -10,6 +10,9 @@ Changelog
|
|||
- Add base log path option: `-b` `--log-base`.
|
||||
- Log postfix option: `-p` `--log-postfix`.
|
||||
- Auto alias generation: `-a` `--alias-level`.
|
||||
- Add support for default /var/log/messages datetime format files with
|
||||
`-ml [FILE [FILE]]`
|
||||
- Add support for timestamped log files with `-tl [FILE [FILE]]`
|
||||
|
||||
**Bugfixes:**
|
||||
|
||||
|
|
44
README.rst
44
README.rst
|
@ -93,6 +93,50 @@ Example for Cinder:
|
|||
$ os-log-merger -b /var/log/cinder/ -p .log api:api scheduler:sch volume:vol
|
||||
|
||||
|
||||
/var/log/messages
|
||||
~~~~~~~~~~~~~~~~~
|
||||
|
||||
os-log-merger also supports /var/log/messages type of files with the `-ml`
|
||||
and `--msg-logs` options.
|
||||
|
||||
Since the format for those files is missing year information -MAR 24 14:11:19-
|
||||
the year from the last file modification will be used.
|
||||
|
||||
These files can also be specified with globs and they support alias definition
|
||||
as well.
|
||||
|
||||
Beware that openstack files should be listed before `-ml` option files.
|
||||
|
||||
Example for Cinder:
|
||||
|
||||
.. code:: bash
|
||||
|
||||
$ os-log-merger -b /var/log/ cinder/api.log:API -ml messages:MSG *.log
|
||||
|
||||
|
||||
Timestamped logs
|
||||
~~~~~~~~~~~~~~~~
|
||||
|
||||
os-log-merger also supports timestamped logs -[ 0.003036]- with the `-tl`
|
||||
and `--timestamp-logs` options.
|
||||
|
||||
Since the timestamp often will not take epoch time as the source of the
|
||||
timestamp but the time the system started, the initial datetime will be
|
||||
calculated by subtracting from the file modified datetime the last timestamp
|
||||
in the file.
|
||||
|
||||
These files can also be specified with globs and they support alias definition
|
||||
as well.
|
||||
|
||||
Beware that openstack files should be listed before `-tl` option files.
|
||||
|
||||
Example for Cinder:
|
||||
|
||||
.. code:: bash
|
||||
|
||||
$ os-log-merger -b /var/log/ cinder/api.log:API -tl dmesg:DMSG
|
||||
|
||||
|
||||
Auto Alias
|
||||
~~~~~~~~~~
|
||||
|
||||
|
|
|
@ -1,10 +1,11 @@
|
|||
from __future__ import print_function
|
||||
import argparse
|
||||
from datetime import datetime
|
||||
from datetime import datetime, timedelta
|
||||
import hashlib
|
||||
import os
|
||||
import sys
|
||||
import tempfile
|
||||
import time
|
||||
import urllib2
|
||||
|
||||
|
||||
|
@ -74,15 +75,94 @@ FILE_MAP = {
|
|||
}
|
||||
|
||||
|
||||
class OpenStackLog:
|
||||
def __init__(self, filename):
|
||||
self._open(filename)
|
||||
class LogEntry(object):
    """One dated log line, plus any undated continuation lines.

    Subclasses customize ``date_format``, ``prepare_line`` and
    ``parse_date`` to support different on-disk log formats.
    """
    separator = ' '
    date_format = None
    _date_parse_msg = 'unconverted data remains: '

    def __init__(self, **kwargs):
        # Lazily-computed length of the date prefix (see `date_length`).
        self._date_length = None
        self.__dict__.update(**kwargs)

    @classmethod
    def get_init_args(cls, filename):
        """Extra constructor kwargs derived from the file; none by default."""
        return {}

    def prepare_line(self, line):
        """Hook for subclasses to massage a raw line before date parsing."""
        return line

    def parse_date(self, line):
        """Parse the leading datetime of *line*, tolerating trailing text."""
        try:
            return datetime.strptime(line, self.date_format)
        except ValueError as e:
            message = e.args[0]
            if not message.startswith(self._date_parse_msg):
                raise
            # strptime reports exactly how much trailing data it could not
            # consume; retry on just the leading date portion.
            consumed = len(line) - len(message) + len(self._date_parse_msg)
            return datetime.strptime(line[:consumed], self.date_format)

    def _calculate_date_length(self):
        # Render the parsed date back with the same format to learn how
        # many characters the date prefix occupies in the raw line.
        return len(self.date.strftime(self.date_format))

    @property
    def date_length(self):
        if not self._date_length:
            self._date_length = self._calculate_date_length()
        return self._date_length

    @classmethod
    def factory(cls, filename, line, **kwargs):
        """Build an entry from *line*; raise ValueError if it has no date."""
        self = cls(**kwargs)
        self.filename = filename
        if not line:
            raise ValueError

        # Normalize the line, then extract its datetime.
        self.date = self.parse_date(self.prepare_line(line))

        length = self.date_length
        if len(line) == length or line[length] != self.separator:
            raise ValueError

        self.date_str = line[:length]
        # Skip the separator as well so the output has no double space.
        self.data = line[length + 1:]
        return self

    def append_line(self, line):
        """Attach an undated continuation line to this entry's payload."""
        self.data += EXTRALINES_PADDING + line

    def __cmp__(self, other):
        # Python 2 ordering hook: entries sort chronologically.
        return cmp(self.date, other.date)
||||
|
||||
|
||||
class LogFile(object):
|
||||
log_entry_class = LogEntry
|
||||
|
||||
@staticmethod
|
||||
def factory(cls, filename):
|
||||
instance = LogFile(filename)
|
||||
instance.log_entry_class = cls
|
||||
instance.entry_kwargs = cls.get_init_args(filename)
|
||||
return instance
|
||||
|
||||
def __init__(self, filename):
|
||||
self.open(filename)
|
||||
|
||||
def open(self, filename):
|
||||
self._filename = filename
|
||||
if filename.startswith("http://"):
|
||||
filename = self._cached_download(filename)
|
||||
|
||||
self._file = open(filename, 'r')
|
||||
stat = os.stat(filename)
|
||||
self.mtime = datetime.fromtimestamp(stat.st_mtime)
|
||||
|
||||
def _url_cache_path(self, url):
|
||||
md5 = hashlib.md5()
|
||||
|
@ -104,29 +184,16 @@ class OpenStackLog:
|
|||
file_out = open(path, 'w')
|
||||
file_out.write(http_in.read())
|
||||
file_out.close()
|
||||
|
||||
# Set the file time to the one from the URL
|
||||
info = http_in.info()
|
||||
m_date = info.getdate('date')
|
||||
mtime = time.mktime(m_date)
|
||||
os.utime(path, (mtime, mtime))
|
||||
|
||||
http_in.close()
|
||||
return path
|
||||
|
||||
def _extract_with_date(self, line):
|
||||
try:
|
||||
# TODO(mangelajo): We support the default log format
|
||||
# so far, but we may need to discover
|
||||
# different ones.
|
||||
chunks = line.split(" ")
|
||||
datetime_str = ' '.join(chunks[:2])
|
||||
# this is likely to be not necessary, we can just compare
|
||||
# strings, and that's going to be faster than parsing
|
||||
# and regenerating later, but, could be useful when mixing
|
||||
# log and date formats.
|
||||
date_object = datetime.strptime(
|
||||
datetime_str, "%Y-%m-%d %H:%M:%S.%f")
|
||||
pid, level = chunks[2], chunks[3]
|
||||
rest = ' '.join(chunks[4:])
|
||||
return (date_object, datetime_str, self._filename, pid, level,
|
||||
rest)
|
||||
except IndexError:
|
||||
return None
|
||||
|
||||
def __iter__(self):
|
||||
self.entry = None
|
||||
self.next_entry = None
|
||||
|
@ -139,7 +206,9 @@ class OpenStackLog:
|
|||
return entry, None
|
||||
|
||||
try:
|
||||
new_entry = self._extract_with_date(line)
|
||||
new_entry = self.log_entry_class.factory(self._filename,
|
||||
line,
|
||||
**self.entry_kwargs)
|
||||
if new_entry is None:
|
||||
continue
|
||||
if entry:
|
||||
|
@ -150,9 +219,7 @@ class OpenStackLog:
|
|||
# it's a non-dated line, just append to the entry
|
||||
# extra info
|
||||
if entry:
|
||||
(date_object, date_str, filename, pid, level, rest) = entry
|
||||
entry = (date_object, date_str, filename, pid, level,
|
||||
rest + EXTRALINES_PADDING + line)
|
||||
entry.append_line(line)
|
||||
|
||||
def __next__(self):
|
||||
return self.next()
|
||||
|
@ -174,7 +241,80 @@ class OpenStackLog:
|
|||
|
||||
if (other.peek() or self.peek()) is None:
|
||||
return 0 if self.peek() is None else -1
|
||||
return cmp(self.peek()[0], other.peek()[0])
|
||||
return cmp(self.peek(), other.peek())
|
||||
|
||||
|
||||
class MsgLogEntry(LogEntry):
    """Entry for /var/log/messages-style lines: ``Oct 15 14:11:19``.

    These lines carry no year, so the year of the file's last
    modification is prepended to each line before date parsing.
    """
    date_format = '%Y%b %d %H:%M:%S'

    @classmethod
    def get_init_args(cls, filename):
        kwargs = super(MsgLogEntry, cls).get_init_args(filename)
        # The log lines omit the year; borrow it from the file's mtime.
        stat = os.stat(filename)
        kwargs['file_year'] = datetime.fromtimestamp(stat.st_mtime).year
        return kwargs

    def prepare_line(self, line):
        # TODO: If the year of file creation and of last modification
        # differ we should start with the creation year and switch to
        # the next year once the months wrap around.
        return '%s%s' % (self.file_year, line)

    def _calculate_date_length(self):
        # The parsed date includes the 4 year digits we prepended in
        # prepare_line, which are not present in the raw line.
        return super(MsgLogEntry, self)._calculate_date_length() - 4
|
||||
|
||||
|
||||
class OSLogEntry(LogEntry):
    """Entry for the default OpenStack log format: ``2016-02-01 10:22:59.239``."""
    date_format = '%Y-%m-%d %H:%M:%S.%f'

    def _calculate_date_length(self):
        # strftime('%f') renders six fractional digits, but OpenStack logs
        # only carry milliseconds, so drop the three extra characters.
        full_length = super(OSLogEntry, self)._calculate_date_length()
        return full_length - 3
|
||||
|
||||
|
||||
class TSLogEntry(LogEntry):
    """Entry for timestamped logs such as dmesg: ``[275514.814982]``.

    Timestamps are usually relative to system boot rather than to the
    epoch, so an absolute start datetime is derived by subtracting the
    file's last timestamp from its modification time.
    """

    @classmethod
    def get_init_args(cls, filename):
        kwargs = super(TSLogEntry, cls).get_init_args(filename)
        stat = os.stat(filename)
        mtime = datetime.fromtimestamp(stat.st_mtime)
        timestamp = cls._get_last_timestamp(filename)
        # A file with no parseable timestamp would otherwise crash the
        # timedelta below with TypeError; fall back to the mtime itself.
        if timestamp is None:
            timestamp = 0.0
        kwargs['start_date'] = mtime - timedelta(seconds=timestamp)
        return kwargs

    @classmethod
    def _get_last_timestamp(cls, filename):
        """Return the last ``[timestamp]`` found near the end of the file.

        Returns None when no line in the inspected tail parses.
        """
        result = None
        with open(filename, 'r') as f:
            file_size = os.fstat(f.fileno()).st_size
            # Jump to the last KB so we don't have to read the whole file.
            offset = max(0, file_size - 1024)
            f.seek(offset)
            for line in f:
                try:
                    __, result = cls._read_timestamp(line)
                except ValueError:
                    # Lines without a bracketed float (including a partial
                    # first line after the seek) are simply skipped.
                    continue

        return result

    @staticmethod
    def _read_timestamp(line):
        """Return (index after ']', float seconds); raise ValueError if absent."""
        start = line.index('[') + 1
        end = line.index(']')

        if end < start:
            raise ValueError

        return end, float(line[start:end])

    def parse_date(self, date_str):
        """Convert the leading ``[seconds]`` prefix into an absolute datetime."""
        end, timestamp = self._read_timestamp(date_str)
        # The dated prefix ends right after the closing bracket.
        self._date_length = end + 1
        return self.start_date + timedelta(seconds=timestamp)
||||
|
||||
|
||||
def process_logs_limit_memory_usage(logs):
|
||||
|
@ -207,20 +347,28 @@ def process_logs_memory_hog(logs):
|
|||
for entry in log:
|
||||
all_entries.append(entry)
|
||||
|
||||
sorted_entries = sorted(all_entries, key=lambda log_entry: log_entry[0])
|
||||
sorted_entries = sorted(all_entries)
|
||||
for entry in sorted_entries:
|
||||
yield entry
|
||||
|
||||
|
||||
LOG_TYPES = [
|
||||
('logfiles', OSLogEntry),
|
||||
('logfiles_m', MsgLogEntry),
|
||||
('logfiles_t', TSLogEntry),
|
||||
]
|
||||
|
||||
|
||||
def process_logs(cfg):
|
||||
filename_alias = {}
|
||||
logs = []
|
||||
for filename in cfg.logfiles:
|
||||
path, alias, is_url = get_path_and_alias(filename,
|
||||
cfg.log_base,
|
||||
cfg.log_postfix)
|
||||
filename_alias[path] = (filename, alias, is_url)
|
||||
logs.append(OpenStackLog(path))
|
||||
for arg_name, entry_cls in LOG_TYPES:
|
||||
for filename in getattr(cfg, arg_name):
|
||||
path, alias, is_url = get_path_and_alias(filename,
|
||||
cfg.log_base,
|
||||
cfg.log_postfix)
|
||||
filename_alias[path] = (filename, alias, is_url)
|
||||
logs.append(LogFile.factory(entry_cls, path))
|
||||
|
||||
alias = generate_aliases(filename_alias, cfg)
|
||||
|
||||
|
@ -230,9 +378,8 @@ def process_logs(cfg):
|
|||
method = process_logs_memory_hog
|
||||
|
||||
for entry in method(logs):
|
||||
(date_object, date_str, filename, pid, level, rest) = entry
|
||||
print (' '.join([date_str, '[%s]' % alias[filename], pid,
|
||||
level, rest]).rstrip('\n'))
|
||||
print('%s [%s] %s' % (entry.date_str, alias[entry.filename],
|
||||
entry.data.rstrip('\n')))
|
||||
|
||||
|
||||
def get_path_and_alias(filename, log_base, log_postfix):
|
||||
|
@ -414,9 +561,22 @@ alias. Use the aliases if you want shorter line lengths.
|
|||
|
||||
Logs are expected to contain lines in the following format:
|
||||
|
||||
Y-m-d H:M:S.mmm PID LOG-LEVEL ............
|
||||
Y-m-d H:M:S.mmm PID LOG-LEVEL ............
|
||||
Y-m-d H:M:S.mmm ............
|
||||
Y-m-d H:M:S.mmm ............
|
||||
[ extra line info ..... ]
|
||||
|
||||
Logs with default /var/log/messages datetime format (Oct 15 14:11:19)
|
||||
can optionally be merged as well using "--msg-logs" or "-ml"
|
||||
options. Year will be taken from the last modified time of the file.
|
||||
|
||||
Logs with timestamp format -[ 0.003036]- are also supported with
|
||||
options "--timestamp-logs" or "-tl". Since timestamp many times will
|
||||
not take epoch time as the source of the timestamp but the time the
|
||||
system started, the initial datetime will be calculated by subtracting
|
||||
from the file modified datetime the last timestamp in the file.
|
||||
|
||||
These log files will also be affected by log base directory and log
|
||||
postfix.
|
||||
"""
|
||||
|
||||
general_epilog = """
|
||||
|
@ -463,14 +623,20 @@ one has not been provided:'
|
|||
help='Base path for all the log files')
|
||||
parser.add_argument('--log-postfix ', '-p', dest='log_postfix',
|
||||
help='Append to all the log files path')
|
||||
parser.add_argument('logfiles', nargs='+', metavar='log_file',
|
||||
help='File in the format of log_file[:ALIAS]')
|
||||
parser.add_argument('logfiles', nargs='+', metavar='log_file[:ALIAS]',
|
||||
help='OpenStack log file.')
|
||||
parser.add_argument('--alias-level', '-a', type=int, default=0,
|
||||
dest='alias_level',
|
||||
help='Level of smart alias naming (0-3)')
|
||||
parser.add_argument('--min-memory', '-m', default=False,
|
||||
action='store_true', dest='limit_memory',
|
||||
help='Limit memory usage')
|
||||
parser.add_argument('--msg-logs', '-ml', default=[], nargs='+',
|
||||
dest='logfiles_m', metavar='file[:ALIAS]',
|
||||
help='Message log files with format: Oct 15 14:11:19')
|
||||
parser.add_argument('--timestamp-logs', '-tl', default=[], nargs='+',
|
||||
dest='logfiles_t', metavar='file[:ALIAS]',
|
||||
help='Message log files with timestamp: [ 0.003036]')
|
||||
|
||||
return parser.parse_args()
|
||||
|
||||
|
|
Loading…
Reference in New Issue