Add Support /var/log/messages type of logs

This patch adds support for log files that use the same datetime format
as /var/log/messages, which is in the form of Oct 15 14:11:19.

The year of the file's last modification will be used to complete the
missing year.

In the process of refactoring the read of the logs the OpenStack log
reading has been simplified to not require the presence of PID or LEVEL,
thus providing more compatibility with custom log formats (as long as
default datetime format is being used).

With this refactoring it is now easier to add new datetime formats to
the script.

Option added is --msg-logs or -ml and it also supports ALIAS and globs,
and is also affected by base directory and postfix options.

TODO: If year of file creation and file last modification are different
we should start with the creation year and then change to the next year
once the months go back.
This commit is contained in:
Gorka Eguileor 2016-03-24 21:41:15 +01:00
parent 1e3e83eb2c
commit 8c3ae25c17
3 changed files with 165 additions and 43 deletions

View File

@ -9,6 +9,8 @@ Changelog
- Add base log path option: `-b` `--log-base`.
- Log postfix option: `-p` `--log-postfix`.
- Auto alias generation: `-a` `--alias-level`.
- Add support for default /var/log/messages datetime format files with
`-ml [FILE [FILE]]`
**Bugfixes:**

View File

@ -93,6 +93,27 @@ Example for Cinder:
$ os-log-merger -b /var/log/cinder/ -p .log api:api scheduler:sch volume:vol
/var/log/messages
~~~~~~~~~~~~~~~~~
os-log-merger also supports /var/log/messages type of files with the `-ml`
and `--msg-logs` options.
Since the format for those files is missing year information -MAR 24 14:11:19-
the year from the last file modification will be used.
These files can also be specified with globs and they support alias definition
as well.
Beware that openstack files should be listed before `-ml` option files.
Example for Cinder:
.. code:: bash
$ os-log-merger -b /var/log/ cinder/api.log:API -ml messages:MSG *.log
Auto Alias
~~~~~~~~~~

View File

@ -5,6 +5,7 @@ import hashlib
import os
import sys
import tempfile
import time
import urllib2
@ -74,15 +75,88 @@ FILE_MAP = {
}
class OpenStackLog:
def __init__(self, filename):
self._open(filename)
class LogEntry(object):
    """A dated log entry together with the undated lines appended to it.

    Subclasses set ``date_format`` and may override ``prepare_line`` and
    ``_calculate_date_length`` when the timestamp written in the file is
    not exactly what ``date_format`` parses and prints.

    Attributes set by ``factory``:
        filename: name of the file the entry was read from.
        date: ``datetime`` parsed from the start of the line.
        date_str: the timestamp text exactly as it appears in the line.
        data: the rest of the line after the separator.
    """

    # Character expected immediately after the timestamp in the raw line.
    separator = ' '
    # strptime/strftime format of the timestamp; set by subclasses.
    date_format = None
    # Prefix of the ValueError message strptime raises when a date was
    # parsed but more text follows it.
    _date_parse_msg = 'unconverted data remains: '

    def __init__(self):
        # Lazily computed by the ``date_length`` property.
        self._date_length = None

    def prepare_line(self, line, file_datetime):
        """Return ``line`` in a form that ``date_format`` can parse.

        The base implementation returns the line unchanged and ignores
        ``file_datetime``.
        """
        return line

    def parse_date(self, line):
        """Parse the timestamp found at the beginning of ``line``.

        The whole line is handed to ``strptime`` first.  When it fails only
        because text follows the date, the date part is parsed again on its
        own.

        Raises:
            ValueError: if the line does not start with a date matching
                ``date_format``.
        """
        try:
            return datetime.strptime(line, self.date_format)
        except ValueError as e:
            message = e.args[0]
            if not message.startswith(self._date_parse_msg):
                raise

        # The error message ends with the text strptime could not consume,
        # so its length tells where the date stops inside the line.
        leftover_length = len(message) - len(self._date_parse_msg)
        date_part = line[:len(line) - leftover_length]
        return datetime.strptime(date_part, self.date_format)

    def _calculate_date_length(self):
        """Return the length of the timestamp as written in the log file."""
        return len(self.date.strftime(self.date_format))

    @property
    def date_length(self):
        """Length of the timestamp in the raw line (computed once)."""
        if self._date_length is None:
            self._date_length = self._calculate_date_length()
        return self._date_length

    @classmethod
    def factory(cls, filename, line, file_datetime):
        """Build an entry from a raw ``line`` of ``filename``.

        Args:
            filename: stored on the entry as ``filename``.
            line: raw text line read from the file.
            file_datetime: passed to ``prepare_line`` for formats that need
                extra information to complete the date.

        Raises:
            ValueError: if the line is empty, does not start with a valid
                date, or the date is not followed by ``separator``.
        """
        entry = cls()
        entry.filename = filename
        if not line:
            raise ValueError('empty line')

        # Prepare the line for date parsing and extract the datetime
        prepared_line = entry.prepare_line(line, file_datetime)
        entry.date = entry.parse_date(prepared_line)

        # ``<=`` (not ``==``): a line shorter than the expected timestamp
        # must be rejected with ValueError like any other non matching line
        # instead of failing with IndexError on the separator lookup.
        date_length = entry.date_length
        if (len(line) <= date_length or
                line[date_length] != entry.separator):
            raise ValueError('date is not followed by a separator')

        entry.date_str = line[:date_length]
        # +1 to remove the separator so we don't have 2 spaces on output
        entry.data = line[date_length + 1:]
        return entry

    def append_line(self, line):
        """Append an undated continuation ``line`` to this entry's data."""
        self.data += EXTRALINES_PADDING + line

    def __cmp__(self, other):
        """Order entries by their parsed date."""
        return cmp(self.date, other.date)
class LogFile(object):
log_entry_class = LogEntry
@staticmethod
def factory(cls, filename):
    """Create a LogFile for ``filename`` that builds its entries with ``cls``.

    ``cls`` is the log entry class, not LogFile itself: this is a static
    method, so nothing is bound to the first argument.
    """
    log_file = LogFile(filename)
    log_file.log_entry_class = cls
    return log_file
def __init__(self, filename):
    """Open ``filename`` right away; see ``open`` for what is stored."""
    self.open(filename)
def open(self, filename):
    """Open the log for reading and remember its modification time.

    ``filename`` is kept as given in ``self._filename``.  Names starting
    with ``http://`` are first replaced by the local path returned by
    ``self._cached_download``.  The opened file goes to ``self._file`` and
    the modification time of the local file to ``self.mtime``.
    """
    self._filename = filename
    local_path = filename
    if filename.startswith("http://"):
        local_path = self._cached_download(filename)
    self._file = open(local_path, 'r')
    modified_at = os.stat(local_path).st_mtime
    self.mtime = datetime.fromtimestamp(modified_at)
def _url_cache_path(self, url):
md5 = hashlib.md5()
@ -104,29 +178,16 @@ class OpenStackLog:
file_out = open(path, 'w')
file_out.write(http_in.read())
file_out.close()
# Set the file time to the one from the URL
info = http_in.info()
m_date = info.getdate('date')
mtime = time.mktime(m_date)
os.utime(path, (mtime, mtime))
http_in.close()
return path
def _extract_with_date(self, line):
try:
# TODO(mangelajo): We support the default log format
# so far, but we may need to discover
# different ones.
chunks = line.split(" ")
datetime_str = ' '.join(chunks[:2])
# this is likely to be not necessary, we can just compare
# strings, and that's going to be faster than parsing
# and regenerating later, but, could be useful when mixing
# log and date formats.
date_object = datetime.strptime(
datetime_str, "%Y-%m-%d %H:%M:%S.%f")
pid, level = chunks[2], chunks[3]
rest = ' '.join(chunks[4:])
return (date_object, datetime_str, self._filename, pid, level,
rest)
except IndexError:
return None
def __iter__(self):
self.entry = None
self.next_entry = None
@ -139,7 +200,9 @@ class OpenStackLog:
return entry, None
try:
new_entry = self._extract_with_date(line)
new_entry = self.log_entry_class.factory(self._filename,
line,
self.mtime)
if new_entry is None:
continue
if entry:
@ -150,9 +213,7 @@ class OpenStackLog:
# it's a non-dated line, just append to the entry
# extra info
if entry:
(date_object, date_str, filename, pid, level, rest) = entry
entry = (date_object, date_str, filename, pid, level,
rest + EXTRALINES_PADDING + line)
entry.append_line(line)
def __next__(self):
return self.next()
@ -174,7 +235,29 @@ class OpenStackLog:
if (other.peek() or self.peek()) is None:
return 0 if self.peek() is None else -1
return cmp(self.peek()[0], other.peek()[0])
return cmp(self.peek(), other.peek())
class MsgLogEntry(LogEntry):
    """Message format: Oct 15 14:11:19"""

    # The lines carry no year, so the format expects one glued in front of
    # the month (see prepare_line).
    date_format = '%Y%b %d %H:%M:%S'

    def prepare_line(self, line, file_datetime):
        """Return ``line`` prefixed with the year of ``file_datetime``."""
        # TODO: If year of file creation and file last modification are
        # different we should start with the creation year and then change
        # to the next year once the months go back.
        year = file_datetime.year
        return '%s%s' % (year, line)

    def _calculate_date_length(self):
        """Return the timestamp length without the 4 characters added for
        the year, which are not present in the raw line."""
        formatted_length = super(MsgLogEntry, self)._calculate_date_length()
        return formatted_length - 4
class OSLogEntry(LogEntry):
    """OpenStack default log: 2016-02-01 10:22:59.239"""

    date_format = '%Y-%m-%d %H:%M:%S.%f'

    def _calculate_date_length(self):
        """Return the timestamp length as written in the log.

        The formatted date is 3 characters longer than the example format
        above, which shows only three fractional digits.
        """
        formatted_length = super(OSLogEntry, self)._calculate_date_length()
        return formatted_length - 3
def process_logs_limit_memory_usage(logs):
@ -207,20 +290,27 @@ def process_logs_memory_hog(logs):
for entry in log:
all_entries.append(entry)
sorted_entries = sorted(all_entries, key=lambda log_entry: log_entry[0])
sorted_entries = sorted(all_entries)
for entry in sorted_entries:
yield entry
# Pairs of (attribute name on the parsed arguments, log entry class).
# process_logs reads the file list from each attribute and parses those
# files with the paired entry class.
LOG_TYPES = [
('logfiles', OSLogEntry),
('logfiles_m', MsgLogEntry),
]
def process_logs(cfg):
filename_alias = {}
logs = []
for filename in cfg.logfiles:
path, alias, is_url = get_path_and_alias(filename,
cfg.log_base,
cfg.log_postfix)
filename_alias[path] = (filename, alias, is_url)
logs.append(OpenStackLog(path))
for arg_name, entry_cls in LOG_TYPES:
for filename in getattr(cfg, arg_name):
path, alias, is_url = get_path_and_alias(filename,
cfg.log_base,
cfg.log_postfix)
filename_alias[path] = (filename, alias, is_url)
logs.append(LogFile.factory(entry_cls, path))
alias = generate_aliases(filename_alias, cfg)
@ -230,9 +320,8 @@ def process_logs(cfg):
method = process_logs_memory_hog
for entry in method(logs):
(date_object, date_str, filename, pid, level, rest) = entry
print (' '.join([date_str, '[%s]' % alias[filename], pid,
level, rest]).rstrip('\n'))
print('%s [%s] %s' % (entry.date_str, alias[entry.filename],
entry.data.rstrip('\n')))
def get_path_and_alias(filename, log_base, log_postfix):
@ -400,9 +489,16 @@ alias. Use the aliases if you want shorter line lengths.
Logs are expected to contain lines in the following format:
Y-m-d H:M:S.mmm PID LOG-LEVEL ............
Y-m-d H:M:S.mmm PID LOG-LEVEL ............
Y-m-d H:M:S.mmm ............
Y-m-d H:M:S.mmm ............
[ extra line info ..... ]
Logs with default /var/log/messages datetime format (Oct 15 14:11:19)
can optionally be merged as well using "--msg-logs" or "-ml"
options. Year will be taken from the last modified time of the file.
These log files will aso be affected by log base directory and log
postfix.
"""
general_epilog = """
@ -449,14 +545,17 @@ one has not been provided:'
help='Base path for all the log files')
parser.add_argument('--log-postfix ', '-p', dest='log_postfix',
help='Append to all the log files path')
parser.add_argument('logfiles', nargs='+', metavar='log_file',
help='File in the format of log_file[:ALIAS]')
parser.add_argument('logfiles', nargs='+', metavar='log_file[:ALIAS]',
help='OpenStack log file.')
parser.add_argument('--alias-level', '-a', type=int, default=0,
dest='alias_level',
help='Level of smart alias naming (0-3)')
parser.add_argument('--min-memory', '-m', default=False,
action='store_true', dest='limit_memory',
help='Limit memory usage')
parser.add_argument('--msg-logs', '-ml', default=[], nargs='+',
dest='logfiles_m', metavar='file[:ALIAS]',
help='Message log files with format: Oct 15 14:11:19')
return parser.parse_args()