From 1be8b23dfc6e93a82b9741f5468418b1b3818201 Mon Sep 17 00:00:00 2001
From: Matthew Booth
Date: Wed, 20 Sep 2017 18:32:15 +0100
Subject: [PATCH] Add auto-detect parser for libvirt domain logs

Change-Id: I7d98327bebf20b01e41525b56ef59c68cbb8bbd3
---
 oslogmerger/oslogmerger.py | 103 +++++++++++++++++++++++++++++++++++++-
 1 file changed, 102 insertions(+), 1 deletion(-)

diff --git a/oslogmerger/oslogmerger.py b/oslogmerger/oslogmerger.py
index 474b01c..26c2377 100644
--- a/oslogmerger/oslogmerger.py
+++ b/oslogmerger/oslogmerger.py
@@ -1,6 +1,7 @@
 from __future__ import print_function
 import argparse
 from datetime import datetime, timedelta
+import dateutil.tz
 import hashlib
 import heapq
 import os
@@ -99,6 +100,12 @@ class LogEntry(object):
 
 
 class LogParser(object):
+    # Default to UTC if we have no explicit TZ
+    default_tz = dateutil.tz.tzutc()
+
+    def __init__(self, filename):
+        pass
+
     def parse_line(self, line):
         raise NotImplementedError
 
@@ -118,6 +125,7 @@ class StrptimeParser(LogParser):
         dt_str = ' '.join(dt_str)
 
         dt = datetime.strptime(dt_str, self.date_format)
+        dt = dt.replace(tzinfo=self.default_tz)
 
         # +1 to remove the separator so we don't have 2 spaces on output
         return dt, dt_str, data
@@ -145,6 +153,88 @@ class MsgLogParser(StrptimeParser):
         return dt.replace(self.year), dt_str, data
 
 
+def make_tzinfo(name, sign, hours, minutes):
+    """Return a fixed-offset tzinfo called name for a +/-HHMM offset."""
+    tzoffset = int(minutes) * 60 + int(hours) * 3600
+    if sign == '-':
+        tzoffset = -tzoffset
+    return dateutil.tz.tzoffset(name, tzoffset)
+
+
+class LibvirtdParser(LogParser):
+    """Message format: 2017-09-18 18:08:49.163+0000:
+    OR: 2017-09-18T18:08:49.216429Z qemu-kvm:
+
+    This parser handles libvirtd.log and libvirt domain logs. Domain logs
+    contain a mixture of libvirt and qemu logs, hence the 2 log formats.
+    """
+    LIBVIRT = re.compile(r'(\d{4})-(\d{2})-(\d{2}) '          # Date
+                         r'(\d{2}):(\d{2}):(\d{2})\.(\d{3})'  # Time
+                         r'('                                 # Whole TZ string
+                         r'([+-])(\d{2})(\d{2})'              # Timezone
+                         r'):\s*')                            # Separator
+
+    QEMU = re.compile(r'(\d{4})-(\d{2})-(\d{2})T'              # Date
+                      r'(\d{2}):(\d{2}):(\d{2})\.(\d+)Z\s*')   # Time
+
+    def parse_line(self, line):
+        """Return (dt, dt_str, data) parsed from a libvirt or qemu line."""
+        m = self.LIBVIRT.match(line)
+        if m is not None:
+            return self._parse_libvirt(line, m)
+
+        m = self.QEMU.match(line)
+        if m is not None:
+            return self._parse_qemu(line, m)
+
+        raise ValueError('Unsupported format')
+
+    def _parse_libvirt(self, line, match):
+        groups = list(match.groups())
+
+        # The last 4 groups are: whole TZ string, sign, hours, minutes
+        (tzminutes, tzhours, tzsign, tzstr) = (
+            groups.pop(), groups.pop(), groups.pop(), groups.pop())
+        tzinfo = make_tzinfo(tzstr, tzsign, tzhours, tzminutes)
+
+        dt = datetime(
+            year=int(groups.pop(0)),
+            month=int(groups.pop(0)),
+            day=int(groups.pop(0)),
+            hour=int(groups.pop(0)),
+            minute=int(groups.pop(0)),
+            second=int(groups.pop(0)),
+            microsecond=int(groups.pop(0)) * 1000,
+            tzinfo=tzinfo,
+        )
+
+        # Strip colon and trailing whitespace from full date string
+        dt_str = match.group(0).rstrip()[:-1]
+
+        return dt, dt_str, line[match.end():]
+
+    def _parse_qemu(self, line, match):
+        groups = list(match.groups())
+
+        dt = datetime(
+            year=int(groups.pop(0)),
+            month=int(groups.pop(0)),
+            day=int(groups.pop(0)),
+            hour=int(groups.pop(0)),
+            minute=int(groups.pop(0)),
+            second=int(groups.pop(0)),
+            # The regex allows any number of digits: scale to microseconds
+            microsecond=int(groups.pop(0)[:6].ljust(6, '0')),
+            # The trailing 'Z' means UTC
+            tzinfo=dateutil.tz.tzutc(),
+        )
+
+        # Strip trailing whitespace from full date string
+        dt_str = match.group(0).rstrip()
+
+        return dt, dt_str, line[match.end():]
+
+
 class TSLogParser(LogParser):
     """Timestamped log: [275514.814982]"""
 
@@ -185,6 +275,7 @@ class TSLogParser(LogParser):
     def parse_line(self, line):
         end, timestamp = self._read_timestamp(line)
         dt = self.start_date + timedelta(seconds=timestamp)
+        dt = dt.replace(tzinfo=self.default_tz)
         return dt, line[:end + 1], line[end + 1:]
 
 
@@ -193,7 +284,7 @@ class LogFile(object):
         self.open(filename)
 
         parsers = []
-        for cls in LOG_TYPES.values():
+        for cls in list(LOG_TYPES.values()) + DETECTED_LOG_TYPES:
             if cls is None:
                 continue
 
@@ -208,6 +299,9 @@ class LogFile(object):
         # the first to successfully parse a line
         for i in range(0, 5):
             line = self._readline()
+            if line is None:
+                continue
+
             for parser in parsers:
                 try:
                     parser.parse_line(line)
@@ -320,6 +414,7 @@ class LogFile(object):
         return cmp(self.peek(), other.peek())
 
 
+# Log file formats with command line options
 LOG_TYPES = {
     'logfiles_detect': None,
     'logfiles_o': OSLogParser,
@@ -328,6 +423,12 @@ LOG_TYPES = {
 }
 
 
+# Log file formats which can only be auto-detected
+DETECTED_LOG_TYPES = [
+    LibvirtdParser,
+]
+
+
 def process_logs(cfg):
     filename_alias = {}
     logs = []