diff --git a/oslogmerger/oslogmerger.py b/oslogmerger/oslogmerger.py index 26c2377..446a0be 100644 --- a/oslogmerger/oslogmerger.py +++ b/oslogmerger/oslogmerger.py @@ -2,9 +2,11 @@ from __future__ import print_function import argparse from datetime import datetime, timedelta import dateutil.parser +import dateutil.tz import hashlib import heapq import os +import re import sys import tempfile import time @@ -231,6 +233,48 @@ class LibvirtdParser(LogParser): return dt, dt_str, line[match.end():] +class RawSyslog(LogParser): + """Raw syslog: <183>1 2017-04-03T21:48:21.781459-03:30""" + + # NOTE(mdbooth): Parsing the date in this regexp and reconstructing it + # manually is a *lot* faster than passing the whole string to + # dateutil.parse(). Didn't try strptime due to having to parse tzinfo + # manually anyway. + HEADER = re.compile('<\d+>\d+\s' + '(' + '(\d{4})-(\d{2})-(\d{2})T' # Date + '(\d{2}):(\d{2}):(\d{2})\.(\d+)' # Time + '(' # + '([+-])(\d{2}):(\d{2})' # Timezone + ')' # + ')\s*') + + def parse_line(self, line): + m = RawSyslog.HEADER.match(line) + if m is None: + raise ValueError("Not syslog packet") + + groups = list(m.groups()) + dt_str = groups.pop(0) + + (tzminutes, tzhours, tzsign, tzstr) = ( + groups.pop(), groups.pop(), groups.pop(), groups.pop()) + tzinfo = make_tzinfo(tzstr, tzsign, tzhours, tzminutes) + + dt = datetime( + year=int(groups.pop(0)), + month=int(groups.pop(0)), + day=int(groups.pop(0)), + hour=int(groups.pop(0)), + minute=int(groups.pop(0)), + second=int(groups.pop(0)), + microsecond=int(groups.pop(0)), + tzinfo=tzinfo, + ) + + return dt, dt_str, line[m.end():] + + class TSLogParser(LogParser): """Timestamped log: [275514.814982]""" @@ -422,6 +466,7 @@ LOG_TYPES = { # Log file formats which can only be auto-detected DETECTED_LOG_TYPES = [ LibvirtdParser, + RawSyslog, ]