Add auto-detect parser for raw syslog

Change-Id: Iba7e80d97cffc809defdfdd87f88a8b698f28019
This commit is contained in:
Matthew Booth 2017-05-05 17:18:54 +01:00
parent 1be8b23dfc
commit 1a31f79327
1 changed files with 45 additions and 0 deletions

View File

@ -2,9 +2,11 @@ from __future__ import print_function
import argparse
from datetime import datetime, timedelta
import dateutil.parser
import dateutil.tz
import hashlib
import heapq
import os
import re
import sys
import tempfile
import time
@ -231,6 +233,48 @@ class LibvirtdParser(LogParser):
return dt, dt_str, line[match.end():]
class RawSyslog(LogParser):
    """Raw syslog: <183>1 2017-04-03T21:48:21.781459-03:30"""

    # NOTE(mdbooth): Parsing the date in this regexp and reconstructing it
    # manually is a *lot* faster than passing the whole string to
    # dateutil.parser.parse(). Didn't try strptime due to having to parse
    # tzinfo manually anyway.
    #
    # Capture groups, in order:
    #   1      the complete timestamp text (date, time and offset)
    #   2-7    year, month, day, hour, minute, second
    #   8      fractional seconds (digits only, any length)
    #   9      the complete offset text, e.g. "-03:30"
    #   10-12  offset sign, offset hours, offset minutes
    #
    # Raw strings are used so that \d and \s reach the regex engine without
    # tripping Python's invalid-escape-sequence warnings.
    HEADER = re.compile(r'<\d+>\d+\s'
                        r'('
                        r'(\d{4})-(\d{2})-(\d{2})T'         # Date
                        r'(\d{2}):(\d{2}):(\d{2})\.(\d+)'   # Time
                        r'('                                #
                        r'([+-])(\d{2}):(\d{2})'            # Timezone
                        r')'                                #
                        r')\s*')

    def parse_line(self, line):
        """Split a raw syslog line into its timestamp and remaining text.

        :param line: a single log line expected to start with a
                     "<PRI>VERSION TIMESTAMP" header
        :returns: a tuple (dt, dt_str, rest) where dt is the timezone-aware
                  datetime built from the header, dt_str is the timestamp
                  text exactly as it appeared in the line, and rest is
                  everything after the header (and any whitespace
                  following it)
        :raises ValueError: if the line does not start with a matching
                            header
        """
        m = RawSyslog.HEADER.match(line)
        if m is None:
            raise ValueError("Not syslog packet")

        (dt_str,
         year, month, day,
         hour, minute, second, fraction,
         tzstr, tzsign, tzhours, tzminutes) = m.groups()

        # The offset fields are handed to make_tzinfo() unchanged; it is
        # expected to return the tzinfo object for that offset.
        tzinfo = make_tzinfo(tzstr, tzsign, tzhours, tzminutes)

        # The fraction is a decimal fraction of a second, not a count of
        # microseconds: ".5" means 500000us, not 5us. Scale it to exactly
        # six digits, right-padding short fractions with zeros and
        # truncating anything finer than microsecond resolution (which
        # datetime cannot represent and would otherwise reject).
        microsecond = int(fraction[:6].ljust(6, '0'))

        dt = datetime(
            year=int(year),
            month=int(month),
            day=int(day),
            hour=int(hour),
            minute=int(minute),
            second=int(second),
            microsecond=microsecond,
            tzinfo=tzinfo,
        )

        return dt, dt_str, line[m.end():]
class TSLogParser(LogParser):
"""Timestamped log: [275514.814982]"""
@ -422,6 +466,7 @@ LOG_TYPES = {
# Parsers for log file formats which can only be selected by auto-detection.
DETECTED_LOG_TYPES = [LibvirtdParser, RawSyslog]