# -*- coding: utf-8 -*-
# Copyright 2013 Mirantis, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
"""
Handlers dealing with logs
"""
from itertools import dropwhile
import logging
import os
import re
import time

from oslo_serialization import jsonutils
import web

from nailgun import consts
from nailgun import objects
from nailgun.api.v1.handlers.base import BaseHandler
from nailgun.api.v1.handlers.base import handle_errors
from nailgun.api.v1.handlers.base import serialize
from nailgun.api.v1.handlers.base import validate
from nailgun.settings import settings
from nailgun.task.manager import DumpTaskManager
from nailgun.task.task import DumpTask


logger = logging.getLogger(__name__)


def read_backwards(file, from_byte=None, bufsize=0x20000):
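    """Read lines from the given file in reverse order.

    Yields whole lines (keeping the trailing '\n' where present),
    starting from ``from_byte`` (the end of the file by default) and
    moving towards its beginning, reading chunks of ``bufsize`` bytes.
    """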
    cache_pos = file.tell()
    file.seek(0, os.SEEK_END)
    size = file.tell()
    file.seek(cache_pos, os.SEEK_SET)
    if size == 0:
        return
    if from_byte is None:
        from_byte = size
    lines = ['']
    read_size = bufsize
    rem = from_byte % bufsize
    if rem == 0:
        # Perform bufsize reads only
        pos = max(0, (from_byte // bufsize - 1) * bufsize)
    else:
        # One more iteration will be done to read rem bytes so that we
        # are aligned to exactly bufsize reads later on
        read_size = rem
        pos = (from_byte // bufsize) * bufsize

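    # Walk the file backwards one buffer at a time. The first, possibly
    # partial line of each chunk (lines[0]) is carried over and completed
    # by the next, earlier chunk; the while/else finally yields it once
    # the beginning of the file has been passed.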
    while pos >= 0:
        file.seek(pos, os.SEEK_SET)
        data = file.read(read_size) + lines[0]
        lines = re.findall('[^\n]*\n?', data)
        ix = len(lines) - 2
        while ix > 0:
            yield lines[ix]
            ix -= 1
        pos -= bufsize
        read_size = bufsize
    else:
        yield lines[0]
        # Set cursor position to last read byte
        try:
            file.seek(max(0, pos), os.SEEK_SET)
        except IOError:
            pass


# It turns out that strftime/strptime are costly functions in Python
# http://stackoverflow.com/questions/13468126/a-faster-strptime
# We don't call them if the log and UI date formats aren't very different
STRPTIME_PERFORMANCE_HACK = {}
if settings.UI_LOG_DATE_FORMAT == '%Y-%m-%d %H:%M:%S':
    STRPTIME_PERFORMANCE_HACK = {
        '%Y-%m-%dT%H:%M:%S': lambda date: date.replace('T', ' '),
        '%Y-%m-%d %H:%M:%S': lambda date: date,
    }


def read_log(
        log_file=None,
        level=None,
        log_config={},
        max_entries=None,
        regexp=None,
        from_byte=-1,
        fetch_older=False,
        to_byte=0,
        **kwargs):
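    """Read and parse entries from a log file, newest first.

    Reads backwards from ``from_byte`` (or from the end of the file),
    parses each line with ``regexp`` (expected to define ``date``,
    ``level`` and ``text`` groups) and stops once ``max_entries``
    entries are collected or ``to_byte`` is reached.
    """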
    has_more = False
    entries = []
    log_date_format = log_config['date_format']
    multiline = log_config.get('multiline', False)
    skip_regexp = None
    if 'skip_regexp' in log_config:
        skip_regexp = re.compile(log_config['skip_regexp'])

    allowed_levels = log_config['levels']
    if level:
        allowed_levels = list(dropwhile(lambda l: l != level,
                                        log_config['levels']))

    log_file_size = os.stat(log_file).st_size

    if log_date_format in STRPTIME_PERFORMANCE_HACK:
        strptime_function = STRPTIME_PERFORMANCE_HACK[log_date_format]
    else:
        strptime_function = lambda date: time.strftime(
            settings.UI_LOG_DATE_FORMAT,
            time.strptime(date, log_date_format)
        )

    with open(log_file, 'r') as f:
        # we need to calculate current position manually instead of using
        # tell() because read_backwards uses buffering
        f.seek(0, os.SEEK_END)
        pos = f.tell()
        if from_byte != -1 and fetch_older:
            pos = from_byte
        multilinebuf = []
        for line in read_backwards(f, from_byte=pos):
            pos -= len(line)
            if not fetch_older and pos < to_byte:
                has_more = pos > 0
                break
            entry = line.rstrip('\n')
            if not len(entry):
                continue
            if skip_regexp and skip_regexp.match(entry):
                continue
            m = regexp.match(entry)
            if m is None:
                if multiline:
                    # Add next multiline part to last entry if it exists.
                    multilinebuf.append(entry)
                else:
                    logger.debug("Unable to parse log entry '%s' from %s",
                                 entry, log_file)
                continue
            entry_text = m.group('text')
            if len(multilinebuf):
                multilinebuf.reverse()
                entry_text += '\n' + '\n'.join(multilinebuf)
                multilinebuf = []
            entry_level = m.group('level').upper() or 'INFO'
            if level and not (entry_level in allowed_levels):
                continue
            try:
                entry_date = strptime_function(m.group('date'))
            except ValueError:
                logger.debug("Unable to parse date from log entry."
                             " Date format: %r, date part of entry: %r",
                             log_date_format,
                             m.group('date'))
                continue

            entries.append([
                entry_date,
                entry_level,
                entry_text
            ])

            if len(entries) >= max_entries:
                has_more = True
                break

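    # When paginating backwards (or on the first request), remember where
    # we stopped so the UI can ask for older entries from that offset.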
    if fetch_older or (not fetch_older and from_byte == -1):
        from_byte = pos
        if from_byte == 0:
            has_more = False

    return {
        'entries': entries,
        'from': from_byte,
        'to': log_file_size,
        'has_more': has_more,
    }


class LogEntryCollectionHandler(BaseHandler):
"""Log entry collection handler"""
    @handle_errors
    @validate
    @serialize
    def GET(self):
        """Receives the following parameters:

        - *date_before* - get logs before this date
        - *date_after* - get logs after this date
        - *source* - source of logs
        - *node* - node id (for getting node logs)
        - *level* - log level (all levels shown by default)
        - *to* - get entries up to this byte offset
        - *max_entries* - max number of entries to load

        :returns: Collection of log entries, log file size
                  and whether there are more entries.
        :http:
            * 200 (OK)
            * 400 (invalid *date_before* value)
            * 400 (invalid *date_after* value)
            * 400 (invalid *source* value)
            * 400 (invalid *node* value)
            * 400 (invalid *level* value)
            * 400 (invalid *to* value)
            * 400 (invalid *max_entries* value)
            * 404 (log file not found)
            * 404 (log files dir not found)
            * 404 (node not found)
            * 500 (node has no assigned ip)
            * 500 (invalid regular expression in config)
        """
        data = self.read_and_validate_data()

        log_file = data['log_file']
        fetch_older = data['fetch_older']
        from_byte = data['from_byte']
        to_byte = data['to_byte']

        log_file_size = os.stat(log_file).st_size

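        # Short-circuit when there is nothing to fetch: the client is
        # either already at the end of the file, or paginating backwards
        # and already at its beginning.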
        if (not fetch_older and to_byte >= log_file_size) or \
                (fetch_older and from_byte == 0):
            return {
                'entries': [],
                'from': from_byte,
                'to': log_file_size,
                'has_more': False,
            }

        return read_log(**data)

    def read_and_validate_data(self):
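        """Read and validate the GET parameters for log reading.

        Resolves the log source config and the log file path, and
        returns a dict of keyword arguments for read_log().
        """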
        user_data = web.input()

        if not user_data.get('source'):
            logger.debug("'source' must be specified")
            raise self.http(400, "'source' must be specified")

        try:
            max_entries = int(user_data.get('max_entries',
                                            settings.TRUNCATE_LOG_ENTRIES))
        except ValueError:
            logger.debug("Invalid 'max_entries' value: %r",
                         user_data.get('max_entries'))
            raise self.http(400, "Invalid 'max_entries' value")

        from_byte = None
        try:
            from_byte = int(user_data.get('from', -1))
        except ValueError:
            logger.debug("Invalid 'from' value: %r", user_data.get('from'))
            raise self.http(400, "Invalid 'from' value")

        to_byte = None
        try:
            to_byte = int(user_data.get('to', 0))
        except ValueError:
            logger.debug("Invalid 'to' value: %r", user_data.get('to'))
            raise self.http(400, "Invalid 'to' value")

        fetch_older = 'fetch_older' in user_data and \
            user_data['fetch_older'].lower() in ('1', 'true')

        date_before = user_data.get('date_before')
        if date_before:
            try:
                date_before = time.strptime(date_before,
                                            settings.UI_LOG_DATE_FORMAT)
            except ValueError:
                logger.debug("Invalid 'date_before' value: %r", date_before)
                raise self.http(400, "Invalid 'date_before' value")

        date_after = user_data.get('date_after')
        if date_after:
            try:
                date_after = time.strptime(date_after,
                                           settings.UI_LOG_DATE_FORMAT)
            except ValueError:
                logger.debug("Invalid 'date_after' value: %r", date_after)
                raise self.http(400, "Invalid 'date_after' value")

        log_config = [lc for lc in settings.LOGS
                      if lc['id'] == user_data.get('source')]
        # Fail if the log source is not found, or if it is a fake source
        # but we are running without fake tasks.
        if not log_config or (log_config[0].get('fake') and
                              not settings.FAKE_TASKS):
            logger.debug("Log source %r not found", user_data.get('source'))
            raise self.http(404, "Log source not found")
        log_config = log_config[0]

        # If it is a 'remote' and not a 'fake' log source, calculate the
        # log file path from the base dir, node IP and relative path to
        # the file. Otherwise use the absolute path.
        node = None
        if log_config['remote'] and not log_config.get('fake'):
            if not user_data.get('node'):
                raise self.http(400, "'node' must be specified")
            try:
                node_id = int(user_data.get('node'))
            except ValueError:
                logger.debug("Invalid 'node' value: %r",
                             user_data.get('node'))
                raise self.http(400, "Invalid 'node' value")

            node = objects.Node.get_by_uid(node_id)
            if not node:
                raise self.http(404, "Node not found")
            if not node.ip:
                logger.error('Node %r has no assigned ip', node.id)
                raise self.http(500, "Node has no assigned ip")

            if node.status == consts.NODE_STATUSES.discover:
                ndir = node.ip
            else:
                ndir = objects.Node.get_node_fqdn(node)

            remote_log_dir = os.path.join(log_config['base'], ndir)
            if not os.path.exists(remote_log_dir):
                logger.debug("Log files dir %r for node %s not found",
                             remote_log_dir, node.id)
                raise self.http(404, "Log files dir for node not found")

            log_file = os.path.join(remote_log_dir, log_config['path'])
        else:
            log_file = log_config['path']

        if not os.path.exists(log_file):
            if node:
                logger.debug("Log file %r for node %s not found",
                             log_file, node.id)
            else:
                logger.debug("Log file %r not found", log_file)
            raise self.http(404, "Log file not found")

        level = user_data.get('level')
        if level is not None and level not in log_config['levels']:
            raise self.http(400, "Invalid level")

        try:
            regexp = re.compile(log_config['regexp'])
        except re.error:
            logger.exception('Invalid regular expression for file %r',
                             log_config['id'])
            raise self.http(500, "Invalid regular expression in config")

        if 'skip_regexp' in log_config:
            try:
                re.compile(log_config['skip_regexp'])
            except re.error:
                logger.exception('Invalid regular expression for file %r',
                                 log_config['id'])
                raise self.http(500, "Invalid regular expression in config")

        return {
            'date_after': date_after,
            'date_before': date_before,
            'level': level,
            'log_file': log_file,
            'log_config': log_config,
            'max_entries': max_entries,
            'node': node,
            'regexp': regexp,
            'fetch_older': fetch_older,
            'from_byte': from_byte,
            'to_byte': to_byte,
        }


class LogPackageHandler(BaseHandler):
"""Log package handler"""
    @handle_errors
    @validate
    def PUT(self):
        """:returns: JSONized Task object.

        :http: * 200 (task successfully executed)
               * 400 (data validation failed)
               * 404 (cluster not found in db)
        """
        try:
            conf = jsonutils.loads(web.data()) if web.data() else None
            task_manager = DumpTaskManager()
            task = task_manager.execute(
                conf=conf,
                auth_token=web.ctx.env.get('HTTP_X_AUTH_TOKEN'))
        except Exception as exc:
            logger.warn(u'DumpTask: error while executing '
                        'dump environment task: {0}'.format(str(exc)))
            raise self.http(400, str(exc))

        self.raise_task(task)


class LogPackageDefaultConfig(BaseHandler):
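    """Log package default config handler"""
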
    @handle_errors
    @validate
    @serialize
    def GET(self):
        """Generates default config for snapshot

        :http: * 200
        """
        return DumpTask.conf()


class LogSourceCollectionHandler(BaseHandler):
"""Log source collection handler"""
@serialize
def GET(self):
""":returns: Collection of log sources (from settings)
:http: * 200 (OK)
"""
return settings.LOGS
class SnapshotDownloadHandler(BaseHandler):
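    """Snapshot download handler"""
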
    def GET(self, snapshot_name):
        """:returns: empty response

        :resheader X-Accel-Redirect: snapshot_name
        :http: * 200 (OK)
               * 401 (Unauthorized)
               * 404 (Snapshot with given name does not exist)
        """
        web.header('X-Accel-Redirect', '/dump/' + snapshot_name)
        return ''


class LogSourceByNodeCollectionHandler(BaseHandler):
"""Log source by node collection handler"""
@handle_errors
@validate
@serialize
def GET(self, node_id):
""":returns: Collection of log sources by node (from settings)
:http: * 200 (OK)
* 404 (node not found in db)
"""
node = self.get_object_or_404(objects.Node, node_id)
def getpath(x):
if x.get('fake'):
if settings.FAKE_TASKS:
return x['path']
else:
return ''
else:
if node.status == consts.NODE_STATUSES.discover:
ndir = node.ip
else:
ndir = objects.Node.get_node_fqdn(node)
return os.path.join(x['base'], ndir, x['path'])
f = lambda x: (
x.get('remote') and x.get('path') and x.get('base') and
os.access(getpath(x), os.R_OK) and os.path.isfile(getpath(x))
)
sources = filter(f, settings.LOGS)
return sources