monasca-agent/monasca_agent/collector/checks/check.py

# (C) Copyright 2015 Hewlett Packard Enterprise Development Company LP
"""Base class for Checks.

If you are writing your own checks you should subclass the AgentCheck class.
The Check class is being deprecated so don't write new checks with it.
"""
# This file uses 'print' as a function rather than a statement, a la Python3
from __future__ import print_function

import logging
import os
import pprint
import re
import time
import traceback

import yaml

import monasca_agent.common.aggregator as aggregator
import monasca_agent.common.check_status as check_status
import monasca_agent.common.exceptions as exceptions
import monasca_agent.common.metrics as metrics_pkg
import monasca_agent.common.util as util


# todo convert all checks to the new interface then remove this and Laconic filter which isn't used elsewhere
# =============================================================================
# DEPRECATED
# ------------------------------
# If you are writing your own check, you should inherit from AgentCheck
# and not this class. This class will be removed in a future version
# of the agent and is currently only used for Windows.
# =============================================================================
class Check(util.Dimensions):

    """(Abstract) class for all checks with the ability to:

    * store 1 (and only 1) sample for gauges per metric/dimensions combination
    * compute rates for counters
    * only log error messages once (instead of each time they occur)
    """

    def __init__(self, logger, agent_config=None):
        # where to store samples, indexed by metric_name
        # metric_name: {("sorted", "dimensions"): [(ts, value), (ts, value)],
        #                 tuple(dimensions) are stored as a key since lists are not hashable
        #               None: [(ts, value), (ts, value)]}
        #                 untagged values are indexed by None
        super(Check, self).__init__(agent_config)
        self._sample_store = {}
        self._counters = {}  # metric_name: bool
        self.logger = logger
        try:
            self.logger.addFilter(util.LaconicFilter())
        except Exception:
            self.logger.exception("Trying to install laconic log filter and failed")

    @staticmethod
    def normalize(metric, prefix=None):
        """Turn a metric into a well-formed metric name

        prefix.b.c
        """
        name = re.sub(r"[,\+\*\-/()\[\]{}]", "_", metric)
        # Eliminate multiple _
        name = re.sub(r"__+", "_", name)
        # Don't start/end with _
        name = re.sub(r"^_", "", name)
        name = re.sub(r"_$", "", name)
        # Drop ._ and _.
        name = re.sub(r"\._", ".", name)
        name = re.sub(r"_\.", ".", name)

        if prefix is not None:
            return prefix + "." + name
        else:
            return name

    @staticmethod
    def normalize_device_name(device_name):
        return device_name.strip().lower().replace(' ', '_')

    def counter(self, metric):
        """Treats the metric as a counter, i.e. computes its per second derivative

        ACHTUNG: Resets previous values associated with this metric.
        """
        self._counters[metric] = True
        self._sample_store[metric] = {}

    def is_counter(self, metric):
        """Is this metric a counter?
        """
        return metric in self._counters

    def gauge(self, metric):
        """Treats the metric as a gauge, i.e. keep the data as is

        ACHTUNG: Resets previous values associated with this metric.
        """
        self._sample_store[metric] = {}

    def is_metric(self, metric):
        return metric in self._sample_store

    def is_gauge(self, metric):
        return self.is_metric(metric) and not self.is_counter(metric)

    def get_metric_names(self):
        """Get all metric names.
        """
        return self._sample_store.keys()

    def save_gauge(self, metric, value, timestamp=None,
                   dimensions=None, hostname=None, device_name=None):
        """Save a gauge value.
        """
        if not self.is_gauge(metric):
            self.gauge(metric)
        self.save_sample(metric, value, timestamp, dimensions, hostname, device_name)

    def save_sample(self, metric, value, timestamp=None,
                    dimensions=None, hostname=None, device_name=None):
        """Save a simple sample, evict old values if needed.
        """
        if timestamp is None:
            timestamp = time.time()
        if metric not in self._sample_store:
            raise exceptions.CheckException("Saving a sample for an undefined metric: %s" % metric)
        try:
            value = util.cast_metric_val(value)
        except ValueError as ve:
            raise exceptions.NaN(ve)

        # Data eviction rules
        key = (tuple(sorted(dimensions.items())), device_name)
        if self.is_gauge(metric):
            self._sample_store[metric][key] = ((timestamp, value, hostname, device_name), )
        elif self.is_counter(metric):
            if self._sample_store[metric].get(key) is None:
                self._sample_store[metric][key] = [(timestamp, value, hostname, device_name)]
            else:
                self._sample_store[metric][key] = self._sample_store[metric][key][-1:] + \
                    [(timestamp, value, hostname, device_name)]
        else:
            raise exceptions.CheckException("%s must be either gauge or counter, skipping sample at %s" %
                                            (metric, time.ctime(timestamp)))

        if self.is_gauge(metric):
            # store[metric][dimensions] = (ts, val) - only 1 value allowed
            assert len(self._sample_store[metric][key]) == 1, self._sample_store[metric]
        elif self.is_counter(metric):
            assert len(self._sample_store[metric][key]) in (1, 2), self._sample_store[metric]

    @classmethod
    def _rate(cls, sample1, sample2):
        """Simple rate.
        """
        try:
            rate_interval = sample2[0] - sample1[0]
            if rate_interval == 0:
                raise exceptions.Infinity()

            delta = sample2[1] - sample1[1]
            if delta < 0:
                raise exceptions.UnknownValue()

            return (sample2[0], delta / rate_interval, sample2[2], sample2[3])
        except exceptions.Infinity:
            raise
        except exceptions.UnknownValue:
            raise
        except Exception as e:
            raise exceptions.NaN(e)

    def get_sample_with_timestamp(self, metric, dimensions=None, device_name=None, expire=True):
        """Get (timestamp-epoch-style, value).
        """

        # Get the proper dimensions
        key = (tuple(sorted(dimensions.items())), device_name)

        # Never seen this metric
        if metric not in self._sample_store:
            raise exceptions.UnknownValue()

        # Not enough value to compute rate
        elif self.is_counter(metric) and len(self._sample_store[metric][key]) < 2:
            raise exceptions.UnknownValue()

        elif self.is_counter(metric) and len(self._sample_store[metric][key]) >= 2:
            res = self._rate(
                self._sample_store[metric][key][-2], self._sample_store[metric][key][-1])
            if expire:
                del self._sample_store[metric][key][:-1]
            return res

        elif self.is_gauge(metric) and len(self._sample_store[metric][key]) >= 1:
            return self._sample_store[metric][key][-1]

        else:
            raise exceptions.UnknownValue()

    def get_sample(self, metric, dimensions=None, device_name=None, expire=True):
        """Return the last value for that metric.
        """
        x = self.get_sample_with_timestamp(metric, dimensions, device_name, expire)
        assert isinstance(x, tuple) and len(x) == 4, x
        return x[1]

    def get_samples_with_timestamps(self, expire=True):
        """Return all values {metric: (ts, value)} for non-tagged metrics.
        """
        values = {}
        for m in self._sample_store:
            try:
                values[m] = self.get_sample_with_timestamp(m, expire=expire)
            except Exception:
                pass
        return values

    def get_samples(self, expire=True):
        """Return all values {metric: value} for non-tagged metrics.
        """
        values = {}
        for m in self._sample_store:
            try:
                # Discard the timestamp
                values[m] = self.get_sample_with_timestamp(m, expire=expire)[1]
            except Exception:
                pass
        return values

    def get_metrics(self, expire=True, prettyprint=False):
        """Get all metrics, including the ones that are tagged.

        This is the preferred method to retrieve metrics

        @return the list of samples
        @rtype [(metric_name, timestamp, value,
                {"dimensions": {"name1": "key1", "name2": "key2"}}), ...]
        """
        metrics = []
        for m in self._sample_store:
            try:
                for key in self._sample_store[m]:
                    dimensions_list, device_name = key
                    dimensions = dict(dimensions_list)
                    try:
                        ts, val, hostname, device_name = self.get_sample_with_timestamp(
                            m, dimensions, device_name, expire)
                    except exceptions.UnknownValue:
                        continue
                    attributes = {}
                    if dimensions_list:
                        attributes['dimensions'] = self._set_dimensions(dimensions)
                    if hostname:
                        attributes['hostname'] = hostname
                    if device_name:
                        attributes['device'] = device_name
                    metrics.append((m, int(ts), val, attributes))
            except Exception:
                pass
            if prettyprint:
                print("Metrics: {0}".format(metrics))
        return metrics


class AgentCheck(util.Dimensions):

    def __init__(self, name, init_config, agent_config, instances=None):
        """Initialize a new check.

        :param name: The name of the check
        :param init_config: The config for initializing the check
        :param agent_config: The global configuration for the agent
        :param instances: A list of configuration objects for each instance.
        """
        super(AgentCheck, self).__init__(agent_config)
        self.name = name
        self.init_config = init_config
        self.hostname = util.get_hostname()
        self.log = logging.getLogger('%s.%s' % (__name__, name))

        threshold = agent_config.get('recent_point_threshold', None)
        self.aggregator = (
            aggregator.MetricsAggregator(self.hostname,
                                         recent_point_threshold=threshold))

        self.events = []
        self.instances = instances or []
        self.warnings = []
        self.library_versions = None

    def instance_count(self):
        """Return the number of instances that are configured for this check.
        """
        return len(self.instances)

    def gauge(self, metric, value, dimensions=None, delegated_tenant=None, hostname=None,
              device_name=None, timestamp=None, value_meta=None):
        """Record the value of a gauge, with optional dimensions, hostname, value metadata and device name.

        :param metric: The name of the metric
        :param value: The value of the gauge
        :param dimensions: (optional) A dictionary of dimensions for this metric
        :param delegated_tenant: (optional) Submit metrics on behalf of this tenant ID.
        :param hostname: (optional) A hostname for this metric. Defaults to the current hostname.
        :param device_name: (optional) The device name for this metric
        :param timestamp: (optional) The timestamp for this metric value
        :param value_meta: Additional metadata about this value
        """
        self.aggregator.submit_metric(metric,
                                      value,
                                      metrics_pkg.Gauge,
                                      dimensions,
                                      delegated_tenant,
                                      hostname,
                                      device_name,
                                      value_meta,
                                      timestamp)

    def increment(self, metric, value=1, dimensions=None, delegated_tenant=None,
                  hostname=None, device_name=None, value_meta=None):
        """Increment a counter with optional dimensions, hostname and device name.

        :param metric: The name of the metric
        :param value: The value to increment by
        :param dimensions: (optional) A dictionary of dimensions for this metric
        :param delegated_tenant: (optional) Submit metrics on behalf of this tenant ID.
        :param hostname: (optional) A hostname for this metric. Defaults to the current hostname.
        :param device_name: (optional) The device name for this metric
        :param value_meta: Additional metadata about this value
        """
        self.aggregator.submit_metric(metric,
                                      value,
                                      metrics_pkg.Counter,
                                      dimensions,
                                      delegated_tenant,
                                      hostname,
                                      device_name,
                                      value_meta)

    def decrement(self, metric, value=1, dimensions=None, delegated_tenant=None,
                  hostname=None, device_name=None, value_meta=None):
        """Decrement a counter with optional dimensions, hostname and device name.

        :param metric: The name of the metric
        :param value: The value to decrement by
        :param dimensions: (optional) A dictionary of dimensions for this metric
        :param delegated_tenant: (optional) Submit metrics on behalf of this tenant ID.
        :param hostname: (optional) A hostname for this metric. Defaults to the current hostname.
        :param device_name: (optional) The device name for this metric
        :param value_meta: Additional metadata about this value
        """
        value *= -1
        self.aggregator.submit_metric(metric,
                                      value,
                                      metrics_pkg.Counter,
                                      dimensions,
                                      delegated_tenant,
                                      hostname,
                                      device_name,
                                      value_meta)

    def rate(self, metric, value, dimensions=None, delegated_tenant=None,
             hostname=None, device_name=None, value_meta=None):
        """Submit a point for a metric that will be calculated as a rate on flush.

        Values will persist across each call to `check` if there is not enough
        point to generate a rate on the flush.

        :param metric: The name of the metric
        :param value: The value of the rate
        :param dimensions: (optional) A dictionary of dimensions for this metric
        :param delegated_tenant: (optional) Submit metrics on behalf of this tenant ID.
        :param hostname: (optional) A hostname for this metric. Defaults to the current hostname.
        :param device_name: (optional) The device name for this metric
        :param value_meta: Additional metadata about this value
        """
        self.aggregator.submit_metric(metric,
                                      value,
                                      metrics_pkg.Rate,
                                      dimensions,
                                      delegated_tenant,
                                      hostname,
                                      device_name,
                                      value_meta)

    def histogram(self, metric, value, dimensions=None, delegated_tenant=None,
                  hostname=None, device_name=None, value_meta=None):
        """Sample a histogram value, with optional dimensions, hostname and device name.

        :param metric: The name of the metric
        :param value: The value to sample for the histogram
        :param dimensions: (optional) A dictionary of dimensions for this metric
        :param delegated_tenant: (optional) Submit metrics on behalf of this tenant ID.
        :param hostname: (optional) A hostname for this metric. Defaults to the current hostname.
        :param device_name: (optional) The device name for this metric
        :param value_meta: Additional metadata about this value
        """
        self.aggregator.submit_metric(metric,
                                      value,
                                      metrics_pkg.Histogram,
                                      dimensions,
                                      delegated_tenant,
                                      hostname,
                                      device_name,
                                      value_meta)

    def set(self, metric, value, dimensions=None, delegated_tenant=None,
            hostname=None, device_name=None, value_meta=None):
        """Sample a set value, with optional dimensions, hostname and device name.

        :param metric: The name of the metric
        :param value: The value for the set
        :param dimensions: (optional) A dictionary of dimensions for this metric
        :param delegated_tenant: (optional) Submit metrics on behalf of this tenant ID.
        :param hostname: (optional) A hostname for this metric. Defaults to the current hostname.
        :param device_name: (optional) The device name for this metric
        :param value_meta: Additional metadata about this value
        """
        self.aggregator.submit_metric(metric,
                                      value,
                                      metrics_pkg.Set,
                                      dimensions,
                                      delegated_tenant,
                                      hostname,
                                      device_name,
                                      value_meta)

    def event(self, event):
        """Save an event.

        :param event: The event payload as a dictionary. Has the following
        structure:

            {
                "timestamp": int, the epoch timestamp for the event,
                "event_type": string, the event time name,
                "api_key": string, the api key of the account to associate the event with,
                "msg_title": string, the title of the event,
                "msg_text": string, the text body of the event,
                "alert_type": (optional) string, one of ('error', 'warning', 'success', 'info').
                    Defaults to 'info'.
                "source_type_name": (optional) string, the source type name,
                "host": (optional) string, the name of the host,
                "dimensions": (optional) a dictionary of dimensions to associate with this event
            }
        """
        if event.get('api_key') is None:
            event['api_key'] = self.agent_config['api_key']
        self.events.append(event)

    def has_events(self):
        """Check whether the check has saved any events

        @return whether or not the check has saved any events
        @rtype boolean
        """
        return len(self.events) > 0

    def get_metrics(self, prettyprint=False):
        """Get all metrics, including the ones that are tagged.

        @return the list of samples
        @rtype list of Measurement objects from monasca_agent.common.metrics
        """
        metrics = self.aggregator.flush()
        if prettyprint:
            for metric in metrics:
                print(" Timestamp:  {0}".format(metric.timestamp))
                print(" Name:       {0}".format(metric.name))
                print(" Value:      {0}".format(metric.value))
                if (metric.delegated_tenant):
                    print(" Delegate ID: {0}".format(metric.delegated_tenant))

                print(" Dimensions: ", end='')
                line = 0
                for name in metric.dimensions:
                    if line != 0:
                        print(" " * 13, end='')
                    print("{0}={1}".format(name, metric.dimensions[name]))
                    line += 1

                print(" Value Meta: ", end='')
                if metric.value_meta:
                    line = 0
                    for name in metric.value_meta:
                        if line != 0:
                            print(" " * 13, end='')
                        print("{0}={1}".format(name, metric.value_meta[name]))
                        line += 1
                else:
                    print('None')
                print("-" * 24)

        return metrics

    def get_events(self):
        """Return a list of the events saved by the check, if any

        @return the list of events saved by this check
        @rtype list of event dictionaries
        """
        events = self.events
        self.events = []
        return events

    def has_warnings(self):
        """Check whether the instance run created any warnings.
        """
        return len(self.warnings) > 0

    def warning(self, warning_message):
        """Add a warning message that will be printed in the info page

        :param warning_message: String. Warning message to be displayed
        """
        self.warnings.append(warning_message)

    def get_library_info(self):
        if self.library_versions is not None:
            return self.library_versions
        try:
            self.library_versions = self.get_library_versions()
        except NotImplementedError:
            pass

    def get_library_versions(self):
        """Should return a string that shows which version

        of the needed libraries are used
        """
        raise NotImplementedError

    def get_warnings(self):
        """Return the list of warnings messages to be displayed in the info page.
        """
        warnings = self.warnings
        self.warnings = []
        return warnings

    def prepare_run(self):
        """Do any setup required before running all instances"""
        return

    def run(self):
        """Run all instances.
        """
        self.prepare_run()

        instance_statuses = []
        for i, instance in enumerate(self.instances):
            try:
                self.check(instance)
                if self.has_warnings():
                    instance_status = check_status.InstanceStatus(i,
                                                                  check_status.STATUS_WARNING,
                                                                  warnings=self.get_warnings())
                else:
                    instance_status = check_status.InstanceStatus(i,
                                                                  check_status.STATUS_OK)
            except Exception as e:
                self.log.exception("Check '%s' instance #%s failed" % (self.name, i))
                instance_status = check_status.InstanceStatus(i,
                                                              check_status.STATUS_ERROR,
                                                              error=e,
                                                              tb=traceback.format_exc())
            instance_statuses.append(instance_status)
        return instance_statuses

    def check(self, instance):
        """Overriden by the check class. This will be called to run the check.

        :param instance: A dict with the instance information. This will vary
        depending on your config structure.
        """
        raise NotImplementedError()

    @staticmethod
    def stop():
        """To be executed when the agent is being stopped to clean ressources.
        """
        pass

    @classmethod
    def from_yaml(cls, path_to_yaml=None, agentConfig=None, yaml_text=None, check_name=None):
        """A method used for testing your check without running the agent.
        """
        if hasattr(yaml, 'CLoader'):
            Loader = yaml.CLoader
        else:
            Loader = yaml.Loader

        if path_to_yaml:
            check_name = os.path.basename(path_to_yaml).split('.')[0]
            try:
                f = open(path_to_yaml)
            except IOError:
                raise Exception('Unable to open yaml config: %s' % path_to_yaml)
            yaml_text = f.read()
            f.close()

        config = yaml.load(yaml_text, Loader=Loader)
        check = cls(check_name, config.get('init_config') or {}, agentConfig or {})

        return check, config.get('instances', [])

    @staticmethod
    def normalize(metric, prefix=None):
        """Turn a metric into a well-formed metric name prefix.b.c

        :param metric The metric name to normalize
        :param prefix A prefix to to add to the normalized name, default None
        """
        name = re.sub(r"[,\+\*\-/()\[\]{}]", "_", metric)
        # Eliminate multiple _
        name = re.sub(r"__+", "_", name)
        # Don't start/end with _
        name = re.sub(r"^_", "", name)
        name = re.sub(r"_$", "", name)
        # Drop ._ and _.
        name = re.sub(r"\._", ".", name)
        name = re.sub(r"_\.", ".", name)

        if prefix is not None:
            return prefix + "." + name
        else:
            return name

    @staticmethod
    def read_config(instance, key, message=None, cast=None, optional=False):
        val = instance.get(key)
        if val is None:
            if optional is False:
                message = message or 'Must provide `%s` value in instance config' % key
                raise Exception(message)
            else:
                return val

        if cast is None:
            return val
        else:
            return cast(val)