646 lines
26 KiB
Python
646 lines
26 KiB
Python
# (C) Copyright 2015 Hewlett Packard Enterprise Development Company LP
|
|
"""Base class for Checks.
|
|
|
|
If you are writing your own checks you should subclass the AgentCheck class.
|
|
The Check class is being deprecated so don't write new checks with it.
|
|
"""
|
|
# This file uses 'print' as a function rather than a statement, a la Python3
|
|
from __future__ import print_function
|
|
|
|
import logging
|
|
import os
|
|
import pprint
|
|
import re
|
|
import time
|
|
import traceback
|
|
|
|
import yaml
|
|
|
|
import monasca_agent.common.aggregator as aggregator
|
|
import monasca_agent.common.check_status as check_status
|
|
import monasca_agent.common.exceptions as exceptions
|
|
import monasca_agent.common.metrics as metrics_pkg
|
|
import monasca_agent.common.util as util
|
|
|
|
|
|
# TODO: Convert all checks to the new interface, then remove this class and the LaconicFilter, which isn't used elsewhere.
|
|
# =============================================================================
|
|
# DEPRECATED
|
|
# ------------------------------
|
|
# If you are writing your own check, you should inherit from AgentCheck
|
|
# and not this class. This class will be removed in a future version
|
|
# of the agent and is currently only used for Windows.
|
|
# =============================================================================
|
|
class Check(util.Dimensions):
    """(Abstract) class for all checks with the ability to:

    * store 1 (and only 1) sample for gauges per metric/dimensions combination
    * compute rates for counters
    * only log error messages once (instead of each time they occur)

    This class is deprecated; new checks should subclass AgentCheck instead.
    """

    def __init__(self, logger, agent_config=None):
        """Initialize the sample store and install the laconic log filter.

        :param logger: The logger used by this check
        :param agent_config: (optional) The global configuration for the
                             agent, passed through to util.Dimensions
        """
        super(Check, self).__init__(agent_config)
        # Where samples are stored, indexed by metric name:
        #   metric_name: {(dimensions_key, device_name): samples}
        # dimensions_key is a tuple of the sorted dimension items (dicts are
        # not hashable); metrics without dimensions use the empty tuple.
        # Gauges keep a 1-tuple of samples, counters keep a list of at most
        # two samples; each sample is (ts, value, hostname, device_name).
        self._sample_store = {}
        self._counters = {}  # metric_name: bool
        self.logger = logger
        try:
            self.logger.addFilter(util.LaconicFilter())
        except Exception:
            self.logger.exception("Trying to install laconic log filter and failed")

    @staticmethod
    def normalize(metric, prefix=None):
        """Turn a metric into a well-formed metric name prefix.b.c

        The characters , + * - / ( ) [ ] { } are replaced by underscores,
        runs of underscores are collapsed, and underscores at either end of
        the name or next to a dot are dropped.

        :param metric: The metric name to normalize
        :param prefix: (optional) A prefix prepended to the name with a dot
        """
        name = re.sub(r"[,\+\*\-/()\[\]{}]", "_", metric)
        # Eliminate multiple _
        name = re.sub(r"__+", "_", name)
        # Don't start/end with _
        name = re.sub(r"^_", "", name)
        name = re.sub(r"_$", "", name)
        # Drop ._ and _.
        name = re.sub(r"\._", ".", name)
        name = re.sub(r"_\.", ".", name)

        if prefix is not None:
            return prefix + "." + name
        return name

    @staticmethod
    def normalize_device_name(device_name):
        """Strip, lower-case and replace spaces by underscores in a device name.
        """
        return device_name.strip().lower().replace(' ', '_')

    @staticmethod
    def _sample_key(dimensions, device_name):
        """Build the hashable store key for a dimensions/device combination.

        A missing (None) dimensions dictionary is treated as "no dimensions"
        so that metrics saved without dimensions can be stored and read back.
        """
        return (tuple(sorted((dimensions or {}).items())), device_name)

    def counter(self, metric):
        """Treats the metric as a counter, i.e. computes its per second derivative

        ACHTUNG: Resets previous values associated with this metric.
        """
        self._counters[metric] = True
        self._sample_store[metric] = {}

    def is_counter(self, metric):
        """Is this metric a counter?
        """
        return metric in self._counters

    def gauge(self, metric):
        """Treats the metric as a gauge, i.e. keep the data as is

        ACHTUNG: Resets previous values associated with this metric.
        """
        self._sample_store[metric] = {}

    def is_metric(self, metric):
        """Has this metric been declared (as a gauge or a counter)?
        """
        return metric in self._sample_store

    def is_gauge(self, metric):
        """Is this metric a gauge, i.e. declared but not a counter?
        """
        return self.is_metric(metric) and not self.is_counter(metric)

    def get_metric_names(self):
        """Get all metric names.
        """
        return self._sample_store.keys()

    def save_gauge(self, metric, value, timestamp=None,
                   dimensions=None, hostname=None, device_name=None):
        """Save a gauge value, declaring the metric as a gauge if needed.
        """
        if not self.is_gauge(metric):
            self.gauge(metric)
        self.save_sample(metric, value, timestamp, dimensions, hostname, device_name)

    def save_sample(self, metric, value, timestamp=None,
                    dimensions=None, hostname=None, device_name=None):
        """Save a simple sample, evict old values if needed.

        :param metric: The name of a metric previously declared with
                       gauge() or counter()
        :param value: The sample value, cast with util.cast_metric_val
        :param timestamp: (optional) Epoch timestamp, defaults to now
        :param dimensions: (optional) A dictionary of dimensions
        :param hostname: (optional) A hostname stored with the sample
        :param device_name: (optional) A device name stored with the sample
        :raises exceptions.CheckException: if the metric was never declared
        :raises exceptions.NaN: if the value cannot be cast
        """
        if timestamp is None:
            timestamp = time.time()
        if metric not in self._sample_store:
            raise exceptions.CheckException("Saving a sample for an undefined metric: %s" % metric)
        try:
            value = util.cast_metric_val(value)
        except ValueError as ve:
            raise exceptions.NaN(ve)

        # Data eviction rules
        key = self._sample_key(dimensions, device_name)
        sample = (timestamp, value, hostname, device_name)
        store = self._sample_store[metric]
        if self.is_gauge(metric):
            # Only the latest value is kept for a gauge
            store[key] = (sample, )
        elif self.is_counter(metric):
            previous = store.get(key)
            if previous is None:
                store[key] = [sample]
            else:
                # Keep the last sample plus the new one to compute a rate
                store[key] = previous[-1:] + [sample]
        else:
            raise exceptions.CheckException("%s must be either gauge or counter, skipping sample at %s" %
                                            (metric, time.ctime(timestamp)))

        if self.is_gauge(metric):
            # store[metric][dimensions] = (ts, val) - only 1 value allowed
            assert len(store[key]) == 1, store
        elif self.is_counter(metric):
            assert len(store[key]) in (1, 2), store

    @classmethod
    def _rate(cls, sample1, sample2):
        """Simple rate between two samples (sample1 being the older one).

        :returns: (timestamp, rate, hostname, device_name) taken from sample2
        :raises exceptions.Infinity: if both samples have the same timestamp
        :raises exceptions.UnknownValue: if the value decreased
        :raises exceptions.NaN: on any other failure
        """
        try:
            rate_interval = sample2[0] - sample1[0]
            if rate_interval == 0:
                raise exceptions.Infinity()

            delta = sample2[1] - sample1[1]
            if delta < 0:
                raise exceptions.UnknownValue()

            return (sample2[0], delta / rate_interval, sample2[2], sample2[3])
        except (exceptions.Infinity, exceptions.UnknownValue):
            raise
        except Exception as e:
            raise exceptions.NaN(e)

    def get_sample_with_timestamp(self, metric, dimensions=None, device_name=None, expire=True):
        """Get (timestamp-epoch-style, value, hostname, device_name).

        For counters the value is the rate between the last two samples and,
        when expire is true, all but the latest sample are discarded.

        :raises exceptions.UnknownValue: if the metric or the
            dimensions/device combination was never seen, or if a counter
            does not have enough samples to compute a rate
        """
        # Get the proper dimensions
        key = self._sample_key(dimensions, device_name)

        # Never seen this metric
        if metric not in self._sample_store:
            raise exceptions.UnknownValue()

        # Never seen this dimensions/device combination
        samples = self._sample_store[metric].get(key)
        if not samples:
            raise exceptions.UnknownValue()

        if self.is_counter(metric):
            # Not enough value to compute rate
            if len(samples) < 2:
                raise exceptions.UnknownValue()
            res = self._rate(samples[-2], samples[-1])
            if expire:
                del samples[:-1]
            return res

        if self.is_gauge(metric):
            return samples[-1]

        raise exceptions.UnknownValue()

    def get_sample(self, metric, dimensions=None, device_name=None, expire=True):
        """Return the last value for that metric.
        """
        x = self.get_sample_with_timestamp(metric, dimensions, device_name, expire)
        assert isinstance(x, tuple) and len(x) == 4, x
        return x[1]

    def get_samples_with_timestamps(self, expire=True):
        """Return all values {metric: (ts, value, hostname, device_name)} for non-tagged metrics.
        """
        values = {}
        for m in self._sample_store:
            try:
                values[m] = self.get_sample_with_timestamp(m, expire=expire)
            except Exception:
                # Best effort: metrics without a usable sample are left out
                pass
        return values

    def get_samples(self, expire=True):
        """Return all values {metric: value} for non-tagged metrics.
        """
        samples = self.get_samples_with_timestamps(expire=expire)
        # Discard the timestamp
        return dict((m, sample[1]) for m, sample in samples.items())

    def get_metrics(self, expire=True, prettyprint=False):
        """Get all metrics, including the ones that are tagged.

        This is the preferred method to retrieve metrics

        @return the list of samples
        @rtype [(metric_name, timestamp, value,
                 {"dimensions": {"name1": "key1", "name2": "key2"}}), ...]
        """
        metrics = []
        for m in self._sample_store:
            try:
                for key in self._sample_store[m]:
                    dimensions_list, device_name = key
                    dimensions = dict(dimensions_list)
                    try:
                        ts, val, hostname, device_name = self.get_sample_with_timestamp(
                            m, dimensions, device_name, expire)
                    except exceptions.UnknownValue:
                        continue
                    attributes = {}
                    if dimensions_list:
                        attributes['dimensions'] = self._set_dimensions(dimensions)
                    if hostname:
                        attributes['hostname'] = hostname
                    if device_name:
                        attributes['device'] = device_name
                    metrics.append((m, int(ts), val, attributes))
            except Exception:
                # Best effort: skip the rest of this metric but leave a trace
                self.logger.debug("Failed to collect samples for metric %s", m, exc_info=True)
        if prettyprint:
            print("Metrics: {0}".format(metrics))
        return metrics
|
|
|
|
|
|
class AgentCheck(util.Dimensions):
    """Base class for checks.

    Subclasses override check(); metrics submitted through gauge(),
    increment(), decrement(), rate(), histogram() and set() are handed to
    a MetricsAggregator and retrieved with get_metrics().
    """

    def __init__(self, name, init_config, agent_config, instances=None):
        """Initialize a new check.

        :param name: The name of the check
        :param init_config: The config for initializing the check
        :param agent_config: The global configuration for the agent
        :param instances: A list of configuration objects for each instance.
        """
        super(AgentCheck, self).__init__(agent_config)
        self.name = name
        self.init_config = init_config
        self.hostname = util.get_hostname()
        self.log = logging.getLogger('%s.%s' % (__name__, name))

        threshold = agent_config.get('recent_point_threshold', None)
        self.aggregator = (
            aggregator.MetricsAggregator(self.hostname,
                                         recent_point_threshold=threshold))

        self.events = []
        self.instances = instances or []
        self.warnings = []
        # Filled in lazily by get_library_info()
        self.library_versions = None

    def instance_count(self):
        """Return the number of instances that are configured for this check.
        """
        return len(self.instances)

    def gauge(self, metric, value, dimensions=None, delegated_tenant=None, hostname=None,
              device_name=None, timestamp=None, value_meta=None):
        """Record the value of a gauge, with optional dimensions, hostname, value metadata and device name.

        :param metric: The name of the metric
        :param value: The value of the gauge
        :param dimensions: (optional) A dictionary of dimensions for this metric
        :param delegated_tenant: (optional) Submit metrics on behalf of this tenant ID.
        :param hostname: (optional) A hostname for this metric. Defaults to the current hostname.
        :param device_name: (optional) The device name for this metric
        :param timestamp: (optional) The timestamp for this metric value
        :param value_meta: Additional metadata about this value
        """
        self.aggregator.submit_metric(metric,
                                      value,
                                      metrics_pkg.Gauge,
                                      dimensions,
                                      delegated_tenant,
                                      hostname,
                                      device_name,
                                      value_meta,
                                      timestamp)

    def increment(self, metric, value=1, dimensions=None, delegated_tenant=None,
                  hostname=None, device_name=None, value_meta=None):
        """Increment a counter with optional dimensions, hostname and device name.

        :param metric: The name of the metric
        :param value: The value to increment by
        :param dimensions: (optional) A dictionary of dimensions for this metric
        :param delegated_tenant: (optional) Submit metrics on behalf of this tenant ID.
        :param hostname: (optional) A hostname for this metric. Defaults to the current hostname.
        :param device_name: (optional) The device name for this metric
        :param value_meta: Additional metadata about this value
        """
        self.aggregator.submit_metric(metric,
                                      value,
                                      metrics_pkg.Counter,
                                      dimensions,
                                      delegated_tenant,
                                      hostname,
                                      device_name,
                                      value_meta)

    def decrement(self, metric, value=1, dimensions=None, delegated_tenant=None,
                  hostname=None, device_name=None, value_meta=None):
        """Decrement a counter with optional dimensions, hostname and device name.

        :param metric: The name of the metric
        :param value: The value to decrement by
        :param dimensions: (optional) A dictionary of dimensions for this metric
        :param delegated_tenant: (optional) Submit metrics on behalf of this tenant ID.
        :param hostname: (optional) A hostname for this metric. Defaults to the current hostname.
        :param device_name: (optional) The device name for this metric
        :param value_meta: Additional metadata about this value
        """
        # A decrement is submitted as a counter with the negated value
        value *= -1
        self.aggregator.submit_metric(metric,
                                      value,
                                      metrics_pkg.Counter,
                                      dimensions,
                                      delegated_tenant,
                                      hostname,
                                      device_name,
                                      value_meta)

    def rate(self, metric, value, dimensions=None, delegated_tenant=None,
             hostname=None, device_name=None, value_meta=None):
        """Submit a point for a metric that will be calculated as a rate on flush.

        Values will persist across each call to `check` if there are not
        enough points to generate a rate on the flush.

        :param metric: The name of the metric
        :param value: The value of the rate
        :param dimensions: (optional) A dictionary of dimensions for this metric
        :param delegated_tenant: (optional) Submit metrics on behalf of this tenant ID.
        :param hostname: (optional) A hostname for this metric. Defaults to the current hostname.
        :param device_name: (optional) The device name for this metric
        :param value_meta: Additional metadata about this value
        """
        self.aggregator.submit_metric(metric,
                                      value,
                                      metrics_pkg.Rate,
                                      dimensions,
                                      delegated_tenant,
                                      hostname,
                                      device_name,
                                      value_meta)

    def histogram(self, metric, value, dimensions=None, delegated_tenant=None,
                  hostname=None, device_name=None, value_meta=None):
        """Sample a histogram value, with optional dimensions, hostname and device name.

        :param metric: The name of the metric
        :param value: The value to sample for the histogram
        :param dimensions: (optional) A dictionary of dimensions for this metric
        :param delegated_tenant: (optional) Submit metrics on behalf of this tenant ID.
        :param hostname: (optional) A hostname for this metric. Defaults to the current hostname.
        :param device_name: (optional) The device name for this metric
        :param value_meta: Additional metadata about this value
        """
        self.aggregator.submit_metric(metric,
                                      value,
                                      metrics_pkg.Histogram,
                                      dimensions,
                                      delegated_tenant,
                                      hostname,
                                      device_name,
                                      value_meta)

    def set(self, metric, value, dimensions=None, delegated_tenant=None,
            hostname=None, device_name=None, value_meta=None):
        """Sample a set value, with optional dimensions, hostname and device name.

        :param metric: The name of the metric
        :param value: The value for the set
        :param dimensions: (optional) A dictionary of dimensions for this metric
        :param delegated_tenant: (optional) Submit metrics on behalf of this tenant ID.
        :param hostname: (optional) A hostname for this metric. Defaults to the current hostname.
        :param device_name: (optional) The device name for this metric
        :param value_meta: Additional metadata about this value
        """
        self.aggregator.submit_metric(metric,
                                      value,
                                      metrics_pkg.Set,
                                      dimensions,
                                      delegated_tenant,
                                      hostname,
                                      device_name,
                                      value_meta)

    def event(self, event):
        """Save an event.

        If the event has no 'api_key', the one from the agent configuration
        (self.agent_config['api_key']) is used.

        :param event: The event payload as a dictionary. Has the following
        structure:

            {
                "timestamp": int, the epoch timestamp for the event,
                "event_type": string, the event type name,
                "api_key": string, the api key of the account to associate the event with,
                "msg_title": string, the title of the event,
                "msg_text": string, the text body of the event,
                "alert_type": (optional) string, one of ('error', 'warning', 'success', 'info').
                    Defaults to 'info'.
                "source_type_name": (optional) string, the source type name,
                "host": (optional) string, the name of the host,
                "dimensions": (optional) a dictionary of dimensions to associate with this event
            }
        """
        if event.get('api_key') is None:
            # NOTE(review): self.agent_config is presumably set by
            # util.Dimensions.__init__ - confirm.
            event['api_key'] = self.agent_config['api_key']
        self.events.append(event)

    def has_events(self):
        """Check whether the check has saved any events

        @return whether or not the check has saved any events
        @rtype boolean
        """
        return len(self.events) > 0

    @staticmethod
    def _print_mapping(label, mapping):
        """Print a label followed by the name=value pairs of a mapping.

        The first pair shares the label's line, the following ones are
        indented by 13 spaces. 'None' is printed for an empty or missing
        mapping so that the label's line is always terminated.
        """
        print(label, end='')
        if not mapping:
            print('None')
            return
        for line, name in enumerate(mapping):
            if line != 0:
                print(" " * 13, end='')
            print("{0}={1}".format(name, mapping[name]))

    def get_metrics(self, prettyprint=False):
        """Get all metrics, including the ones that are tagged.

        The metrics are obtained by flushing the aggregator.

        :param prettyprint: (optional) Also print each metric to stdout
        @return the list of samples
        @rtype list of Measurement objects from monasca_agent.common.metrics
        """
        metrics = self.aggregator.flush()
        if prettyprint:
            for metric in metrics:
                print(" Timestamp: {0}".format(metric.timestamp))
                print(" Name: {0}".format(metric.name))
                print(" Value: {0}".format(metric.value))
                if metric.delegated_tenant:
                    print(" Delegate ID: {0}".format(metric.delegated_tenant))

                self._print_mapping(" Dimensions: ", metric.dimensions)
                self._print_mapping(" Value Meta: ", metric.value_meta)
                print("-" * 24)

        return metrics

    def get_events(self):
        """Return a list of the events saved by the check, if any

        The saved events are cleared by this call.

        @return the list of events saved by this check
        @rtype list of event dictionaries
        """
        events = self.events
        self.events = []
        return events

    def has_warnings(self):
        """Check whether the instance run created any warnings.
        """
        return len(self.warnings) > 0

    def warning(self, warning_message):
        """Add a warning message that will be printed in the info page

        :param warning_message: String. Warning message to be displayed
        """
        self.warnings.append(warning_message)

    def get_library_info(self):
        """Return the library versions, computing and caching them on first use.

        Returns None when the check does not implement
        get_library_versions().
        """
        if self.library_versions is not None:
            return self.library_versions
        try:
            self.library_versions = self.get_library_versions()
        except NotImplementedError:
            pass
        # Return the freshly computed value too, not only the cached one
        return self.library_versions

    def get_library_versions(self):
        """Should return a string that shows which version

        of the needed libraries are used
        """
        raise NotImplementedError

    def get_warnings(self):
        """Return the list of warnings messages to be displayed in the info page.

        The saved warnings are cleared by this call.
        """
        warnings = self.warnings
        self.warnings = []
        return warnings

    def prepare_run(self):
        """Do any setup required before running all instances"""
        return

    def run(self):
        """Run all instances.

        :returns: A list with one check_status.InstanceStatus per instance:
                  STATUS_WARNING when the instance produced warnings,
                  STATUS_ERROR when check() raised, STATUS_OK otherwise.
        """
        self.prepare_run()

        instance_statuses = []
        for i, instance in enumerate(self.instances):
            try:
                self.check(instance)
                if self.has_warnings():
                    instance_status = check_status.InstanceStatus(i,
                                                                  check_status.STATUS_WARNING,
                                                                  warnings=self.get_warnings())
                else:
                    instance_status = check_status.InstanceStatus(i,
                                                                  check_status.STATUS_OK)
            except Exception as e:
                # A failing instance must not prevent the others from running
                self.log.exception("Check '%s' instance #%s failed", self.name, i)
                instance_status = check_status.InstanceStatus(i,
                                                              check_status.STATUS_ERROR,
                                                              error=e,
                                                              tb=traceback.format_exc())
            instance_statuses.append(instance_status)
        return instance_statuses

    def check(self, instance):
        """Overridden by the check class. This will be called to run the check.

        :param instance: A dict with the instance information. This will vary
        depending on your config structure.
        """
        raise NotImplementedError()

    @staticmethod
    def stop():
        """To be executed when the agent is being stopped to clean resources.
        """
        pass

    @classmethod
    def from_yaml(cls, path_to_yaml=None, agentConfig=None, yaml_text=None, check_name=None):
        """A method used for testing your check without running the agent.

        :param path_to_yaml: (optional) Path of the yaml config; when given,
                             it overrides both yaml_text and check_name
        :param agentConfig: (optional) The global configuration for the agent
        :param yaml_text: (optional) The yaml config as a string
        :param check_name: (optional) The name of the check
        :returns: A (check, instances) tuple
        :raises Exception: if path_to_yaml cannot be opened
        """
        loader_cls = getattr(yaml, 'CLoader', yaml.Loader)

        if path_to_yaml:
            check_name = os.path.basename(path_to_yaml).split('.')[0]
            try:
                f = open(path_to_yaml)
            except IOError:
                raise Exception('Unable to open yaml config: %s' % path_to_yaml)
            # Close the file even if reading it fails
            with f:
                yaml_text = f.read()

        # NOTE(security): yaml.Loader/CLoader can construct arbitrary Python
        # objects, so this must only be fed trusted configuration; consider
        # yaml.SafeLoader if the configs never rely on custom tags.
        # An empty document loads as None, hence the fallback to {}.
        config = yaml.load(yaml_text, Loader=loader_cls) or {}
        check = cls(check_name, config.get('init_config') or {}, agentConfig or {})

        return check, config.get('instances', [])

    @staticmethod
    def normalize(metric, prefix=None):
        """Turn a metric into a well-formed metric name prefix.b.c

        The characters , + * - / ( ) [ ] { } are replaced by underscores,
        runs of underscores are collapsed, and underscores at either end of
        the name or next to a dot are dropped.

        :param metric: The metric name to normalize
        :param prefix: A prefix to add to the normalized name, default None
        """
        name = re.sub(r"[,\+\*\-/()\[\]{}]", "_", metric)
        # Eliminate multiple _
        name = re.sub(r"__+", "_", name)
        # Don't start/end with _
        name = re.sub(r"^_", "", name)
        name = re.sub(r"_$", "", name)
        # Drop ._ and _.
        name = re.sub(r"\._", ".", name)
        name = re.sub(r"_\.", ".", name)

        if prefix is not None:
            return prefix + "." + name
        return name

    @staticmethod
    def read_config(instance, key, message=None, cast=None, optional=False):
        """Read a value from an instance config, optionally casting it.

        :param instance: The instance config dictionary
        :param key: The key to read
        :param message: (optional) The error message used when a mandatory
                        key is missing
        :param cast: (optional) A callable applied to the value found
        :param optional: When False, a missing (or None) value raises
        :returns: The (cast) value, or None for a missing optional key
        :raises Exception: if the value is missing and optional is False
        """
        val = instance.get(key)
        if val is None:
            if optional is False:
                message = message or 'Must provide `%s` value in instance config' % key
                raise Exception(message)
            return val

        if cast is None:
            return val
        return cast(val)
|