Cleaned up a lot of the Pep8 violations
Cleaned up all but four of the pep8 violations E501 Line length > 80 characters F401 module imported but unused H302 import only modules (DEPRECATED) H904 Wrap long lines in parentheses instead of a backslash (DEPRECATED) Change-Id: Id24bff6c5f8b8630a9495f49983324342841866f
This commit is contained in:
parent
d704b17d97
commit
45b156b9fe
|
@ -8,37 +8,38 @@ from __future__ import print_function
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
from pprint import pprint
|
import pprint
|
||||||
import re
|
import re
|
||||||
import time
|
import time
|
||||||
import traceback
|
import traceback
|
||||||
|
|
||||||
from monagent.common import check_status
|
import yaml
|
||||||
from monagent.common.keystone import Keystone
|
|
||||||
from monagent.common.config import get_confd_path
|
import monagent.common.aggregator
|
||||||
from monagent.common.exceptions import CheckException, NaN, Infinity, UnknownValue
|
import monagent.common.config
|
||||||
from monagent.common.util import LaconicFilter, get_hostname, get_os
|
import monagent.common.exceptions
|
||||||
|
import monagent.common.keystone
|
||||||
|
import monagent.common.util
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
# todo convert all checks to the new interface then remove this.
|
# todo convert all checks to the new interface then remove this.
|
||||||
# Is the LaconicFilter on logs used elsewhere?
|
# Is the LaconicFilter on logs used elsewhere?
|
||||||
# ==============================================================================
|
# =============================================================================
|
||||||
# DEPRECATED
|
# DEPRECATED
|
||||||
# ------------------------------
|
# ------------------------------
|
||||||
# If you are writing your own check, you should inherit from AgentCheck
|
# If you are writing your own check, you should inherit from AgentCheck
|
||||||
# and not this class. This class will be removed in a future version
|
# and not this class. This class will be removed in a future version
|
||||||
# of the agent.
|
# of the agent.
|
||||||
# ==============================================================================
|
# =============================================================================
|
||||||
class Check(object):
|
class Check(object):
|
||||||
|
|
||||||
"""
|
"""(Abstract) class for all checks with the ability to:
|
||||||
(Abstract) class for all checks with the ability to:
|
|
||||||
* store 1 (and only 1) sample for gauges per metric/tag combination
|
* store 1 (and only 1) sample for gauges per metric/tag combination
|
||||||
* compute rates for counters
|
* compute rates for counters
|
||||||
* only log error messages once (instead of each time they occur)
|
* only log error messages once (instead of each time they occur)
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, logger, agent_config=None):
|
def __init__(self, logger, agent_config=None):
|
||||||
|
@ -52,13 +53,14 @@ class Check(object):
|
||||||
self._counters = {} # metric_name: bool
|
self._counters = {} # metric_name: bool
|
||||||
self.logger = logger
|
self.logger = logger
|
||||||
try:
|
try:
|
||||||
self.logger.addFilter(LaconicFilter())
|
self.logger.addFilter(monagent.common.util.LaconicFilter())
|
||||||
except Exception:
|
except Exception:
|
||||||
self.logger.exception("Trying to install laconic log filter and failed")
|
self.logger.exception("Trying to install laconic log filter and failed")
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def normalize(metric, prefix=None):
|
def normalize(metric, prefix=None):
|
||||||
"""Turn a metric into a well-formed metric name
|
"""Turn a metric into a well-formed metric name
|
||||||
|
|
||||||
prefix.b.c
|
prefix.b.c
|
||||||
"""
|
"""
|
||||||
name = re.sub(r"[,\+\*\-/()\[\]{}]", "_", metric)
|
name = re.sub(r"[,\+\*\-/()\[\]{}]", "_", metric)
|
||||||
|
@ -81,20 +83,21 @@ class Check(object):
|
||||||
return device_name.strip().lower().replace(' ', '_')
|
return device_name.strip().lower().replace(' ', '_')
|
||||||
|
|
||||||
def counter(self, metric):
|
def counter(self, metric):
|
||||||
"""
|
"""Treats the metric as a counter, i.e. computes its per second derivative
|
||||||
Treats the metric as a counter, i.e. computes its per second derivative
|
|
||||||
ACHTUNG: Resets previous values associated with this metric.
|
ACHTUNG: Resets previous values associated with this metric.
|
||||||
"""
|
"""
|
||||||
self._counters[metric] = True
|
self._counters[metric] = True
|
||||||
self._sample_store[metric] = {}
|
self._sample_store[metric] = {}
|
||||||
|
|
||||||
def is_counter(self, metric):
|
def is_counter(self, metric):
|
||||||
"Is this metric a counter?"
|
"""Is this metric a counter?
|
||||||
|
"""
|
||||||
return metric in self._counters
|
return metric in self._counters
|
||||||
|
|
||||||
def gauge(self, metric):
|
def gauge(self, metric):
|
||||||
"""
|
"""Treats the metric as a gauge, i.e. keep the data as is
|
||||||
Treats the metric as a gauge, i.e. keep the data as is
|
|
||||||
ACHTUNG: Resets previous values associated with this metric.
|
ACHTUNG: Resets previous values associated with this metric.
|
||||||
"""
|
"""
|
||||||
self._sample_store[metric] = {}
|
self._sample_store[metric] = {}
|
||||||
|
@ -106,36 +109,36 @@ class Check(object):
|
||||||
return self.is_metric(metric) and not self.is_counter(metric)
|
return self.is_metric(metric) and not self.is_counter(metric)
|
||||||
|
|
||||||
def get_metric_names(self):
|
def get_metric_names(self):
|
||||||
"Get all metric names"
|
"""Get all metric names.
|
||||||
|
"""
|
||||||
return self._sample_store.keys()
|
return self._sample_store.keys()
|
||||||
|
|
||||||
def save_gauge(self, metric, value, timestamp=None,
|
def save_gauge(self, metric, value, timestamp=None,
|
||||||
dimensions=None, hostname=None, device_name=None):
|
dimensions=None, hostname=None, device_name=None):
|
||||||
""" Save a gauge value. """
|
"""Save a gauge value.
|
||||||
|
"""
|
||||||
if not self.is_gauge(metric):
|
if not self.is_gauge(metric):
|
||||||
self.gauge(metric)
|
self.gauge(metric)
|
||||||
self.save_sample(metric, value, timestamp, dimensions, hostname, device_name)
|
self.save_sample(metric, value, timestamp, dimensions, hostname, device_name)
|
||||||
|
|
||||||
def save_sample(self, metric, value, timestamp=None,
|
def save_sample(self, metric, value, timestamp=None,
|
||||||
dimensions=None, hostname=None, device_name=None):
|
dimensions=None, hostname=None, device_name=None):
|
||||||
"""Save a simple sample, evict old values if needed
|
"""Save a simple sample, evict old values if needed.
|
||||||
"""
|
"""
|
||||||
if dimensions is None:
|
if dimensions is None:
|
||||||
dimensions = {}
|
dimensions = {}
|
||||||
from common.util import cast_metric_val
|
|
||||||
|
|
||||||
if timestamp is None:
|
if timestamp is None:
|
||||||
timestamp = time.time()
|
timestamp = time.time()
|
||||||
if metric not in self._sample_store:
|
if metric not in self._sample_store:
|
||||||
raise CheckException("Saving a sample for an undefined metric: %s" % metric)
|
raise monagent.common.exceptions.CheckException("Saving a sample for an undefined metric: %s" % metric)
|
||||||
try:
|
try:
|
||||||
value = cast_metric_val(value)
|
value = monagent.common.util.cast_metric_val(value)
|
||||||
except ValueError as ve:
|
except ValueError as ve:
|
||||||
raise NaN(ve)
|
raise monagent.common.exceptions.NaN(ve)
|
||||||
|
|
||||||
# Sort and validate dimensions
|
# Sort and validate dimensions
|
||||||
if dimensions is not None and not isinstance(dimensions, dict):
|
if dimensions is not None and not isinstance(dimensions, dict):
|
||||||
raise CheckException("Dimensions must be a dictionary")
|
raise monagent.common.exceptions.CheckException("Dimensions must be a dictionary")
|
||||||
|
|
||||||
# Data eviction rules
|
# Data eviction rules
|
||||||
key = (tuple(sorted(dimensions.items())), device_name)
|
key = (tuple(sorted(dimensions.items())), device_name)
|
||||||
|
@ -148,8 +151,8 @@ class Check(object):
|
||||||
self._sample_store[metric][key] = self._sample_store[metric][key][-1:] + \
|
self._sample_store[metric][key] = self._sample_store[metric][key][-1:] + \
|
||||||
[(timestamp, value, hostname, device_name)]
|
[(timestamp, value, hostname, device_name)]
|
||||||
else:
|
else:
|
||||||
raise CheckException("%s must be either gauge or counter, skipping sample at %s" %
|
raise monagent.common.exceptions.CheckException("%s must be either gauge or counter, skipping sample at %s" %
|
||||||
(metric, time.ctime(timestamp)))
|
(metric, time.ctime(timestamp)))
|
||||||
|
|
||||||
if self.is_gauge(metric):
|
if self.is_gauge(metric):
|
||||||
# store[metric][dimensions] = (ts, val) - only 1 value allowed
|
# store[metric][dimensions] = (ts, val) - only 1 value allowed
|
||||||
|
@ -159,26 +162,27 @@ class Check(object):
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _rate(cls, sample1, sample2):
|
def _rate(cls, sample1, sample2):
|
||||||
"Simple rate"
|
"""Simple rate.
|
||||||
|
"""
|
||||||
try:
|
try:
|
||||||
interval = sample2[0] - sample1[0]
|
interval = sample2[0] - sample1[0]
|
||||||
if interval == 0:
|
if interval == 0:
|
||||||
raise Infinity()
|
raise monagent.common.exceptions.Infinity()
|
||||||
|
|
||||||
delta = sample2[1] - sample1[1]
|
delta = sample2[1] - sample1[1]
|
||||||
if delta < 0:
|
if delta < 0:
|
||||||
raise UnknownValue()
|
raise monagent.common.exceptions.UnknownValue()
|
||||||
|
|
||||||
return (sample2[0], delta / interval, sample2[2], sample2[3])
|
return (sample2[0], delta / interval, sample2[2], sample2[3])
|
||||||
except Infinity:
|
except monagent.common.exceptions.Infinity:
|
||||||
raise
|
raise
|
||||||
except UnknownValue:
|
except monagent.common.exceptions.UnknownValue:
|
||||||
raise
|
raise
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise NaN(e)
|
raise monagent.common.exceptions.NaN(e)
|
||||||
|
|
||||||
def get_sample_with_timestamp(self, metric, dimensions=None, device_name=None, expire=True):
|
def get_sample_with_timestamp(self, metric, dimensions=None, device_name=None, expire=True):
|
||||||
"""Get (timestamp-epoch-style, value)
|
"""Get (timestamp-epoch-style, value).
|
||||||
"""
|
"""
|
||||||
if dimensions is None:
|
if dimensions is None:
|
||||||
dimensions = {}
|
dimensions = {}
|
||||||
|
@ -188,11 +192,11 @@ class Check(object):
|
||||||
|
|
||||||
# Never seen this metric
|
# Never seen this metric
|
||||||
if metric not in self._sample_store:
|
if metric not in self._sample_store:
|
||||||
raise UnknownValue()
|
raise monagent.common.exceptions.UnknownValue()
|
||||||
|
|
||||||
# Not enough value to compute rate
|
# Not enough value to compute rate
|
||||||
elif self.is_counter(metric) and len(self._sample_store[metric][key]) < 2:
|
elif self.is_counter(metric) and len(self._sample_store[metric][key]) < 2:
|
||||||
raise UnknownValue()
|
raise monagent.common.exceptions.UnknownValue()
|
||||||
|
|
||||||
elif self.is_counter(metric) and len(self._sample_store[metric][key]) >= 2:
|
elif self.is_counter(metric) and len(self._sample_store[metric][key]) >= 2:
|
||||||
res = self._rate(
|
res = self._rate(
|
||||||
|
@ -205,16 +209,18 @@ class Check(object):
|
||||||
return self._sample_store[metric][key][-1]
|
return self._sample_store[metric][key][-1]
|
||||||
|
|
||||||
else:
|
else:
|
||||||
raise UnknownValue()
|
raise monagent.common.exceptions.UnknownValue()
|
||||||
|
|
||||||
def get_sample(self, metric, dimensions=None, device_name=None, expire=True):
|
def get_sample(self, metric, dimensions=None, device_name=None, expire=True):
|
||||||
"Return the last value for that metric"
|
"""Return the last value for that metric.
|
||||||
|
"""
|
||||||
x = self.get_sample_with_timestamp(metric, dimensions, device_name, expire)
|
x = self.get_sample_with_timestamp(metric, dimensions, device_name, expire)
|
||||||
assert isinstance(x, tuple) and len(x) == 4, x
|
assert isinstance(x, tuple) and len(x) == 4, x
|
||||||
return x[1]
|
return x[1]
|
||||||
|
|
||||||
def get_samples_with_timestamps(self, expire=True):
|
def get_samples_with_timestamps(self, expire=True):
|
||||||
"Return all values {metric: (ts, value)} for non-tagged metrics"
|
"""Return all values {metric: (ts, value)} for non-tagged metrics.
|
||||||
|
"""
|
||||||
values = {}
|
values = {}
|
||||||
for m in self._sample_store:
|
for m in self._sample_store:
|
||||||
try:
|
try:
|
||||||
|
@ -224,7 +230,8 @@ class Check(object):
|
||||||
return values
|
return values
|
||||||
|
|
||||||
def get_samples(self, expire=True):
|
def get_samples(self, expire=True):
|
||||||
"Return all values {metric: value} for non-tagged metrics"
|
"""Return all values {metric: value} for non-tagged metrics.
|
||||||
|
"""
|
||||||
values = {}
|
values = {}
|
||||||
for m in self._sample_store:
|
for m in self._sample_store:
|
||||||
try:
|
try:
|
||||||
|
@ -234,8 +241,9 @@ class Check(object):
|
||||||
pass
|
pass
|
||||||
return values
|
return values
|
||||||
|
|
||||||
def get_metrics(self, expire=True):
|
def get_metrics(self, expire=True, prettyprint=False):
|
||||||
"""Get all metrics, including the ones that are tagged.
|
"""Get all metrics, including the ones that are tagged.
|
||||||
|
|
||||||
This is the preferred method to retrieve metrics
|
This is the preferred method to retrieve metrics
|
||||||
|
|
||||||
@return the list of samples
|
@return the list of samples
|
||||||
|
@ -251,7 +259,7 @@ class Check(object):
|
||||||
try:
|
try:
|
||||||
ts, val, hostname, device_name = self.get_sample_with_timestamp(
|
ts, val, hostname, device_name = self.get_sample_with_timestamp(
|
||||||
m, dimensions, device_name, expire)
|
m, dimensions, device_name, expire)
|
||||||
except UnknownValue:
|
except monagent.common.exceptions.UnknownValue:
|
||||||
continue
|
continue
|
||||||
attributes = {}
|
attributes = {}
|
||||||
if dimensions_list:
|
if dimensions_list:
|
||||||
|
@ -273,27 +281,22 @@ class AgentCheck(object):
|
||||||
keystone = None
|
keystone = None
|
||||||
|
|
||||||
def __init__(self, name, init_config, agent_config, instances=None):
|
def __init__(self, name, init_config, agent_config, instances=None):
|
||||||
"""
|
"""Initialize a new check.
|
||||||
Initialize a new check.
|
|
||||||
|
|
||||||
:param name: The name of the check
|
:param name: The name of the check
|
||||||
:param init_config: The config for initializing the check
|
:param init_config: The config for initializing the check
|
||||||
:param agent_config: The global configuration for the agent
|
:param agent_config: The global configuration for the agent
|
||||||
:param instances: A list of configuration objects for each instance.
|
:param instances: A list of configuration objects for each instance.
|
||||||
"""
|
"""
|
||||||
from monagent.common.aggregator import MetricsAggregator
|
|
||||||
|
|
||||||
self.name = name
|
self.name = name
|
||||||
self.init_config = init_config
|
self.init_config = init_config
|
||||||
self.agent_config = agent_config
|
self.agent_config = agent_config
|
||||||
self.hostname = get_hostname(agent_config)
|
self.hostname = monagent.common.util.get_hostname(agent_config)
|
||||||
self.log = logging.getLogger('%s.%s' % (__name__, name))
|
self.log = logging.getLogger('%s.%s' % (__name__, name))
|
||||||
|
|
||||||
self.aggregator = MetricsAggregator(
|
self.aggregator = monagent.common.aggregator.MetricsAggregator(self.hostname,
|
||||||
self.hostname,
|
recent_point_threshold=agent_config.get('recent_point_threshold',
|
||||||
recent_point_threshold=agent_config.get(
|
None))
|
||||||
'recent_point_threshold',
|
|
||||||
None))
|
|
||||||
|
|
||||||
self.events = []
|
self.events = []
|
||||||
self.instances = instances or []
|
self.instances = instances or []
|
||||||
|
@ -301,20 +304,19 @@ class AgentCheck(object):
|
||||||
self.library_versions = None
|
self.library_versions = None
|
||||||
|
|
||||||
api_config = self.agent_config['Api']
|
api_config = self.agent_config['Api']
|
||||||
AgentCheck.keystone = Keystone(api_config['keystone_url'],
|
AgentCheck.keystone = monagent.common.keystone.Keystone(api_config['keystone_url'],
|
||||||
api_config['username'],
|
api_config['username'],
|
||||||
api_config['password'],
|
api_config['password'],
|
||||||
api_config['project_name'])
|
api_config['project_name'])
|
||||||
|
|
||||||
def instance_count(self):
|
def instance_count(self):
|
||||||
""" Return the number of instances that are configured for this check. """
|
"""Return the number of instances that are configured for this check.
|
||||||
|
"""
|
||||||
return len(self.instances)
|
return len(self.instances)
|
||||||
|
|
||||||
def gauge(self, metric, value, dimensions=None,
|
def gauge(self, metric, value, dimensions=None,
|
||||||
hostname=None, device_name=None, timestamp=None):
|
hostname=None, device_name=None, timestamp=None):
|
||||||
"""
|
"""Record the value of a gauge, with optional dimensions, hostname and device name.
|
||||||
Record the value of a gauge, with optional dimensions, hostname and device
|
|
||||||
name.
|
|
||||||
|
|
||||||
:param metric: The name of the metric
|
:param metric: The name of the metric
|
||||||
:param value: The value of the gauge
|
:param value: The value of the gauge
|
||||||
|
@ -326,8 +328,7 @@ class AgentCheck(object):
|
||||||
self.aggregator.gauge(metric, value, dimensions, hostname, device_name, timestamp)
|
self.aggregator.gauge(metric, value, dimensions, hostname, device_name, timestamp)
|
||||||
|
|
||||||
def increment(self, metric, value=1, dimensions=None, hostname=None, device_name=None):
|
def increment(self, metric, value=1, dimensions=None, hostname=None, device_name=None):
|
||||||
"""
|
"""Increment a counter with optional dimensions, hostname and device name.
|
||||||
Increment a counter with optional dimensions, hostname and device name.
|
|
||||||
|
|
||||||
:param metric: The name of the metric
|
:param metric: The name of the metric
|
||||||
:param value: The value to increment by
|
:param value: The value to increment by
|
||||||
|
@ -338,8 +339,7 @@ class AgentCheck(object):
|
||||||
self.aggregator.increment(metric, value, dimensions, hostname, device_name)
|
self.aggregator.increment(metric, value, dimensions, hostname, device_name)
|
||||||
|
|
||||||
def decrement(self, metric, value=-1, dimensions=None, hostname=None, device_name=None):
|
def decrement(self, metric, value=-1, dimensions=None, hostname=None, device_name=None):
|
||||||
"""
|
"""Decrement a counter with optional dimensions, hostname and device name.
|
||||||
Increment a counter with optional dimensions, hostname and device name.
|
|
||||||
|
|
||||||
:param metric: The name of the metric
|
:param metric: The name of the metric
|
||||||
:param value: The value to decrement by
|
:param value: The value to decrement by
|
||||||
|
@ -350,8 +350,8 @@ class AgentCheck(object):
|
||||||
self.aggregator.decrement(metric, value, dimensions, hostname, device_name)
|
self.aggregator.decrement(metric, value, dimensions, hostname, device_name)
|
||||||
|
|
||||||
def rate(self, metric, value, dimensions=None, hostname=None, device_name=None):
|
def rate(self, metric, value, dimensions=None, hostname=None, device_name=None):
|
||||||
"""
|
"""Submit a point for a metric that will be calculated as a rate on flush.
|
||||||
Submit a point for a metric that will be calculated as a rate on flush.
|
|
||||||
Values will persist across each call to `check` if there is not enough
|
Values will persist across each call to `check` if there is not enough
|
||||||
point to generate a rate on the flush.
|
point to generate a rate on the flush.
|
||||||
|
|
||||||
|
@ -364,8 +364,7 @@ class AgentCheck(object):
|
||||||
self.aggregator.rate(metric, value, dimensions, hostname, device_name)
|
self.aggregator.rate(metric, value, dimensions, hostname, device_name)
|
||||||
|
|
||||||
def histogram(self, metric, value, dimensions=None, hostname=None, device_name=None):
|
def histogram(self, metric, value, dimensions=None, hostname=None, device_name=None):
|
||||||
"""
|
"""Sample a histogram value, with optional dimensions, hostname and device name.
|
||||||
Sample a histogram value, with optional dimensions, hostname and device name.
|
|
||||||
|
|
||||||
:param metric: The name of the metric
|
:param metric: The name of the metric
|
||||||
:param value: The value to sample for the histogram
|
:param value: The value to sample for the histogram
|
||||||
|
@ -376,8 +375,7 @@ class AgentCheck(object):
|
||||||
self.aggregator.histogram(metric, value, dimensions, hostname, device_name)
|
self.aggregator.histogram(metric, value, dimensions, hostname, device_name)
|
||||||
|
|
||||||
def set(self, metric, value, dimensions=None, hostname=None, device_name=None):
|
def set(self, metric, value, dimensions=None, hostname=None, device_name=None):
|
||||||
"""
|
"""Sample a set value, with optional dimensions, hostname and device name.
|
||||||
Sample a set value, with optional dimensions, hostname and device name.
|
|
||||||
|
|
||||||
:param metric: The name of the metric
|
:param metric: The name of the metric
|
||||||
:param value: The value for the set
|
:param value: The value for the set
|
||||||
|
@ -388,8 +386,7 @@ class AgentCheck(object):
|
||||||
self.aggregator.set(metric, value, dimensions, hostname, device_name)
|
self.aggregator.set(metric, value, dimensions, hostname, device_name)
|
||||||
|
|
||||||
def event(self, event):
|
def event(self, event):
|
||||||
"""
|
"""Save an event.
|
||||||
Save an event.
|
|
||||||
|
|
||||||
:param event: The event payload as a dictionary. Has the following
|
:param event: The event payload as a dictionary. Has the following
|
||||||
structure:
|
structure:
|
||||||
|
@ -412,8 +409,7 @@ class AgentCheck(object):
|
||||||
self.events.append(event)
|
self.events.append(event)
|
||||||
|
|
||||||
def has_events(self):
|
def has_events(self):
|
||||||
"""
|
"""Check whether the check has saved any events
|
||||||
Check whether the check has saved any events
|
|
||||||
|
|
||||||
@return whether or not the check has saved any events
|
@return whether or not the check has saved any events
|
||||||
@rtype boolean
|
@rtype boolean
|
||||||
|
@ -421,8 +417,7 @@ class AgentCheck(object):
|
||||||
return len(self.events) > 0
|
return len(self.events) > 0
|
||||||
|
|
||||||
def get_metrics(self, prettyprint=False):
|
def get_metrics(self, prettyprint=False):
|
||||||
"""
|
"""Get all metrics, including the ones that are tagged.
|
||||||
Get all metrics, including the ones that are tagged.
|
|
||||||
|
|
||||||
@return the list of samples
|
@return the list of samples
|
||||||
@rtype list of Measurement objects from monagent.common.metrics
|
@rtype list of Measurement objects from monagent.common.metrics
|
||||||
|
@ -444,8 +439,7 @@ class AgentCheck(object):
|
||||||
return self.aggregator.flush()
|
return self.aggregator.flush()
|
||||||
|
|
||||||
def get_events(self):
|
def get_events(self):
|
||||||
"""
|
"""Return a list of the events saved by the check, if any
|
||||||
Return a list of the events saved by the check, if any
|
|
||||||
|
|
||||||
@return the list of events saved by this check
|
@return the list of events saved by this check
|
||||||
@rtype list of event dictionaries
|
@rtype list of event dictionaries
|
||||||
|
@ -455,13 +449,13 @@ class AgentCheck(object):
|
||||||
return events
|
return events
|
||||||
|
|
||||||
def has_warnings(self):
|
def has_warnings(self):
|
||||||
"""
|
"""Check whether the instance run created any warnings.
|
||||||
Check whether the instance run created any warnings
|
|
||||||
"""
|
"""
|
||||||
return len(self.warnings) > 0
|
return len(self.warnings) > 0
|
||||||
|
|
||||||
def warning(self, warning_message):
|
def warning(self, warning_message):
|
||||||
""" Add a warning message that will be printed in the info page
|
"""Add a warning message that will be printed in the info page
|
||||||
|
|
||||||
:param warning_message: String. Warning message to be displayed
|
:param warning_message: String. Warning message to be displayed
|
||||||
"""
|
"""
|
||||||
self.warnings.append(warning_message)
|
self.warnings.append(warning_message)
|
||||||
|
@ -475,43 +469,45 @@ class AgentCheck(object):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def get_library_versions(self):
|
def get_library_versions(self):
|
||||||
""" Should return a string that shows which version
|
"""Should return a string that shows which version
|
||||||
of the needed libraries are used """
|
|
||||||
|
of the needed libraries are used
|
||||||
|
"""
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
def get_warnings(self):
|
def get_warnings(self):
|
||||||
"""
|
"""Return the list of warnings messages to be displayed in the info page.
|
||||||
Return the list of warnings messages to be displayed in the info page
|
|
||||||
"""
|
"""
|
||||||
warnings = self.warnings
|
warnings = self.warnings
|
||||||
self.warnings = []
|
self.warnings = []
|
||||||
return warnings
|
return warnings
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
""" Run all instances. """
|
"""Run all instances.
|
||||||
|
"""
|
||||||
instance_statuses = []
|
instance_statuses = []
|
||||||
for i, instance in enumerate(self.instances):
|
for i, instance in enumerate(self.instances):
|
||||||
try:
|
try:
|
||||||
instance['keystone'] = AgentCheck.keystone
|
instance['keystone'] = AgentCheck.keystone
|
||||||
self.check(instance)
|
self.check(instance)
|
||||||
if self.has_warnings():
|
if self.has_warnings():
|
||||||
instance_status = check_status.InstanceStatus(i,
|
instance_status = monagent.common.check_status.InstanceStatus(i,
|
||||||
check_status.STATUS_WARNING,
|
monagent.common.check_status.STATUS_WARNING,
|
||||||
warnings=self.get_warnings())
|
warnings=self.get_warnings())
|
||||||
else:
|
else:
|
||||||
instance_status = check_status.InstanceStatus(i, check_status.STATUS_OK)
|
instance_status = monagent.common.check_status.InstanceStatus(i,
|
||||||
|
monagent.common.check_status.STATUS_OK)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.log.exception("Check '%s' instance #%s failed" % (self.name, i))
|
self.log.exception("Check '%s' instance #%s failed" % (self.name, i))
|
||||||
instance_status = check_status.InstanceStatus(i,
|
instance_status = monagent.common.check_status.InstanceStatus(i,
|
||||||
check_status.STATUS_ERROR,
|
monagent.common.check_status.STATUS_ERROR,
|
||||||
error=e,
|
error=e,
|
||||||
tb=traceback.format_exc())
|
tb=traceback.format_exc())
|
||||||
instance_statuses.append(instance_status)
|
instance_statuses.append(instance_status)
|
||||||
return instance_statuses
|
return instance_statuses
|
||||||
|
|
||||||
def check(self, instance):
|
def check(self, instance):
|
||||||
"""
|
"""Overriden by the check class. This will be called to run the check.
|
||||||
Overriden by the check class. This will be called to run the check.
|
|
||||||
|
|
||||||
:param instance: A dict with the instance information. This will vary
|
:param instance: A dict with the instance information. This will vary
|
||||||
depending on your config structure.
|
depending on your config structure.
|
||||||
|
@ -520,21 +516,19 @@ class AgentCheck(object):
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def stop():
|
def stop():
|
||||||
"""
|
"""To be executed when the agent is being stopped to clean ressources.
|
||||||
To be executed when the agent is being stopped to clean ressources
|
|
||||||
"""
|
"""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_yaml(cls, path_to_yaml=None, agentConfig=None, yaml_text=None, check_name=None):
|
def from_yaml(cls, path_to_yaml=None, agentConfig=None, yaml_text=None, check_name=None):
|
||||||
|
"""A method used for testing your check without running the agent.
|
||||||
"""
|
"""
|
||||||
A method used for testing your check without running the agent.
|
if hasattr(yaml, 'CLoader'):
|
||||||
"""
|
Loader = yaml.CLoader
|
||||||
import yaml
|
else:
|
||||||
try:
|
Loader = yaml.Loader
|
||||||
from yaml import CLoader as Loader
|
|
||||||
except ImportError:
|
|
||||||
from yaml import Loader
|
|
||||||
if path_to_yaml:
|
if path_to_yaml:
|
||||||
check_name = os.path.basename(path_to_yaml).split('.')[0]
|
check_name = os.path.basename(path_to_yaml).split('.')[0]
|
||||||
try:
|
try:
|
||||||
|
@ -551,9 +545,7 @@ class AgentCheck(object):
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def normalize(metric, prefix=None):
|
def normalize(metric, prefix=None):
|
||||||
"""
|
"""Turn a metric into a well-formed metric name prefix.b.c
|
||||||
Turn a metric into a well-formed metric name
|
|
||||||
prefix.b.c
|
|
||||||
|
|
||||||
:param metric The metric name to normalize
|
:param metric The metric name to normalize
|
||||||
:param prefix A prefix to to add to the normalized name, default None
|
:param prefix A prefix to to add to the normalized name, default None
|
||||||
|
@ -587,10 +579,11 @@ class AgentCheck(object):
|
||||||
|
|
||||||
|
|
||||||
def run_check(name, path=None):
|
def run_check(name, path=None):
|
||||||
from tests.common import get_check
|
import tests.common
|
||||||
|
|
||||||
# Read the config file
|
# Read the config file
|
||||||
confd_path = path or os.path.join(get_confd_path(get_os()), '%s.yaml' % name)
|
confd_path = path or os.path.join(monagent.common.config.get_confd_path(monagent.common.util.get_os()),
|
||||||
|
'%s.yaml' % name)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
f = open(confd_path)
|
f = open(confd_path)
|
||||||
|
@ -601,13 +594,13 @@ def run_check(name, path=None):
|
||||||
f.close()
|
f.close()
|
||||||
|
|
||||||
# Run the check
|
# Run the check
|
||||||
check, instances = get_check(name, config_str)
|
check, instances = tests.common.get_check(name, config_str)
|
||||||
if not instances:
|
if not instances:
|
||||||
raise Exception('YAML configuration returned no instances.')
|
raise Exception('YAML configuration returned no instances.')
|
||||||
for instance in instances:
|
for instance in instances:
|
||||||
check.check(instance)
|
check.check(instance)
|
||||||
if check.has_events():
|
if check.has_events():
|
||||||
print("Events:\n")
|
print("Events:\n")
|
||||||
pprint(check.get_events(), indent=4)
|
pprint.pprint(check.get_events(), indent=4)
|
||||||
print("Metrics:\n")
|
print("Metrics:\n")
|
||||||
pprint(check.get_metrics(), indent=4)
|
pprint.pprint(check.get_metrics(), indent=4)
|
||||||
|
|
|
@ -1,15 +1,14 @@
|
||||||
# Core modules
|
# Core modules
|
||||||
import logging
|
import logging
|
||||||
import threading
|
|
||||||
import time
|
|
||||||
import socket
|
import socket
|
||||||
|
|
||||||
from monagent.common.metrics import Measurement
|
|
||||||
from monagent.common.util import get_os, Timer
|
|
||||||
import system.unix as u
|
import system.unix as u
|
||||||
import system.win32 as w32
|
import system.win32 as w32
|
||||||
from datadog import Dogstreams
|
import threading
|
||||||
from monagent.common.check_status import CheckStatus, CollectorStatus, EmitterStatus
|
import time
|
||||||
|
|
||||||
|
import monagent.common.check_status
|
||||||
|
import monagent.common.metrics
|
||||||
|
import monagent.common.util
|
||||||
|
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
|
@ -25,15 +24,15 @@ FLUSH_LOGGING_INITIAL = 5
|
||||||
|
|
||||||
class Collector(object):
|
class Collector(object):
|
||||||
|
|
||||||
"""
|
"""The collector is responsible for collecting data from each check and
|
||||||
The collector is responsible for collecting data from each check and
|
|
||||||
passing it along to the emitters, who send it to their final destination.
|
passing it along to the emitters, who send it to their final destination.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, agent_config, emitter, checksd=None):
|
def __init__(self, agent_config, emitter, checksd=None):
|
||||||
self.emit_duration = None
|
self.emit_duration = None
|
||||||
self.agent_config = agent_config
|
self.agent_config = agent_config
|
||||||
self.os = get_os()
|
self.os = monagent.common.util.get_os()
|
||||||
self.plugins = None
|
self.plugins = None
|
||||||
self.emitter = emitter
|
self.emitter = emitter
|
||||||
socket.setdefaulttimeout(15)
|
socket.setdefaulttimeout(15)
|
||||||
|
@ -74,23 +73,24 @@ class Collector(object):
|
||||||
self.init_failed_checks_d = checksd['init_failed_checks']
|
self.init_failed_checks_d = checksd['init_failed_checks']
|
||||||
|
|
||||||
def _emit(self, payload):
|
def _emit(self, payload):
|
||||||
""" Send the payload via the emitter. """
|
"""Send the payload via the emitter.
|
||||||
|
"""
|
||||||
statuses = []
|
statuses = []
|
||||||
# Don't try to send to an emitter if we're stopping/
|
# Don't try to send to an emitter if we're stopping/
|
||||||
if self.continue_running:
|
if self.continue_running:
|
||||||
name = self.emitter.__name__
|
name = self.emitter.__name__
|
||||||
emitter_status = EmitterStatus(name)
|
emitter_status = monagent.common.check_status.EmitterStatus(name)
|
||||||
try:
|
try:
|
||||||
self.emitter(payload, log, self.agent_config['forwarder_url'])
|
self.emitter(payload, log, self.agent_config['forwarder_url'])
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
log.exception("Error running emitter: %s" % self.emitter.__name__)
|
log.exception("Error running emitter: %s" % self.emitter.__name__)
|
||||||
emitter_status = EmitterStatus(name, e)
|
emitter_status = monagent.common.check_status.EmitterStatus(name, e)
|
||||||
statuses.append(emitter_status)
|
statuses.append(emitter_status)
|
||||||
return statuses
|
return statuses
|
||||||
|
|
||||||
def _set_status(self, check_statuses, emitter_statuses, collect_duration):
|
def _set_status(self, check_statuses, emitter_statuses, collect_duration):
|
||||||
try:
|
try:
|
||||||
CollectorStatus(check_statuses, emitter_statuses).persist()
|
monagent.common.check_status.CollectorStatus(check_statuses, emitter_statuses).persist()
|
||||||
except Exception:
|
except Exception:
|
||||||
log.exception("Error persisting collector status")
|
log.exception("Error persisting collector status")
|
||||||
|
|
||||||
|
@ -125,13 +125,11 @@ class Collector(object):
|
||||||
return metrics
|
return metrics
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
"""
|
"""Collect data from each check and submit their data.
|
||||||
Collect data from each check and submit their data.
|
|
||||||
There are currently two types of checks the system checks and the configured ones from checks_d
|
There are currently two types of checks the system checks and the configured ones from checks_d
|
||||||
"""
|
"""
|
||||||
timer = Timer()
|
timer = monagent.common.util.Timer()
|
||||||
if self.os != 'windows':
|
|
||||||
cpu_clock = time.clock()
|
|
||||||
self.run_count += 1
|
self.run_count += 1
|
||||||
log.debug("Starting collection run #%s" % self.run_count)
|
log.debug("Starting collection run #%s" % self.run_count)
|
||||||
|
|
||||||
|
@ -144,7 +142,7 @@ class Collector(object):
|
||||||
for check_type in self._legacy_checks:
|
for check_type in self._legacy_checks:
|
||||||
try:
|
try:
|
||||||
for name, value in check_type.check().iteritems():
|
for name, value in check_type.check().iteritems():
|
||||||
metrics_list.append(Measurement(name, timestamp, value, {}))
|
metrics_list.append(monagent.common.metrics.Measurement(name, timestamp, value, {}))
|
||||||
except Exception:
|
except Exception:
|
||||||
log.exception('Error running check.')
|
log.exception('Error running check.')
|
||||||
|
|
||||||
|
@ -163,10 +161,10 @@ class Collector(object):
|
||||||
# Add in metrics on the collector run, emit_duration is from the previous run
|
# Add in metrics on the collector run, emit_duration is from the previous run
|
||||||
for name, value in self.collector_stats(len(metrics_list), len(events),
|
for name, value in self.collector_stats(len(metrics_list), len(events),
|
||||||
collect_duration, self.emit_duration).iteritems():
|
collect_duration, self.emit_duration).iteritems():
|
||||||
metrics_list.append(Measurement(name,
|
metrics_list.append(monagent.common.metrics.Measurement(name,
|
||||||
timestamp,
|
timestamp,
|
||||||
value,
|
value,
|
||||||
{'service': 'monasca', 'component': 'collector'}))
|
{'service': 'monasca', 'component': 'collector'}))
|
||||||
|
|
||||||
emitter_statuses = self._emit(metrics_list)
|
emitter_statuses = self._emit(metrics_list)
|
||||||
self.emit_duration = timer.step()
|
self.emit_duration = timer.step()
|
||||||
|
@ -175,8 +173,9 @@ class Collector(object):
|
||||||
self._set_status(checks_statuses, emitter_statuses, collect_duration)
|
self._set_status(checks_statuses, emitter_statuses, collect_duration)
|
||||||
|
|
||||||
def run_checks_d(self):
|
def run_checks_d(self):
|
||||||
""" Run defined checks_d checks.
|
"""Run defined checks_d checks.
|
||||||
returns a list of Measurements, a dictionary of events and a list of check statuses.
|
|
||||||
|
returns a list of Measurements, a dictionary of events and a list of check statuses.
|
||||||
"""
|
"""
|
||||||
measurements = []
|
measurements = []
|
||||||
events = {}
|
events = {}
|
||||||
|
@ -210,23 +209,22 @@ class Collector(object):
|
||||||
except Exception:
|
except Exception:
|
||||||
log.exception("Error running check %s" % check.name)
|
log.exception("Error running check %s" % check.name)
|
||||||
|
|
||||||
check_status = CheckStatus(check.name, instance_statuses, metric_count, event_count,
|
check_status = monagent.common.check_status.CheckStatus(check.name, instance_statuses, metric_count, event_count,
|
||||||
library_versions=check.get_library_info())
|
library_versions=check.get_library_info())
|
||||||
check_statuses.append(check_status)
|
check_statuses.append(check_status)
|
||||||
|
|
||||||
for check_name, info in self.init_failed_checks_d.iteritems():
|
for check_name, info in self.init_failed_checks_d.iteritems():
|
||||||
if not self.continue_running:
|
if not self.continue_running:
|
||||||
return
|
return
|
||||||
check_status = CheckStatus(check_name, None, None, None,
|
check_status = monagent.common.check_status.CheckStatus(check_name, None, None, None,
|
||||||
init_failed_error=info['error'],
|
init_failed_error=info['error'],
|
||||||
init_failed_traceback=info['traceback'])
|
init_failed_traceback=info['traceback'])
|
||||||
check_statuses.append(check_status)
|
check_statuses.append(check_status)
|
||||||
|
|
||||||
return measurements, events, check_statuses
|
return measurements, events, check_statuses
|
||||||
|
|
||||||
def stop(self):
|
def stop(self):
|
||||||
"""
|
"""Tell the collector to stop at the next logical point.
|
||||||
Tell the collector to stop at the next logical point.
|
|
||||||
"""
|
"""
|
||||||
# This is called when the process is being killed, so
|
# This is called when the process is being killed, so
|
||||||
# try to stop the collector as soon as possible.
|
# try to stop the collector as soon as possible.
|
||||||
|
|
|
@ -1,13 +1,13 @@
|
||||||
|
import datetime
|
||||||
|
import itertools
|
||||||
import os
|
import os
|
||||||
import traceback
|
|
||||||
import re
|
import re
|
||||||
import time
|
import time
|
||||||
from datetime import datetime
|
import traceback
|
||||||
from itertools import groupby # >= python 2.4
|
|
||||||
|
|
||||||
from utils import TailFile
|
import monagent.collector
|
||||||
from monagent.common.util import LaconicFilter
|
import monagent.common.util
|
||||||
from monagent.collector import modules
|
import utils
|
||||||
|
|
||||||
|
|
||||||
if hasattr('some string', 'partition'):
|
if hasattr('some string', 'partition'):
|
||||||
|
@ -111,7 +111,7 @@ class Dogstream(object):
|
||||||
|
|
||||||
if parser_spec:
|
if parser_spec:
|
||||||
try:
|
try:
|
||||||
parse_func = modules.load(parser_spec, 'parser')
|
parse_func = monagent.collector.modules.load(parser_spec, 'parser')
|
||||||
if isinstance(parse_func, type):
|
if isinstance(parse_func, type):
|
||||||
logger.info('Instantiating class-based dogstream')
|
logger.info('Instantiating class-based dogstream')
|
||||||
parse_func = parse_func(
|
parse_func = parse_func(
|
||||||
|
@ -142,7 +142,7 @@ class Dogstream(object):
|
||||||
self.class_based = class_based
|
self.class_based = class_based
|
||||||
|
|
||||||
# Apply LaconicFilter to avoid log flooding
|
# Apply LaconicFilter to avoid log flooding
|
||||||
self.logger.addFilter(LaconicFilter("dogstream"))
|
self.logger.addFilter(monagent.common.util.LaconicFilter("dogstream"))
|
||||||
|
|
||||||
self.log_path = log_path
|
self.log_path = log_path
|
||||||
self.parse_func = parse_func or self._default_line_parser
|
self.parse_func = parse_func or self._default_line_parser
|
||||||
|
@ -163,7 +163,7 @@ class Dogstream(object):
|
||||||
|
|
||||||
# Build our tail -f
|
# Build our tail -f
|
||||||
if self._gen is None:
|
if self._gen is None:
|
||||||
self._gen = TailFile(
|
self._gen = utils.TailFile(
|
||||||
self.logger,
|
self.logger,
|
||||||
self.log_path,
|
self.log_path,
|
||||||
self._line_parser).tail(
|
self._line_parser).tail(
|
||||||
|
@ -202,7 +202,7 @@ class Dogstream(object):
|
||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
parsed = self.parse_func(self.logger, line, self.parser_state, *self.parse_args)
|
parsed = self.parse_func(self.logger, line, self.parser_state, *self.parse_args)
|
||||||
except TypeError as e:
|
except TypeError:
|
||||||
# Arity of parse_func is 3 (old-style), not 4
|
# Arity of parse_func is 3 (old-style), not 4
|
||||||
parsed = self.parse_func(self.logger, line)
|
parsed = self.parse_func(self.logger, line)
|
||||||
|
|
||||||
|
@ -250,7 +250,7 @@ class Dogstream(object):
|
||||||
try:
|
try:
|
||||||
# Bucket points into 15 second buckets
|
# Bucket points into 15 second buckets
|
||||||
ts = (int(float(ts)) / self._freq) * self._freq
|
ts = (int(float(ts)) / self._freq) * self._freq
|
||||||
date = datetime.fromtimestamp(ts)
|
date = datetime.datetime.fromtimestamp(ts)
|
||||||
assert date.year > 1990
|
assert date.year > 1990
|
||||||
except Exception:
|
except Exception:
|
||||||
invalid_reasons.append('invalid timestamp')
|
invalid_reasons.append('invalid timestamp')
|
||||||
|
@ -265,14 +265,13 @@ class Dogstream(object):
|
||||||
repr(datum), ', '.join(invalid_reasons), line)
|
repr(datum), ', '.join(invalid_reasons), line)
|
||||||
else:
|
else:
|
||||||
self._values.append((metric, ts, value, attrs))
|
self._values.append((metric, ts, value, attrs))
|
||||||
except Exception as e:
|
except Exception:
|
||||||
self.logger.debug("Error while parsing line %s" % line, exc_info=True)
|
self.logger.debug("Error while parsing line %s" % line, exc_info=True)
|
||||||
self._error_count += 1
|
self._error_count += 1
|
||||||
self.logger.error("Parser error: %s out of %s" % (self._error_count, self._line_count))
|
self.logger.error("Parser error: %s out of %s" % (self._error_count, self._line_count))
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _default_line_parser(logger, line):
|
def _default_line_parser(logger, line):
|
||||||
original_line = line
|
|
||||||
sep = ' '
|
sep = ' '
|
||||||
metric, _, line = partition(line.strip(), sep)
|
metric, _, line = partition(line.strip(), sep)
|
||||||
timestamp, _, line = partition(line.strip(), sep)
|
timestamp, _, line = partition(line.strip(), sep)
|
||||||
|
@ -284,13 +283,14 @@ class Dogstream(object):
|
||||||
keyval, _, line = partition(line.strip(), sep)
|
keyval, _, line = partition(line.strip(), sep)
|
||||||
key, val = keyval.split('=', 1)
|
key, val = keyval.split('=', 1)
|
||||||
attributes[key] = val
|
attributes[key] = val
|
||||||
except Exception as e:
|
except Exception:
|
||||||
logger.debug(traceback.format_exc())
|
logger.debug(traceback.format_exc())
|
||||||
|
|
||||||
return metric, timestamp, value, attributes
|
return metric, timestamp, value, attributes
|
||||||
|
|
||||||
def _aggregate(self, values):
|
def _aggregate(self, values):
|
||||||
""" Aggregate values down to the second and store as:
|
"""Aggregate values down to the second and store as:
|
||||||
|
|
||||||
{
|
{
|
||||||
"dogstream": [(metric, timestamp, value, {key: val})]
|
"dogstream": [(metric, timestamp, value, {key: val})]
|
||||||
}
|
}
|
||||||
|
@ -300,7 +300,7 @@ class Dogstream(object):
|
||||||
|
|
||||||
values.sort(key=point_sorter)
|
values.sort(key=point_sorter)
|
||||||
|
|
||||||
for (timestamp, metric, host_name, device_name), val_attrs in groupby(values, key=point_sorter):
|
for (timestamp, metric, host_name, device_name), val_attrs in itertools.groupby(values, key=point_sorter):
|
||||||
attributes = {}
|
attributes = {}
|
||||||
vals = []
|
vals = []
|
||||||
for _metric, _timestamp, v, a in val_attrs:
|
for _metric, _timestamp, v, a in val_attrs:
|
||||||
|
@ -519,6 +519,3 @@ class NagiosServicePerfData(NagiosPerfData):
|
||||||
if middle_name:
|
if middle_name:
|
||||||
metric.append(middle_name.replace(' ', '_').lower())
|
metric.append(middle_name.replace(' ', '_').lower())
|
||||||
return metric
|
return metric
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
testddForwarder()
|
|
||||||
|
|
|
@ -32,28 +32,34 @@ SENTINEL = "QUIT"
|
||||||
|
|
||||||
def is_sentinel(obj):
|
def is_sentinel(obj):
|
||||||
"""Predicate to determine whether an item from the queue is the
|
"""Predicate to determine whether an item from the queue is the
|
||||||
signal to stop"""
|
|
||||||
|
signal to stop
|
||||||
|
"""
|
||||||
return isinstance(obj, str) and obj == SENTINEL
|
return isinstance(obj, str) and obj == SENTINEL
|
||||||
|
|
||||||
|
|
||||||
class TimeoutError(Exception):
|
class TimeoutError(Exception):
|
||||||
|
|
||||||
"""Raised when a result is not available within the given timeout"""
|
"""Raised when a result is not available within the given timeout.
|
||||||
|
"""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
class PoolWorker(threading.Thread):
|
class PoolWorker(threading.Thread):
|
||||||
|
|
||||||
"""Thread that consumes WorkUnits from a queue to process them"""
|
"""Thread that consumes WorkUnits from a queue to process them.
|
||||||
|
"""
|
||||||
|
|
||||||
def __init__(self, workq, *args, **kwds):
|
def __init__(self, workq, *args, **kwds):
|
||||||
"""\param workq: Queue object to consume the work units from"""
|
"""\param workq: Queue object to consume the work units from.
|
||||||
|
"""
|
||||||
threading.Thread.__init__(self, *args, **kwds)
|
threading.Thread.__init__(self, *args, **kwds)
|
||||||
self._workq = workq
|
self._workq = workq
|
||||||
self.running = False
|
self.running = False
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
"""Process the work unit, or wait for sentinel to exit"""
|
"""Process the work unit, or wait for sentinel to exit.
|
||||||
|
"""
|
||||||
while True:
|
while True:
|
||||||
self.running = True
|
self.running = True
|
||||||
workunit = self._workq.get()
|
workunit = self._workq.get()
|
||||||
|
@ -69,14 +75,15 @@ class PoolWorker(threading.Thread):
|
||||||
class Pool(object):
|
class Pool(object):
|
||||||
|
|
||||||
"""
|
"""
|
||||||
The Pool class represents a pool of worker threads. It has methods
|
The Pool class represents a pool of worker threads.
|
||||||
which allows tasks to be offloaded to the worker processes in a
|
|
||||||
few different ways
|
It has methods which allows tasks to be offloaded to the
|
||||||
|
worker processes in a few different ways.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, nworkers, name="Pool"):
|
def __init__(self, nworkers, name="Pool"):
|
||||||
"""
|
"""\param nworkers (integer) number of worker threads to start
|
||||||
\param nworkers (integer) number of worker threads to start
|
|
||||||
\param name (string) prefix for the worker threads' name
|
\param name (string) prefix for the worker threads' name
|
||||||
"""
|
"""
|
||||||
self._workq = Queue.Queue()
|
self._workq = Queue.Queue()
|
||||||
|
@ -86,7 +93,7 @@ class Pool(object):
|
||||||
thr = PoolWorker(self._workq, name="Worker-%s-%d" % (name, idx))
|
thr = PoolWorker(self._workq, name="Worker-%s-%d" % (name, idx))
|
||||||
try:
|
try:
|
||||||
thr.start()
|
thr.start()
|
||||||
except:
|
except Exception:
|
||||||
# If one thread has a problem, undo everything
|
# If one thread has a problem, undo everything
|
||||||
self.terminate()
|
self.terminate()
|
||||||
raise
|
raise
|
||||||
|
@ -97,25 +104,28 @@ class Pool(object):
|
||||||
return len([w for w in self._workers if w.running])
|
return len([w for w in self._workers if w.running])
|
||||||
|
|
||||||
def apply(self, func, args=(), kwds=None):
|
def apply(self, func, args=(), kwds=None):
|
||||||
"""Equivalent of the apply() builtin function. It blocks till
|
"""Equivalent of the apply() builtin function.
|
||||||
the result is ready."""
|
|
||||||
|
It blocks till the result is ready.
|
||||||
|
"""
|
||||||
if not kwds:
|
if not kwds:
|
||||||
kwds = dict()
|
kwds = dict()
|
||||||
return self.apply_async(func, args, kwds).get()
|
return self.apply_async(func, args, kwds).get()
|
||||||
|
|
||||||
def map(self, func, iterable, chunksize=None):
|
def map(self, func, iterable, chunksize=None):
|
||||||
"""A parallel equivalent of the map() builtin function. It
|
"""A parallel equivalent of the map() builtin function.
|
||||||
blocks till the result is ready.
|
|
||||||
|
It blocks till the result is ready.
|
||||||
|
|
||||||
This method chops the iterable into a number of chunks which
|
This method chops the iterable into a number of chunks which
|
||||||
it submits to the process pool as separate tasks. The
|
it submits to the process pool as separate tasks. The
|
||||||
(approximate) size of these chunks can be specified by setting
|
(approximate) size of these chunks can be specified by setting
|
||||||
chunksize to a positive integer."""
|
chunksize to a positive integer.
|
||||||
|
"""
|
||||||
return self.map_async(func, iterable, chunksize).get()
|
return self.map_async(func, iterable, chunksize).get()
|
||||||
|
|
||||||
def imap(self, func, iterable, chunksize=1):
|
def imap(self, func, iterable, chunksize=1):
|
||||||
"""
|
"""An equivalent of itertools.imap().
|
||||||
An equivalent of itertools.imap().
|
|
||||||
|
|
||||||
The chunksize argument is the same as the one used by the
|
The chunksize argument is the same as the one used by the
|
||||||
map() method. For very long iterables using a large value for
|
map() method. For very long iterables using a large value for
|
||||||
|
@ -133,22 +143,24 @@ class Pool(object):
|
||||||
|
|
||||||
def imap_unordered(self, func, iterable, chunksize=1):
|
def imap_unordered(self, func, iterable, chunksize=1):
|
||||||
"""The same as imap() except that the ordering of the results
|
"""The same as imap() except that the ordering of the results
|
||||||
from the returned iterator should be considered
|
|
||||||
arbitrary. (Only when there is only one worker process is the
|
from the returned iterator should be considered arbitrary.
|
||||||
order guaranteed to be "correct".)"""
|
(Only when there is only one worker process is the order
|
||||||
|
guaranteed to be "correct".)
|
||||||
|
"""
|
||||||
collector = UnorderedResultCollector()
|
collector = UnorderedResultCollector()
|
||||||
self._create_sequences(func, iterable, chunksize, collector)
|
self._create_sequences(func, iterable, chunksize, collector)
|
||||||
return iter(collector)
|
return iter(collector)
|
||||||
|
|
||||||
def apply_async(self, func, args=(), kwds=None, callback=None):
|
def apply_async(self, func, args=(), kwds=None, callback=None):
|
||||||
"""A variant of the apply() method which returns an
|
"""A variant of the apply() method which returns an ApplyResult object.
|
||||||
ApplyResult object.
|
|
||||||
|
|
||||||
If callback is specified then it should be a callable which
|
If callback is specified then it should be a callable which
|
||||||
accepts a single argument. When the result becomes ready,
|
accepts a single argument. When the result becomes ready,
|
||||||
callback is applied to it (unless the call failed). callback
|
callback is applied to it (unless the call failed). callback
|
||||||
should complete immediately since otherwise the thread which
|
should complete immediately since otherwise the thread which
|
||||||
handles the results will get blocked."""
|
handles the results will get blocked.
|
||||||
|
"""
|
||||||
if not kwds:
|
if not kwds:
|
||||||
kwds = dict()
|
kwds = dict()
|
||||||
assert not self._closed # No lock here. We assume it's atomic...
|
assert not self._closed # No lock here. We assume it's atomic...
|
||||||
|
@ -158,14 +170,14 @@ class Pool(object):
|
||||||
return apply_result
|
return apply_result
|
||||||
|
|
||||||
def map_async(self, func, iterable, chunksize=None, callback=None):
|
def map_async(self, func, iterable, chunksize=None, callback=None):
|
||||||
"""A variant of the map() method which returns a ApplyResult
|
"""A variant of the map() method which returns a ApplyResult object.
|
||||||
object.
|
|
||||||
|
|
||||||
If callback is specified then it should be a callable which
|
If callback is specified then it should be a callable which
|
||||||
accepts a single argument. When the result becomes ready
|
accepts a single argument. When the result becomes ready
|
||||||
callback is applied to it (unless the call failed). callback
|
callback is applied to it (unless the call failed). callback
|
||||||
should complete immediately since otherwise the thread which
|
should complete immediately since otherwise the thread which
|
||||||
handles the results will get blocked."""
|
handles the results will get blocked.
|
||||||
|
"""
|
||||||
apply_result = ApplyResult(callback=callback)
|
apply_result = ApplyResult(callback=callback)
|
||||||
collector = OrderedResultCollector(apply_result, as_iterator=False)
|
collector = OrderedResultCollector(apply_result, as_iterator=False)
|
||||||
self._create_sequences(func, iterable, chunksize, collector)
|
self._create_sequences(func, iterable, chunksize, collector)
|
||||||
|
@ -173,6 +185,7 @@ class Pool(object):
|
||||||
|
|
||||||
def imap_async(self, func, iterable, chunksize=None, callback=None):
|
def imap_async(self, func, iterable, chunksize=None, callback=None):
|
||||||
"""A variant of the imap() method which returns an ApplyResult
|
"""A variant of the imap() method which returns an ApplyResult
|
||||||
|
|
||||||
object that provides an iterator (next method(timeout)
|
object that provides an iterator (next method(timeout)
|
||||||
available).
|
available).
|
||||||
|
|
||||||
|
@ -180,7 +193,8 @@ class Pool(object):
|
||||||
accepts a single argument. When the resulting iterator becomes
|
accepts a single argument. When the resulting iterator becomes
|
||||||
ready, callback is applied to it (unless the call
|
ready, callback is applied to it (unless the call
|
||||||
failed). callback should complete immediately since otherwise
|
failed). callback should complete immediately since otherwise
|
||||||
the thread which handles the results will get blocked."""
|
the thread which handles the results will get blocked.
|
||||||
|
"""
|
||||||
apply_result = ApplyResult(callback=callback)
|
apply_result = ApplyResult(callback=callback)
|
||||||
collector = OrderedResultCollector(apply_result, as_iterator=True)
|
collector = OrderedResultCollector(apply_result, as_iterator=True)
|
||||||
self._create_sequences(func, iterable, chunksize, collector)
|
self._create_sequences(func, iterable, chunksize, collector)
|
||||||
|
@ -189,30 +203,35 @@ class Pool(object):
|
||||||
def imap_unordered_async(self, func, iterable, chunksize=None,
|
def imap_unordered_async(self, func, iterable, chunksize=None,
|
||||||
callback=None):
|
callback=None):
|
||||||
"""A variant of the imap_unordered() method which returns an
|
"""A variant of the imap_unordered() method which returns an
|
||||||
ApplyResult object that provides an iterator (next
|
|
||||||
method(timeout) available).
|
ApplyResult object that provides an iterator (next method(timeout)
|
||||||
|
available).
|
||||||
|
|
||||||
If callback is specified then it should be a callable which
|
If callback is specified then it should be a callable which
|
||||||
accepts a single argument. When the resulting iterator becomes
|
accepts a single argument. When the resulting iterator becomes
|
||||||
ready, callback is applied to it (unless the call
|
ready, callback is applied to it (unless the call
|
||||||
failed). callback should complete immediately since otherwise
|
failed). callback should complete immediately since otherwise
|
||||||
the thread which handles the results will get blocked."""
|
the thread which handles the results will get blocked.
|
||||||
|
"""
|
||||||
apply_result = ApplyResult(callback=callback)
|
apply_result = ApplyResult(callback=callback)
|
||||||
collector = UnorderedResultCollector(apply_result)
|
collector = UnorderedResultCollector(apply_result)
|
||||||
self._create_sequences(func, iterable, chunksize, collector)
|
self._create_sequences(func, iterable, chunksize, collector)
|
||||||
return apply_result
|
return apply_result
|
||||||
|
|
||||||
def close(self):
|
def close(self):
|
||||||
"""Prevents any more tasks from being submitted to the
|
"""Prevents any more tasks from being submitted to the pool.
|
||||||
pool. Once all the tasks have been completed the worker
|
|
||||||
processes will exit."""
|
Once all the tasks have been completed the worker
|
||||||
|
processes will exit.
|
||||||
|
"""
|
||||||
# No lock here. We assume it's sufficiently atomic...
|
# No lock here. We assume it's sufficiently atomic...
|
||||||
self._closed = True
|
self._closed = True
|
||||||
|
|
||||||
def terminate(self):
|
def terminate(self):
|
||||||
"""Stops the worker processes immediately without completing
|
"""Stops the worker processes immediately without completing outstanding work.
|
||||||
outstanding work. When the pool object is garbage collected
|
|
||||||
terminate() will be called immediately."""
|
When the pool object is garbage collected terminate() will be called immediately.
|
||||||
|
"""
|
||||||
self.close()
|
self.close()
|
||||||
|
|
||||||
# Clearing the job queue
|
# Clearing the job queue
|
||||||
|
@ -228,18 +247,19 @@ class Pool(object):
|
||||||
self._workq.put(SENTINEL)
|
self._workq.put(SENTINEL)
|
||||||
|
|
||||||
def join(self):
|
def join(self):
|
||||||
"""Wait for the worker processes to exit. One must call
|
"""Wait for the worker processes to exit.
|
||||||
close() or terminate() before using join()."""
|
|
||||||
|
One must call close() or terminate() before using join().
|
||||||
|
"""
|
||||||
for thr in self._workers:
|
for thr in self._workers:
|
||||||
thr.join()
|
thr.join()
|
||||||
|
|
||||||
def _create_sequences(self, func, iterable, chunksize, collector=None):
|
def _create_sequences(self, func, iterable, chunksize, collector=None):
|
||||||
"""
|
"""Create the WorkUnit objects to process and pushes them on the work queue.
|
||||||
Create the WorkUnit objects to process and pushes them on the
|
|
||||||
work queue. Each work unit is meant to process a slice of
|
Each work unit is meant to process a slice of iterable of size chunksize.
|
||||||
iterable of size chunksize. If collector is specified, then
|
If collector is specified, then the ApplyResult objects associated with
|
||||||
the ApplyResult objects associated with the jobs will notify
|
the jobs will notify collector when their result becomes ready.
|
||||||
collector when their result becomes ready.
|
|
||||||
|
|
||||||
\return the list of WorkUnit objects (basically: JobSequences)
|
\return the list of WorkUnit objects (basically: JobSequences)
|
||||||
pushed onto the work queue
|
pushed onto the work queue
|
||||||
|
@ -271,8 +291,10 @@ class Pool(object):
|
||||||
|
|
||||||
class WorkUnit(object):
|
class WorkUnit(object):
|
||||||
|
|
||||||
"""ABC for a unit of work submitted to the worker threads. It's
|
"""ABC for a unit of work submitted to the worker threads.
|
||||||
basically just an object equipped with a process() method"""
|
|
||||||
|
It's basically just an object equipped with a process() method
|
||||||
|
"""
|
||||||
|
|
||||||
def process(self):
|
def process(self):
|
||||||
"""Do the work. Shouldn't raise any exception"""
|
"""Do the work. Shouldn't raise any exception"""
|
||||||
|
@ -281,11 +303,12 @@ class WorkUnit(object):
|
||||||
|
|
||||||
class Job(WorkUnit):
|
class Job(WorkUnit):
|
||||||
|
|
||||||
"""A work unit that corresponds to the execution of a single function"""
|
"""A work unit that corresponds to the execution of a single function.
|
||||||
|
"""
|
||||||
|
|
||||||
def __init__(self, func, args, kwds, apply_result):
|
def __init__(self, func, args, kwds, apply_result):
|
||||||
"""
|
"""\param func/args/kwds used to call the function
|
||||||
\param func/args/kwds used to call the function
|
|
||||||
\param apply_result ApplyResult object that holds the result
|
\param apply_result ApplyResult object that holds the result
|
||||||
of the function call
|
of the function call
|
||||||
"""
|
"""
|
||||||
|
@ -296,14 +319,14 @@ class Job(WorkUnit):
|
||||||
self._result = apply_result
|
self._result = apply_result
|
||||||
|
|
||||||
def process(self):
|
def process(self):
|
||||||
"""
|
"""Call the function with the args/kwds and tell the ApplyResult
|
||||||
Call the function with the args/kwds and tell the ApplyResult
|
|
||||||
that its result is ready. Correctly handles the exceptions
|
that its result is ready. Correctly handles the exceptions
|
||||||
happening during the execution of the function
|
happening during the execution of the function.
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
result = self._func(*self._args, **self._kwds)
|
result = self._func(*self._args, **self._kwds)
|
||||||
except:
|
except Exception:
|
||||||
self._result._set_exception()
|
self._result._set_exception()
|
||||||
else:
|
else:
|
||||||
self._result._set_value(result)
|
self._result._set_value(result)
|
||||||
|
@ -312,15 +335,15 @@ class Job(WorkUnit):
|
||||||
class JobSequence(WorkUnit):
|
class JobSequence(WorkUnit):
|
||||||
|
|
||||||
"""A work unit that corresponds to the processing of a continuous
|
"""A work unit that corresponds to the processing of a continuous
|
||||||
sequence of Job objects"""
|
sequence of Job objects
|
||||||
|
"""
|
||||||
|
|
||||||
def __init__(self, jobs):
|
def __init__(self, jobs):
|
||||||
WorkUnit.__init__(self)
|
WorkUnit.__init__(self)
|
||||||
self._jobs = jobs
|
self._jobs = jobs
|
||||||
|
|
||||||
def process(self):
|
def process(self):
|
||||||
"""
|
"""Call process() on all the Job objects that have been specified.
|
||||||
Call process() on all the Job objects that have been specified
|
|
||||||
"""
|
"""
|
||||||
for job in self._jobs:
|
for job in self._jobs:
|
||||||
job.process()
|
job.process()
|
||||||
|
@ -329,16 +352,18 @@ class JobSequence(WorkUnit):
|
||||||
class ApplyResult(object):
|
class ApplyResult(object):
|
||||||
|
|
||||||
"""An object associated with a Job object that holds its result:
|
"""An object associated with a Job object that holds its result:
|
||||||
|
|
||||||
it's available during the whole life the Job and after, even when
|
it's available during the whole life the Job and after, even when
|
||||||
the Job didn't process yet. It's possible to use this object to
|
the Job didn't process yet. It's possible to use this object to
|
||||||
wait for the result/exception of the job to be available.
|
wait for the result/exception of the job to be available.
|
||||||
|
|
||||||
The result objects returns by the Pool::*_async() methods are of
|
The result objects returns by the Pool::*_async() methods are of
|
||||||
this type"""
|
this type
|
||||||
|
"""
|
||||||
|
|
||||||
def __init__(self, collector=None, callback=None):
|
def __init__(self, collector=None, callback=None):
|
||||||
"""
|
"""\param collector when not None, the notify_ready() method of
|
||||||
\param collector when not None, the notify_ready() method of
|
|
||||||
the collector will be called when the result from the Job is
|
the collector will be called when the result from the Job is
|
||||||
ready
|
ready
|
||||||
\param callback when not None, function to call when the
|
\param callback when not None, function to call when the
|
||||||
|
@ -356,11 +381,11 @@ class ApplyResult(object):
|
||||||
self._collector = collector
|
self._collector = collector
|
||||||
|
|
||||||
def get(self, timeout=None):
|
def get(self, timeout=None):
|
||||||
"""
|
"""Returns the result when it arrives.
|
||||||
Returns the result when it arrives. If timeout is not None and
|
|
||||||
the result does not arrive within timeout seconds then
|
If timeout is not None and the result does not arrive within timeout
|
||||||
TimeoutError is raised. If the remote call raised an exception
|
seconds then TimeoutError is raised. If the remote call raised an
|
||||||
then that exception will be reraised by get().
|
exception then that exception will be re-raised by get().
|
||||||
"""
|
"""
|
||||||
if not self.wait(timeout):
|
if not self.wait(timeout):
|
||||||
raise TimeoutError("Result not available within %fs" % timeout)
|
raise TimeoutError("Result not available within %fs" % timeout)
|
||||||
|
@ -369,27 +394,31 @@ class ApplyResult(object):
|
||||||
raise self._data[0], self._data[1], self._data[2]
|
raise self._data[0], self._data[1], self._data[2]
|
||||||
|
|
||||||
def wait(self, timeout=None):
|
def wait(self, timeout=None):
|
||||||
"""Waits until the result is available or until timeout
|
"""Waits until the result is available or until timeout seconds pass.
|
||||||
seconds pass."""
|
"""
|
||||||
self._event.wait(timeout)
|
self._event.wait(timeout)
|
||||||
return self._event.isSet()
|
return self._event.isSet()
|
||||||
|
|
||||||
def ready(self):
|
def ready(self):
|
||||||
"""Returns whether the call has completed."""
|
"""Returns whether the call has completed.
|
||||||
|
"""
|
||||||
return self._event.isSet()
|
return self._event.isSet()
|
||||||
|
|
||||||
def successful(self):
|
def successful(self):
|
||||||
"""Returns whether the call completed without raising an
|
"""Returns whether the call completed without raising an exception.
|
||||||
exception. Will raise AssertionError if the result is not
|
|
||||||
ready."""
|
Will raise AssertionError if the result is not ready.
|
||||||
|
"""
|
||||||
assert self.ready()
|
assert self.ready()
|
||||||
return self._success
|
return self._success
|
||||||
|
|
||||||
def _set_value(self, value):
|
def _set_value(self, value):
|
||||||
"""Called by a Job object to tell the result is ready, and
|
"""Called by a Job object to tell the result is ready, and
|
||||||
|
|
||||||
provides the value of this result. The object will become
|
provides the value of this result. The object will become
|
||||||
ready and successful. The collector's notify_ready() method
|
ready and successful. The collector's notify_ready() method
|
||||||
will be called, and the callback method too"""
|
will be called, and the callback method too.
|
||||||
|
"""
|
||||||
assert not self.ready()
|
assert not self.ready()
|
||||||
self._data = value
|
self._data = value
|
||||||
self._success = True
|
self._success = True
|
||||||
|
@ -399,14 +428,16 @@ class ApplyResult(object):
|
||||||
if self._callback is not None:
|
if self._callback is not None:
|
||||||
try:
|
try:
|
||||||
self._callback(value)
|
self._callback(value)
|
||||||
except:
|
except Exception:
|
||||||
traceback.print_exc()
|
traceback.print_exc()
|
||||||
|
|
||||||
def _set_exception(self):
|
def _set_exception(self):
|
||||||
"""Called by a Job object to tell that an exception occured
|
"""Called by a Job object to tell that an exception occured
|
||||||
|
|
||||||
during the processing of the function. The object will become
|
during the processing of the function. The object will become
|
||||||
ready but not successful. The collector's notify_ready()
|
ready but not successful. The collector's notify_ready()
|
||||||
method will be called, but NOT the callback method"""
|
method will be called, but NOT the callback method
|
||||||
|
"""
|
||||||
assert not self.ready()
|
assert not self.ready()
|
||||||
self._data = sys.exc_info()
|
self._data = sys.exc_info()
|
||||||
self._success = False
|
self._success = False
|
||||||
|
@ -417,22 +448,25 @@ class ApplyResult(object):
|
||||||
|
|
||||||
class AbstractResultCollector(object):
|
class AbstractResultCollector(object):
|
||||||
|
|
||||||
"""ABC to define the interface of a ResultCollector object. It is
|
"""ABC to define the interface of a ResultCollector object.
|
||||||
basically an object which knows whuich results it's waiting for,
|
|
||||||
|
It is basically an object which knows whuich results it's waiting for,
|
||||||
and which is able to get notify when they get available. It is
|
and which is able to get notify when they get available. It is
|
||||||
also able to provide an iterator over the results when they are
|
also able to provide an iterator over the results when they are
|
||||||
available"""
|
available.
|
||||||
|
"""
|
||||||
|
|
||||||
def __init__(self, to_notify):
|
def __init__(self, to_notify):
|
||||||
"""
|
"""\param to_notify ApplyResult object to notify when all the
|
||||||
\param to_notify ApplyResult object to notify when all the
|
|
||||||
results we're waiting for become available. Can be None.
|
results we're waiting for become available. Can be None.
|
||||||
"""
|
"""
|
||||||
self._to_notify = to_notify
|
self._to_notify = to_notify
|
||||||
|
|
||||||
def register_result(self, apply_result):
|
def register_result(self, apply_result):
|
||||||
"""Used to identify which results we're waiting for. Will
|
"""Used to identify which results we're waiting for.
|
||||||
always be called BEFORE the Jobs get submitted to the work
|
|
||||||
|
Will always be called BEFORE the Jobs get submitted to the work
|
||||||
queue, and BEFORE the __iter__ and _get_result() methods can
|
queue, and BEFORE the __iter__ and _get_result() methods can
|
||||||
be called
|
be called
|
||||||
\param apply_result ApplyResult object to add in our collection
|
\param apply_result ApplyResult object to add in our collection
|
||||||
|
@ -441,6 +475,7 @@ class AbstractResultCollector(object):
|
||||||
|
|
||||||
def notify_ready(self, apply_result):
|
def notify_ready(self, apply_result):
|
||||||
"""Called by the ApplyResult object (already registered via
|
"""Called by the ApplyResult object (already registered via
|
||||||
|
|
||||||
register_result()) that it is now ready (ie. the Job's result
|
register_result()) that it is now ready (ie. the Job's result
|
||||||
is available or an exception has been raised).
|
is available or an exception has been raised).
|
||||||
\param apply_result ApplyResult object telling us that the job
|
\param apply_result ApplyResult object telling us that the job
|
||||||
|
@ -450,6 +485,7 @@ class AbstractResultCollector(object):
|
||||||
|
|
||||||
def _get_result(self, idx, timeout=None):
|
def _get_result(self, idx, timeout=None):
|
||||||
"""Called by the CollectorIterator object to retrieve the
|
"""Called by the CollectorIterator object to retrieve the
|
||||||
|
|
||||||
result's values one after another (order defined by the
|
result's values one after another (order defined by the
|
||||||
implementation)
|
implementation)
|
||||||
\param idx The index of the result we want, wrt collector's order
|
\param idx The index of the result we want, wrt collector's order
|
||||||
|
@ -460,19 +496,23 @@ class AbstractResultCollector(object):
|
||||||
raise NotImplementedError("Children classes must implement it")
|
raise NotImplementedError("Children classes must implement it")
|
||||||
|
|
||||||
def __iter__(self):
|
def __iter__(self):
|
||||||
"""Return a new CollectorIterator object for this collector"""
|
"""Return a new CollectorIterator object for this collector.
|
||||||
|
"""
|
||||||
return CollectorIterator(self)
|
return CollectorIterator(self)
|
||||||
|
|
||||||
|
|
||||||
class CollectorIterator(object):
|
class CollectorIterator(object):
|
||||||
|
|
||||||
"""An iterator that allows to iterate over the result values
|
"""An iterator that allows to iterate over the result values
|
||||||
|
|
||||||
available in the given collector object. Equipped with an extended
|
available in the given collector object. Equipped with an extended
|
||||||
next() method accepting a timeout argument. Created by the
|
next() method accepting a timeout argument. Created by the
|
||||||
AbstractResultCollector::__iter__() method"""
|
AbstractResultCollector::__iter__() method
|
||||||
|
"""
|
||||||
|
|
||||||
def __init__(self, collector):
|
def __init__(self, collector):
|
||||||
"""\param AbstractResultCollector instance"""
|
"""\param AbstractResultCollector instance.
|
||||||
|
"""
|
||||||
self._collector = collector
|
self._collector = collector
|
||||||
self._idx = 0
|
self._idx = 0
|
||||||
|
|
||||||
|
@ -480,16 +520,18 @@ class CollectorIterator(object):
|
||||||
return self
|
return self
|
||||||
|
|
||||||
def next(self, timeout=None):
|
def next(self, timeout=None):
|
||||||
"""Return the next result value in the sequence. Raise
|
"""Return the next result value in the sequence.
|
||||||
StopIteration at the end. Can raise the exception raised by
|
|
||||||
the Job"""
|
Raise StopIteration at the end. Can raise the exception raised by
|
||||||
|
the Job.
|
||||||
|
"""
|
||||||
try:
|
try:
|
||||||
apply_result = self._collector._get_result(self._idx, timeout)
|
apply_result = self._collector._get_result(self._idx, timeout)
|
||||||
except IndexError:
|
except IndexError:
|
||||||
# Reset for next time
|
# Reset for next time
|
||||||
self._idx = 0
|
self._idx = 0
|
||||||
raise StopIteration
|
raise StopIteration
|
||||||
except:
|
except Exception:
|
||||||
self._idx = 0
|
self._idx = 0
|
||||||
raise
|
raise
|
||||||
self._idx += 1
|
self._idx += 1
|
||||||
|
@ -500,13 +542,15 @@ class CollectorIterator(object):
|
||||||
class UnorderedResultCollector(AbstractResultCollector):
|
class UnorderedResultCollector(AbstractResultCollector):
|
||||||
|
|
||||||
"""An AbstractResultCollector implementation that collects the
|
"""An AbstractResultCollector implementation that collects the
|
||||||
|
|
||||||
values of the ApplyResult objects in the order they become ready. The
|
values of the ApplyResult objects in the order they become ready. The
|
||||||
CollectorIterator object returned by __iter__() will iterate over
|
CollectorIterator object returned by __iter__() will iterate over
|
||||||
them in the order they become ready"""
|
them in the order they become ready.
|
||||||
|
"""
|
||||||
|
|
||||||
def __init__(self, to_notify=None):
|
def __init__(self, to_notify=None):
|
||||||
"""
|
"""\param to_notify ApplyResult object to notify when all the
|
||||||
\param to_notify ApplyResult object to notify when all the
|
|
||||||
results we're waiting for become available. Can be None.
|
results we're waiting for become available. Can be None.
|
||||||
"""
|
"""
|
||||||
AbstractResultCollector.__init__(self, to_notify)
|
AbstractResultCollector.__init__(self, to_notify)
|
||||||
|
@ -515,8 +559,9 @@ class UnorderedResultCollector(AbstractResultCollector):
|
||||||
self._expected = 0
|
self._expected = 0
|
||||||
|
|
||||||
def register_result(self, apply_result):
|
def register_result(self, apply_result):
|
||||||
"""Used to identify which results we're waiting for. Will
|
"""Used to identify which results we're waiting for.
|
||||||
always be called BEFORE the Jobs get submitted to the work
|
|
||||||
|
Will always be called BEFORE the Jobs get submitted to the work
|
||||||
queue, and BEFORE the __iter__ and _get_result() methods can
|
queue, and BEFORE the __iter__ and _get_result() methods can
|
||||||
be called
|
be called
|
||||||
\param apply_result ApplyResult object to add in our collection
|
\param apply_result ApplyResult object to add in our collection
|
||||||
|
@ -525,6 +570,7 @@ class UnorderedResultCollector(AbstractResultCollector):
|
||||||
|
|
||||||
def _get_result(self, idx, timeout=None):
|
def _get_result(self, idx, timeout=None):
|
||||||
"""Called by the CollectorIterator object to retrieve the
|
"""Called by the CollectorIterator object to retrieve the
|
||||||
|
|
||||||
result's values one after another, in the order the results have
|
result's values one after another, in the order the results have
|
||||||
become available.
|
become available.
|
||||||
\param idx The index of the result we want, wrt collector's order
|
\param idx The index of the result we want, wrt collector's order
|
||||||
|
@ -553,6 +599,7 @@ class UnorderedResultCollector(AbstractResultCollector):
|
||||||
|
|
||||||
def notify_ready(self, apply_result):
|
def notify_ready(self, apply_result):
|
||||||
"""Called by the ApplyResult object (already registered via
|
"""Called by the ApplyResult object (already registered via
|
||||||
|
|
||||||
register_result()) that it is now ready (ie. the Job's result
|
register_result()) that it is now ready (ie. the Job's result
|
||||||
is available or an exception has been raised).
|
is available or an exception has been raised).
|
||||||
\param apply_result ApplyResult object telling us that the job
|
\param apply_result ApplyResult object telling us that the job
|
||||||
|
@ -575,13 +622,15 @@ class UnorderedResultCollector(AbstractResultCollector):
|
||||||
class OrderedResultCollector(AbstractResultCollector):
|
class OrderedResultCollector(AbstractResultCollector):
|
||||||
|
|
||||||
"""An AbstractResultCollector implementation that collects the
|
"""An AbstractResultCollector implementation that collects the
|
||||||
|
|
||||||
values of the ApplyResult objects in the order they have been
|
values of the ApplyResult objects in the order they have been
|
||||||
submitted. The CollectorIterator object returned by __iter__()
|
submitted. The CollectorIterator object returned by __iter__()
|
||||||
will iterate over them in the order they have been submitted"""
|
will iterate over them in the order they have been submitted.
|
||||||
|
"""
|
||||||
|
|
||||||
def __init__(self, to_notify=None, as_iterator=True):
|
def __init__(self, to_notify=None, as_iterator=True):
|
||||||
"""
|
"""\param to_notify ApplyResult object to notify when all the
|
||||||
\param to_notify ApplyResult object to notify when all the
|
|
||||||
results we're waiting for become available. Can be None.
|
results we're waiting for become available. Can be None.
|
||||||
\param as_iterator boolean telling whether the result value
|
\param as_iterator boolean telling whether the result value
|
||||||
set on to_notify should be an iterator (available as soon as 1
|
set on to_notify should be an iterator (available as soon as 1
|
||||||
|
@ -595,8 +644,9 @@ class OrderedResultCollector(AbstractResultCollector):
|
||||||
self._as_iterator = as_iterator
|
self._as_iterator = as_iterator
|
||||||
|
|
||||||
def register_result(self, apply_result):
|
def register_result(self, apply_result):
|
||||||
"""Used to identify which results we're waiting for. Will
|
"""Used to identify which results we're waiting for.
|
||||||
always be called BEFORE the Jobs get submitted to the work
|
|
||||||
|
Will always be called BEFORE the Jobs get submitted to the work
|
||||||
queue, and BEFORE the __iter__ and _get_result() methods can
|
queue, and BEFORE the __iter__ and _get_result() methods can
|
||||||
be called
|
be called
|
||||||
\param apply_result ApplyResult object to add in our collection
|
\param apply_result ApplyResult object to add in our collection
|
||||||
|
@ -606,6 +656,7 @@ class OrderedResultCollector(AbstractResultCollector):
|
||||||
|
|
||||||
def _get_result(self, idx, timeout=None):
|
def _get_result(self, idx, timeout=None):
|
||||||
"""Called by the CollectorIterator object to retrieve the
|
"""Called by the CollectorIterator object to retrieve the
|
||||||
|
|
||||||
result's values one after another (order defined by the
|
result's values one after another (order defined by the
|
||||||
implementation)
|
implementation)
|
||||||
\param idx The index of the result we want, wrt collector's order
|
\param idx The index of the result we want, wrt collector's order
|
||||||
|
@ -619,6 +670,7 @@ class OrderedResultCollector(AbstractResultCollector):
|
||||||
|
|
||||||
def notify_ready(self, apply_result):
|
def notify_ready(self, apply_result):
|
||||||
"""Called by the ApplyResult object (already registered via
|
"""Called by the ApplyResult object (already registered via
|
||||||
|
|
||||||
register_result()) that it is now ready (ie. the Job's result
|
register_result()) that it is now ready (ie. the Job's result
|
||||||
is available or an exception has been raised).
|
is available or an exception has been raised).
|
||||||
\param apply_result ApplyResult object telling us that the job
|
\param apply_result ApplyResult object telling us that the job
|
||||||
|
@ -641,25 +693,25 @@ class OrderedResultCollector(AbstractResultCollector):
|
||||||
elif not self._as_iterator and got_last:
|
elif not self._as_iterator and got_last:
|
||||||
try:
|
try:
|
||||||
lst = [r.get(0) for r in self._results]
|
lst = [r.get(0) for r in self._results]
|
||||||
except:
|
except Exception:
|
||||||
self._to_notify._set_exception()
|
self._to_notify._set_exception()
|
||||||
else:
|
else:
|
||||||
self._to_notify._set_value(lst)
|
self._to_notify._set_value(lst)
|
||||||
|
|
||||||
|
|
||||||
def _test():
|
def _test():
|
||||||
"""Some tests"""
|
"""Some tests.
|
||||||
import thread
|
"""
|
||||||
import time
|
import time
|
||||||
|
|
||||||
def f(x):
|
def f(x):
|
||||||
return x * x
|
return x * x
|
||||||
|
|
||||||
def work(seconds):
|
def work(seconds):
|
||||||
print("[%d] Start to work for %fs..." % (thread.get_ident(), seconds))
|
print("[%d] Start to work for %fs..." % (threading.thread.get_ident(), seconds))
|
||||||
time.sleep(seconds)
|
time.sleep(seconds)
|
||||||
print("[%d] Work done (%fs)." % (thread.get_ident(), seconds))
|
print("[%d] Work done (%fs)." % (threading.thread.get_ident(), seconds))
|
||||||
return "%d slept %fs" % (thread.get_ident(), seconds)
|
return "%d slept %fs" % (threading.thread.get_ident(), seconds)
|
||||||
|
|
||||||
# Test copy/pasted from multiprocessing
|
# Test copy/pasted from multiprocessing
|
||||||
pool = Pool(9) # start 4 worker threads
|
pool = Pool(9) # start 4 worker threads
|
||||||
|
|
|
@ -1,10 +1,10 @@
|
||||||
from collections import namedtuple
|
import collections
|
||||||
import time
|
import Queue
|
||||||
from Queue import Queue, Empty
|
|
||||||
import threading
|
import threading
|
||||||
|
import time
|
||||||
|
|
||||||
from monagent.collector.checks import AgentCheck
|
import monagent.collector.checks
|
||||||
from monagent.collector.checks.libs.thread_pool import Pool
|
import monagent.collector.checks.libs.thread_pool
|
||||||
|
|
||||||
|
|
||||||
TIMEOUT = 180
|
TIMEOUT = 180
|
||||||
|
@ -12,16 +12,16 @@ DEFAULT_SIZE_POOL = 6
|
||||||
MAX_LOOP_ITERATIONS = 1000
|
MAX_LOOP_ITERATIONS = 1000
|
||||||
FAILURE = "FAILURE"
|
FAILURE = "FAILURE"
|
||||||
|
|
||||||
up_down = namedtuple('up_down', ['UP', 'DOWN'])
|
up_down = collections.namedtuple('up_down', ['UP', 'DOWN'])
|
||||||
Status = up_down('UP', 'DOWN')
|
Status = up_down('UP', 'DOWN')
|
||||||
EventType = up_down("servicecheck.state_change.up", "servicecheck.state_change.down")
|
EventType = up_down("servicecheck.state_change.up", "servicecheck.state_change.down")
|
||||||
|
|
||||||
|
|
||||||
class ServicesCheck(AgentCheck):
|
class ServicesCheck(monagent.collector.checks.AgentCheck):
|
||||||
SOURCE_TYPE_NAME = 'servicecheck'
|
SOURCE_TYPE_NAME = 'servicecheck'
|
||||||
|
|
||||||
"""
|
"""Services checks inherits from this class.
|
||||||
Services checks inherits from this class.
|
|
||||||
This class should never be directly instanciated.
|
This class should never be directly instanciated.
|
||||||
|
|
||||||
Work flow:
|
Work flow:
|
||||||
|
@ -36,11 +36,10 @@ class ServicesCheck(AgentCheck):
|
||||||
Status.UP or Status.DOWN.
|
Status.UP or Status.DOWN.
|
||||||
The second element is a short error message that will be displayed
|
The second element is a short error message that will be displayed
|
||||||
when the service turns down.
|
when the service turns down.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, name, init_config, agentConfig, instances):
|
def __init__(self, name, init_config, agentConfig, instances):
|
||||||
AgentCheck.__init__(self, name, init_config, agentConfig, instances)
|
monagent.collector.checks.AgentCheck.__init__(self, name, init_config, agentConfig, instances)
|
||||||
|
|
||||||
# A dictionary to keep track of service statuses
|
# A dictionary to keep track of service statuses
|
||||||
self.statuses = {}
|
self.statuses = {}
|
||||||
|
@ -60,9 +59,9 @@ class ServicesCheck(AgentCheck):
|
||||||
default_size = min(self.instance_count(), DEFAULT_SIZE_POOL)
|
default_size = min(self.instance_count(), DEFAULT_SIZE_POOL)
|
||||||
self.pool_size = int(self.init_config.get('threads_count', default_size))
|
self.pool_size = int(self.init_config.get('threads_count', default_size))
|
||||||
|
|
||||||
self.pool = Pool(self.pool_size)
|
self.pool = monagent.collector.checks.libs.thread_pool.Pool(self.pool_size)
|
||||||
|
|
||||||
self.resultsq = Queue()
|
self.resultsq = Queue.Queue()
|
||||||
self.jobs_status = {}
|
self.jobs_status = {}
|
||||||
self.pool_started = True
|
self.pool_started = True
|
||||||
|
|
||||||
|
@ -110,7 +109,7 @@ class ServicesCheck(AgentCheck):
|
||||||
# We put the results in the result queue
|
# We put the results in the result queue
|
||||||
self.resultsq.put(result)
|
self.resultsq.put(result)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception:
|
||||||
result = (FAILURE, FAILURE, FAILURE, FAILURE)
|
result = (FAILURE, FAILURE, FAILURE, FAILURE)
|
||||||
self.resultsq.put(result)
|
self.resultsq.put(result)
|
||||||
|
|
||||||
|
@ -119,7 +118,7 @@ class ServicesCheck(AgentCheck):
|
||||||
try:
|
try:
|
||||||
# We want to fetch the result in a non blocking way
|
# We want to fetch the result in a non blocking way
|
||||||
status, msg, name, queue_instance = self.resultsq.get_nowait()
|
status, msg, name, queue_instance = self.resultsq.get_nowait()
|
||||||
except Empty:
|
except Queue.Empty:
|
||||||
break
|
break
|
||||||
|
|
||||||
if status == FAILURE:
|
if status == FAILURE:
|
||||||
|
@ -165,13 +164,12 @@ class ServicesCheck(AgentCheck):
|
||||||
del self.jobs_status[name]
|
del self.jobs_status[name]
|
||||||
|
|
||||||
def _check(self, instance):
|
def _check(self, instance):
|
||||||
"""This function should be implemented by inherited classes"""
|
"""This function should be implemented by inherited classes.
|
||||||
|
"""
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
def _clean(self):
|
def _clean(self):
|
||||||
now = time.time()
|
now = time.time()
|
||||||
stuck_process = None
|
|
||||||
stuck_time = time.time()
|
|
||||||
for name in self.jobs_status.keys():
|
for name in self.jobs_status.keys():
|
||||||
start_time = self.jobs_status[name]
|
start_time = self.jobs_status[name]
|
||||||
if now - start_time > TIMEOUT:
|
if now - start_time > TIMEOUT:
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
"""
|
"""Unix system checks.
|
||||||
Unix system checks.
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# stdlib
|
# stdlib
|
||||||
|
import functools
|
||||||
import logging
|
import logging
|
||||||
import operator
|
import operator
|
||||||
import platform
|
import platform
|
||||||
|
@ -12,22 +12,24 @@ import sys
|
||||||
import time
|
import time
|
||||||
|
|
||||||
# project
|
# project
|
||||||
from monagent.collector.checks.check import Check
|
|
||||||
from monagent.common.metrics import Measurement
|
import monagent.collector.checks.check
|
||||||
from monagent.common.util import Platform
|
import monagent.common.metrics
|
||||||
from functools import reduce
|
import monagent.common.util
|
||||||
|
|
||||||
|
|
||||||
# locale-resilient float converter
|
# locale-resilient float converter
|
||||||
to_float = lambda s: float(s.replace(",", "."))
|
to_float = lambda s: float(s.replace(",", "."))
|
||||||
|
|
||||||
|
|
||||||
class Disk(Check):
|
class Disk(monagent.collector.checks.check.Check):
|
||||||
|
|
||||||
""" Collects metrics about the machine's disks. """
|
"""Collects metrics about the machine's disks.
|
||||||
|
"""
|
||||||
|
|
||||||
def check(self):
|
def check(self):
|
||||||
"""Get disk space/inode stats"""
|
"""Get disk space/inode stats.
|
||||||
|
"""
|
||||||
# First get the configuration.
|
# First get the configuration.
|
||||||
if self.agent_config is not None:
|
if self.agent_config is not None:
|
||||||
use_mount = self.agent_config.get("use_mount", False)
|
use_mount = self.agent_config.get("use_mount", False)
|
||||||
|
@ -58,12 +60,12 @@ class Disk(Check):
|
||||||
# parse into a list of Measurements
|
# parse into a list of Measurements
|
||||||
stats.update(inodes)
|
stats.update(inodes)
|
||||||
timestamp = time.time()
|
timestamp = time.time()
|
||||||
measurements = [
|
measurements = [monagent.common.metrics.Measurement(key.split('.', 1)[1],
|
||||||
Measurement(
|
timestamp,
|
||||||
key.split(
|
value,
|
||||||
'.', 1)[1], timestamp, value, {
|
{'device': key.split('.', 1)[0]})
|
||||||
'device': key.split(
|
for key, value in stats.iteritems()]
|
||||||
'.', 1)[0]}) for key, value in stats.iteritems()]
|
|
||||||
return measurements
|
return measurements
|
||||||
|
|
||||||
except Exception:
|
except Exception:
|
||||||
|
@ -72,9 +74,9 @@ class Disk(Check):
|
||||||
|
|
||||||
def parse_df_output(
|
def parse_df_output(
|
||||||
self, df_output, platform_name, inodes=False, use_mount=False, blacklist_re=None):
|
self, df_output, platform_name, inodes=False, use_mount=False, blacklist_re=None):
|
||||||
"""
|
"""Parse the output of the df command.
|
||||||
Parse the output of the df command. If use_volume is true the volume
|
|
||||||
is used to anchor the metric, otherwise false the mount
|
If use_volume is true the volume is used to anchor the metric, otherwise false the mount
|
||||||
point is used. Returns a tuple of (disk, inode).
|
point is used. Returns a tuple of (disk, inode).
|
||||||
"""
|
"""
|
||||||
usage_data = {}
|
usage_data = {}
|
||||||
|
@ -88,14 +90,14 @@ class Disk(Check):
|
||||||
if use_mount:
|
if use_mount:
|
||||||
parts[0] = parts[-1]
|
parts[0] = parts[-1]
|
||||||
if inodes:
|
if inodes:
|
||||||
if Platform.is_darwin(platform_name):
|
if monagent.common.util.Platform.is_darwin(platform_name):
|
||||||
# Filesystem 512-blocks Used Available Capacity iused ifree %iused Mounted
|
# Filesystem 512-blocks Used Available Capacity iused ifree %iused Mounted
|
||||||
# Inodes are in position 5, 6 and we need to compute the total
|
# Inodes are in position 5, 6 and we need to compute the total
|
||||||
# Total
|
# Total
|
||||||
parts[1] = int(parts[5]) + int(parts[6]) # Total
|
parts[1] = int(parts[5]) + int(parts[6]) # Total
|
||||||
parts[2] = int(parts[5]) # Used
|
parts[2] = int(parts[5]) # Used
|
||||||
parts[3] = int(parts[6]) # Available
|
parts[3] = int(parts[6]) # Available
|
||||||
elif Platform.is_freebsd(platform_name):
|
elif monagent.common.util.Platform.is_freebsd(platform_name):
|
||||||
# Filesystem 1K-blocks Used Avail Capacity iused ifree %iused Mounted
|
# Filesystem 1K-blocks Used Avail Capacity iused ifree %iused Mounted
|
||||||
# Inodes are in position 5, 6 and we need to compute the total
|
# Inodes are in position 5, 6 and we need to compute the total
|
||||||
parts[1] = int(parts[5]) + int(parts[6]) # Total
|
parts[1] = int(parts[5]) + int(parts[6]) # Total
|
||||||
|
@ -128,8 +130,7 @@ class Disk(Check):
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def _is_real_device(self, device):
|
def _is_real_device(self, device):
|
||||||
"""
|
"""Return true if we should track the given device name and false otherwise.
|
||||||
Return true if we should track the given device name and false otherwise.
|
|
||||||
"""
|
"""
|
||||||
# First, skip empty lines.
|
# First, skip empty lines.
|
||||||
if not device or len(device) <= 1:
|
if not device or len(device) <= 1:
|
||||||
|
@ -164,10 +165,9 @@ class Disk(Check):
|
||||||
return devices
|
return devices
|
||||||
|
|
||||||
def _transform_df_output(self, df_output, blacklist_re):
|
def _transform_df_output(self, df_output, blacklist_re):
|
||||||
"""
|
"""Given raw output for the df command, transform it into a normalized list devices.
|
||||||
Given raw output for the df command, transform it into a normalized
|
|
||||||
list devices. A 'device' is a list with fields corresponding to the
|
A 'device' is a list with fields corresponding to the output of df output on each platform.
|
||||||
output of df output on each platform.
|
|
||||||
"""
|
"""
|
||||||
all_devices = [l.strip().split() for l in df_output.split("\n")]
|
all_devices = [l.strip().split() for l in df_output.split("\n")]
|
||||||
|
|
||||||
|
@ -190,10 +190,10 @@ class Disk(Check):
|
||||||
return devices
|
return devices
|
||||||
|
|
||||||
|
|
||||||
class IO(Check):
|
class IO(monagent.collector.checks.check.Check):
|
||||||
|
|
||||||
def __init__(self, logger):
|
def __init__(self, logger):
|
||||||
Check.__init__(self, logger)
|
monagent.collector.checks.check.Check.__init__(self, logger)
|
||||||
self.header_re = re.compile(r'([%\\/\-_a-zA-Z0-9]+)[\s+]?')
|
self.header_re = re.compile(r'([%\\/\-_a-zA-Z0-9]+)[\s+]?')
|
||||||
self.item_re = re.compile(r'^([a-zA-Z0-9\/]+)')
|
self.item_re = re.compile(r'^([a-zA-Z0-9\/]+)')
|
||||||
self.value_re = re.compile(r'\d+\.\d+')
|
self.value_re = re.compile(r'\d+\.\d+')
|
||||||
|
@ -252,7 +252,8 @@ class IO(Check):
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def xlate(metric_name, os_name):
|
def xlate(metric_name, os_name):
|
||||||
"""Standardize on linux metric names"""
|
"""Standardize on linux metric names.
|
||||||
|
"""
|
||||||
if os_name == "sunos":
|
if os_name == "sunos":
|
||||||
names = {"wait": "await",
|
names = {"wait": "await",
|
||||||
"svc_t": "svctm",
|
"svc_t": "svctm",
|
||||||
|
@ -282,7 +283,7 @@ class IO(Check):
|
||||||
"""
|
"""
|
||||||
io = {}
|
io = {}
|
||||||
try:
|
try:
|
||||||
if Platform.is_linux():
|
if monagent.common.util.Platform.is_linux():
|
||||||
stdout = sp.Popen(['iostat', '-d', '1', '2', '-x', '-k'],
|
stdout = sp.Popen(['iostat', '-d', '1', '2', '-x', '-k'],
|
||||||
stdout=sp.PIPE,
|
stdout=sp.PIPE,
|
||||||
close_fds=True).communicate()[0]
|
close_fds=True).communicate()[0]
|
||||||
|
@ -394,7 +395,7 @@ class IO(Check):
|
||||||
for dev_name, stats in filtered_io.iteritems():
|
for dev_name, stats in filtered_io.iteritems():
|
||||||
filtered_stats = {stat: stats[stat]
|
filtered_stats = {stat: stats[stat]
|
||||||
for stat in stats.iterkeys() if stat not in self.stat_blacklist}
|
for stat in stats.iterkeys() if stat not in self.stat_blacklist}
|
||||||
m_list = [Measurement(key, timestamp, value, {'device': dev_name})
|
m_list = [monagent.common.metrics.Measurement(key, timestamp, value, {'device': dev_name})
|
||||||
for key, value in filtered_stats.iteritems()]
|
for key, value in filtered_stats.iteritems()]
|
||||||
measurements.extend(m_list)
|
measurements.extend(m_list)
|
||||||
|
|
||||||
|
@ -405,10 +406,10 @@ class IO(Check):
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
|
|
||||||
class Load(Check):
|
class Load(monagent.collector.checks.check.Check):
|
||||||
|
|
||||||
def check(self):
|
def check(self):
|
||||||
if Platform.is_linux():
|
if monagent.common.util.Platform.is_linux():
|
||||||
try:
|
try:
|
||||||
loadAvrgProc = open('/proc/loadavg', 'r')
|
loadAvrgProc = open('/proc/loadavg', 'r')
|
||||||
uptime = loadAvrgProc.readlines()
|
uptime = loadAvrgProc.readlines()
|
||||||
|
@ -437,10 +438,10 @@ class Load(Check):
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class Memory(Check):
|
class Memory(monagent.collector.checks.check.Check):
|
||||||
|
|
||||||
def __init__(self, logger):
|
def __init__(self, logger):
|
||||||
Check.__init__(self, logger)
|
monagent.collector.checks.check.Check.__init__(self, logger)
|
||||||
macV = None
|
macV = None
|
||||||
if sys.platform == 'darwin':
|
if sys.platform == 'darwin':
|
||||||
macV = platform.mac_ver()
|
macV = platform.mac_ver()
|
||||||
|
@ -464,7 +465,7 @@ class Memory(Check):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def check(self):
|
def check(self):
|
||||||
if Platform.is_linux():
|
if monagent.common.util.Platform.is_linux():
|
||||||
try:
|
try:
|
||||||
meminfoProc = open('/proc/meminfo', 'r')
|
meminfoProc = open('/proc/meminfo', 'r')
|
||||||
lines = meminfoProc.readlines()
|
lines = meminfoProc.readlines()
|
||||||
|
@ -736,10 +737,11 @@ class Memory(Check):
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
|
|
||||||
class Cpu(Check):
|
class Cpu(monagent.collector.checks.check.Check):
|
||||||
|
|
||||||
def check(self):
|
def check(self):
|
||||||
"""Return an aggregate of CPU stats across all CPUs
|
"""Return an aggregate of CPU stats across all CPUs.
|
||||||
|
|
||||||
When figures are not available, False is sent back.
|
When figures are not available, False is sent back.
|
||||||
"""
|
"""
|
||||||
def format_results(us, sy, wa, idle, st):
|
def format_results(us, sy, wa, idle, st):
|
||||||
|
@ -754,7 +756,8 @@ class Cpu(Check):
|
||||||
return data
|
return data
|
||||||
|
|
||||||
def get_value(legend, data, name, filter_value=None):
|
def get_value(legend, data, name, filter_value=None):
|
||||||
"Using the legend and a metric name, get the value or None from the data line"
|
"""Using the legend and a metric name, get the value or None from the data line.
|
||||||
|
"""
|
||||||
if name in legend:
|
if name in legend:
|
||||||
value = to_float(data[legend.index(name)])
|
value = to_float(data[legend.index(name)])
|
||||||
if filter_value is not None:
|
if filter_value is not None:
|
||||||
|
@ -767,7 +770,7 @@ class Cpu(Check):
|
||||||
self.logger.debug("Cannot extract cpu value %s from %s (%s)" % (name, data, legend))
|
self.logger.debug("Cannot extract cpu value %s from %s (%s)" % (name, data, legend))
|
||||||
return 0.0
|
return 0.0
|
||||||
|
|
||||||
if Platform.is_linux():
|
if monagent.common.util.Platform.is_linux():
|
||||||
mpstat = sp.Popen(['mpstat', '1', '3'], stdout=sp.PIPE, close_fds=True).communicate()[0]
|
mpstat = sp.Popen(['mpstat', '1', '3'], stdout=sp.PIPE, close_fds=True).communicate()[0]
|
||||||
# topdog@ip:~$ mpstat 1 3
|
# topdog@ip:~$ mpstat 1 3
|
||||||
# Linux 2.6.32-341-ec2 (ip) 01/19/2012 _x86_64_ (2 CPU)
|
# Linux 2.6.32-341-ec2 (ip) 01/19/2012 _x86_64_ (2 CPU)
|
||||||
|
@ -917,7 +920,7 @@ class Cpu(Check):
|
||||||
size = [get_value(headers, l.split(), "sze") for l in d_lines]
|
size = [get_value(headers, l.split(), "sze") for l in d_lines]
|
||||||
count = sum(size)
|
count = sum(size)
|
||||||
rel_size = [s / count for s in size]
|
rel_size = [s / count for s in size]
|
||||||
dot = lambda v1, v2: reduce(operator.add, map(operator.mul, v1, v2))
|
dot = lambda v1, v2: functools.reduce(operator.add, map(operator.mul, v1, v2))
|
||||||
return format_results(dot(user, rel_size),
|
return format_results(dot(user, rel_size),
|
||||||
dot(kern, rel_size),
|
dot(kern, rel_size),
|
||||||
dot(wait, rel_size),
|
dot(wait, rel_size),
|
||||||
|
@ -932,9 +935,9 @@ class Cpu(Check):
|
||||||
|
|
||||||
|
|
||||||
def _get_subprocess_output(command):
|
def _get_subprocess_output(command):
|
||||||
"""
|
"""Run the given subprocess command and return it's output.
|
||||||
Run the given subprocess command and return it's output. Raise an Exception
|
|
||||||
if an error occurs.
|
Raise an Exception if an error occurs.
|
||||||
"""
|
"""
|
||||||
proc = sp.Popen(command, stdout=sp.PIPE, close_fds=True)
|
proc = sp.Popen(command, stdout=sp.PIPE, close_fds=True)
|
||||||
return proc.stdout.read()
|
return proc.stdout.read()
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
from monagent.collector.checks.check import Check
|
import monagent.collector.checks.check
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import wmi
|
import wmi
|
||||||
|
@ -15,10 +15,10 @@ B2MB = float(1048576)
|
||||||
KB2MB = B2KB = float(1024)
|
KB2MB = B2KB = float(1024)
|
||||||
|
|
||||||
|
|
||||||
class Processes(Check):
|
class Processes(monagent.collector.checks.check.Check):
|
||||||
|
|
||||||
def __init__(self, logger):
|
def __init__(self, logger):
|
||||||
Check.__init__(self, logger)
|
monagent.collector.checks.check.Check.__init__(self, logger)
|
||||||
self.gauge('system.proc.queue_length')
|
self.gauge('system.proc.queue_length')
|
||||||
self.gauge('system.proc.count')
|
self.gauge('system.proc.count')
|
||||||
|
|
||||||
|
@ -31,7 +31,7 @@ class Processes(Check):
|
||||||
return
|
return
|
||||||
|
|
||||||
try:
|
try:
|
||||||
cpu = w.Win32_PerfFormattedData_PerfOS_Processor(name="_Total")[0]
|
w.Win32_PerfFormattedData_PerfOS_Processor(name="_Total")[0]
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
self.logger.info('Missing Win32_PerfFormattedData_PerfOS_Processor WMI class.' +
|
self.logger.info('Missing Win32_PerfFormattedData_PerfOS_Processor WMI class.' +
|
||||||
' No process metrics will be returned.')
|
' No process metrics will be returned.')
|
||||||
|
@ -44,10 +44,10 @@ class Processes(Check):
|
||||||
return self.get_metrics()
|
return self.get_metrics()
|
||||||
|
|
||||||
|
|
||||||
class Memory(Check):
|
class Memory(monagent.collector.checks.check.Check):
|
||||||
|
|
||||||
def __init__(self, logger):
|
def __init__(self, logger):
|
||||||
Check.__init__(self, logger)
|
monagent.collector.checks.check.Check.__init__(self, logger)
|
||||||
self.logger = logger
|
self.logger = logger
|
||||||
self.gauge('system.mem.free')
|
self.gauge('system.mem.free')
|
||||||
self.gauge('system.mem.used')
|
self.gauge('system.mem.used')
|
||||||
|
@ -84,10 +84,10 @@ class Memory(Check):
|
||||||
return self.get_metrics()
|
return self.get_metrics()
|
||||||
|
|
||||||
|
|
||||||
class Cpu(Check):
|
class Cpu(monagent.collector.checks.check.Check):
|
||||||
|
|
||||||
def __init__(self, logger):
|
def __init__(self, logger):
|
||||||
Check.__init__(self, logger)
|
monagent.collector.checks.check.Check.__init__(self, logger)
|
||||||
self.logger = logger
|
self.logger = logger
|
||||||
self.gauge('system.cpu.user')
|
self.gauge('system.cpu.user')
|
||||||
self.gauge('system.cpu.idle')
|
self.gauge('system.cpu.idle')
|
||||||
|
@ -122,9 +122,10 @@ class Cpu(Check):
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _average_metric(wmi_class, wmi_prop):
|
def _average_metric(wmi_class, wmi_prop):
|
||||||
''' Sum all of the values of a metric from a WMI class object, excluding
|
"""Sum all of the values of a metric from a WMI class object.
|
||||||
the value for "_Total"
|
|
||||||
'''
|
Excludes the value for "_Total"
|
||||||
|
"""
|
||||||
val = 0
|
val = 0
|
||||||
counter = 0
|
counter = 0
|
||||||
for wmi_object in wmi_class:
|
for wmi_object in wmi_class:
|
||||||
|
@ -142,10 +143,10 @@ class Cpu(Check):
|
||||||
return val
|
return val
|
||||||
|
|
||||||
|
|
||||||
class Network(Check):
|
class Network(monagent.collector.checks.check.Check):
|
||||||
|
|
||||||
def __init__(self, logger):
|
def __init__(self, logger):
|
||||||
Check.__init__(self, logger)
|
monagent.collector.checks.check.Check.__init__(self, logger)
|
||||||
self.logger = logger
|
self.logger = logger
|
||||||
self.gauge('system.net.bytes_rcvd')
|
self.gauge('system.net.bytes_rcvd')
|
||||||
self.gauge('system.net.bytes_sent')
|
self.gauge('system.net.bytes_sent')
|
||||||
|
@ -169,10 +170,10 @@ class Network(Check):
|
||||||
return self.get_metrics()
|
return self.get_metrics()
|
||||||
|
|
||||||
|
|
||||||
class Disk(Check):
|
class Disk(monagent.collector.checks.check.Check):
|
||||||
|
|
||||||
def __init__(self, logger):
|
def __init__(self, logger):
|
||||||
Check.__init__(self, logger)
|
monagent.collector.checks.check.Check.__init__(self, logger)
|
||||||
self.logger = logger
|
self.logger = logger
|
||||||
self.gauge('system.disk.free')
|
self.gauge('system.disk.free')
|
||||||
self.gauge('system.disk.total')
|
self.gauge('system.disk.total')
|
||||||
|
@ -203,10 +204,10 @@ class Disk(Check):
|
||||||
return self.get_metrics()
|
return self.get_metrics()
|
||||||
|
|
||||||
|
|
||||||
class IO(Check):
|
class IO(monagent.collector.checks.check.Check):
|
||||||
|
|
||||||
def __init__(self, logger):
|
def __init__(self, logger):
|
||||||
Check.__init__(self, logger)
|
monagent.collector.checks.check.Check.__init__(self, logger)
|
||||||
self.logger = logger
|
self.logger = logger
|
||||||
self.gauge('system.io.wkb_s')
|
self.gauge('system.io.wkb_s')
|
||||||
self.gauge('system.io.w_s')
|
self.gauge('system.io.w_s')
|
||||||
|
|
|
@ -1,12 +1,11 @@
|
||||||
import base64
|
import base64
|
||||||
|
import binascii
|
||||||
import os
|
import os
|
||||||
|
import stat
|
||||||
|
|
||||||
# os.SEEK_END is defined in python 2.5
|
# os.SEEK_END is defined in python 2.5
|
||||||
SEEK_END = 2
|
SEEK_END = 2
|
||||||
|
|
||||||
from stat import *
|
|
||||||
import binascii
|
|
||||||
|
|
||||||
|
|
||||||
def median(vals):
|
def median(vals):
|
||||||
vals = sorted(vals)
|
vals = sorted(vals)
|
||||||
|
@ -21,8 +20,9 @@ def median(vals):
|
||||||
|
|
||||||
|
|
||||||
def add_basic_auth(request, username, password):
|
def add_basic_auth(request, username, password):
|
||||||
""" A helper to add basic authentication to a urllib2 request. We do this
|
"""A helper to add basic authentication to a urllib2 request.
|
||||||
across a variety of checks so it's good to have this in one place.
|
|
||||||
|
We do this across a variety of checks so it's good to have this in one place.
|
||||||
"""
|
"""
|
||||||
auth_str = base64.encodestring('%s:%s' % (username, password)).strip()
|
auth_str = base64.encodestring('%s:%s' % (username, password)).strip()
|
||||||
request.add_header('Authorization', 'Basic %s' % auth_str)
|
request.add_header('Authorization', 'Basic %s' % auth_str)
|
||||||
|
@ -52,8 +52,8 @@ class TailFile(object):
|
||||||
already_open = True
|
already_open = True
|
||||||
|
|
||||||
stat = os.stat(self._path)
|
stat = os.stat(self._path)
|
||||||
inode = stat[ST_INO]
|
inode = stat[stat.ST_INO]
|
||||||
size = stat[ST_SIZE]
|
size = stat[stat.ST_SIZE]
|
||||||
|
|
||||||
# Compute CRC of the beginning of the file
|
# Compute CRC of the beginning of the file
|
||||||
crc = None
|
crc = None
|
||||||
|
@ -98,8 +98,10 @@ class TailFile(object):
|
||||||
|
|
||||||
def tail(self, line_by_line=True, move_end=True):
|
def tail(self, line_by_line=True, move_end=True):
|
||||||
"""Read line-by-line and run callback on each line.
|
"""Read line-by-line and run callback on each line.
|
||||||
|
|
||||||
line_by_line: yield each time a callback has returned True
|
line_by_line: yield each time a callback has returned True
|
||||||
move_end: start from the last line of the log"""
|
move_end: start from the last line of the log
|
||||||
|
"""
|
||||||
try:
|
try:
|
||||||
self._open_file(move_end=move_end)
|
self._open_file(move_end=move_end)
|
||||||
|
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
import urllib2
|
import urllib2
|
||||||
|
|
||||||
from monagent.common.util import headers
|
|
||||||
from monagent.collector.checks import AgentCheck
|
from monagent.collector.checks import AgentCheck
|
||||||
from monagent.collector.checks.utils import add_basic_auth
|
from monagent.collector.checks.utils import add_basic_auth
|
||||||
|
from monagent.common.util import headers
|
||||||
|
|
||||||
|
|
||||||
class Apache(AgentCheck):
|
class Apache(AgentCheck):
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
|
from collections import namedtuple
|
||||||
from fnmatch import fnmatch
|
from fnmatch import fnmatch
|
||||||
import os
|
import os
|
||||||
import time
|
import time
|
||||||
from collections import namedtuple
|
|
||||||
|
|
||||||
from monagent.collector.checks import AgentCheck
|
from monagent.collector.checks import AgentCheck
|
||||||
|
|
||||||
|
@ -52,16 +52,16 @@ class Cacti(AgentCheck):
|
||||||
# The rrdtool module is required for the check to work
|
# The rrdtool module is required for the check to work
|
||||||
try:
|
try:
|
||||||
import rrdtool
|
import rrdtool
|
||||||
except ImportError as e:
|
except ImportError:
|
||||||
raise Exception(
|
raise Exception(
|
||||||
"Cannot import rrdtool module. Check the instructions to install this module at https://app.datadoghq.com/account/settings#integrations/cacti")
|
"Cannot import rrdtool module. This module is required for the cacti plugin to work correctly")
|
||||||
|
|
||||||
# Try importing MySQL
|
# Try importing MySQL
|
||||||
try:
|
try:
|
||||||
import MySQLdb
|
import MySQLdb
|
||||||
except ImportError as e:
|
except ImportError:
|
||||||
raise Exception(
|
raise Exception(
|
||||||
"Cannot import MySQLdb module. Check the instructions to install this module at https://app.datadoghq.com/account/settings#integrations/cacti")
|
"Cannot import MySQLdb module. This module is required for the cacti plugin to work correctly")
|
||||||
|
|
||||||
connection = MySQLdb.connect(config.host, config.user, config.password, config.db)
|
connection = MySQLdb.connect(config.host, config.user, config.password, config.db)
|
||||||
|
|
||||||
|
@ -123,7 +123,9 @@ class Cacti(AgentCheck):
|
||||||
return Config(host, user, password, db, rrd_path, whitelist, field_names)
|
return Config(host, user, password, db, rrd_path, whitelist, field_names)
|
||||||
|
|
||||||
def _read_rrd(self, rrd_path, hostname, device_name):
|
def _read_rrd(self, rrd_path, hostname, device_name):
|
||||||
''' Main metric fetching method '''
|
"""Main metric fetching method.
|
||||||
|
|
||||||
|
"""
|
||||||
import rrdtool
|
import rrdtool
|
||||||
metric_count = 0
|
metric_count = 0
|
||||||
|
|
||||||
|
@ -177,9 +179,10 @@ class Cacti(AgentCheck):
|
||||||
return metric_count
|
return metric_count
|
||||||
|
|
||||||
def _fetch_rrd_meta(self, connection, rrd_path_root, whitelist, field_names):
|
def _fetch_rrd_meta(self, connection, rrd_path_root, whitelist, field_names):
|
||||||
''' Fetch metadata about each RRD in this Cacti DB, returning a list of
|
"""Fetch metadata about each RRD in this Cacti DB.
|
||||||
tuples of (hostname, device_name, rrd_path)
|
|
||||||
'''
|
Returns a list of tuples of (hostname, device_name, rrd_path)
|
||||||
|
"""
|
||||||
def _in_whitelist(rrd):
|
def _in_whitelist(rrd):
|
||||||
path = rrd.replace('<path_rra>/', '')
|
path = rrd.replace('<path_rra>/', '')
|
||||||
for p in whitelist:
|
for p in whitelist:
|
||||||
|
@ -226,7 +229,9 @@ class Cacti(AgentCheck):
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _format_metric_name(m_name, cfunc):
|
def _format_metric_name(m_name, cfunc):
|
||||||
''' Format a cacti metric name into a Datadog-friendly name '''
|
"""Format a cacti metric name into a Datadog-friendly name.
|
||||||
|
|
||||||
|
"""
|
||||||
try:
|
try:
|
||||||
aggr = CFUNC_TO_AGGR[cfunc]
|
aggr = CFUNC_TO_AGGR[cfunc]
|
||||||
except KeyError:
|
except KeyError:
|
||||||
|
@ -242,16 +247,18 @@ class Cacti(AgentCheck):
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _transform_metric(m_name, val):
|
def _transform_metric(m_name, val):
|
||||||
''' Add any special case transformations here '''
|
"""Add any special case transformations here.
|
||||||
|
|
||||||
|
"""
|
||||||
# Report memory in MB
|
# Report memory in MB
|
||||||
if m_name[0:11] in ('system.mem.', 'system.disk'):
|
if m_name[0:11] in ('system.mem.', 'system.disk'):
|
||||||
return val / 1024
|
return val / 1024
|
||||||
return val
|
return val
|
||||||
|
|
||||||
'''
|
"""For backwards compatability with pre-checks_d configuration.
|
||||||
For backwards compatability with pre-checks_d configuration.
|
|
||||||
Convert old-style config to new-style config.
|
Convert old-style config to new-style config.
|
||||||
'''
|
"""
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def parse_agent_config(agentConfig):
|
def parse_agent_config(agentConfig):
|
||||||
required = ['cacti_mysql_server', 'cacti_mysql_user', 'cacti_rrd_path']
|
required = ['cacti_mysql_server', 'cacti_mysql_user', 'cacti_rrd_path']
|
||||||
|
|
|
@ -1,13 +1,14 @@
|
||||||
import urllib2
|
|
||||||
import json
|
import json
|
||||||
|
import urllib2
|
||||||
|
|
||||||
from monagent.common.util import headers
|
|
||||||
from monagent.collector.checks import AgentCheck
|
from monagent.collector.checks import AgentCheck
|
||||||
|
from monagent.common.util import headers
|
||||||
|
|
||||||
|
|
||||||
class CouchDb(AgentCheck):
|
class CouchDb(AgentCheck):
|
||||||
|
|
||||||
"""Extracts stats from CouchDB via its REST API
|
"""Extracts stats from CouchDB via its REST API
|
||||||
|
|
||||||
http://wiki.apache.org/couchdb/Runtime_Statistics
|
http://wiki.apache.org/couchdb/Runtime_Statistics
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
@ -28,7 +29,9 @@ class CouchDb(AgentCheck):
|
||||||
self.gauge(metric_name, val, dimensions=metric_dimensions, device_name=db_name)
|
self.gauge(metric_name, val, dimensions=metric_dimensions, device_name=db_name)
|
||||||
|
|
||||||
def _get_stats(self, url):
|
def _get_stats(self, url):
|
||||||
"Hit a given URL and return the parsed json"
|
"""Hit a given URL and return the parsed json.
|
||||||
|
|
||||||
|
"""
|
||||||
self.log.debug('Fetching Couchdb stats at url: %s' % url)
|
self.log.debug('Fetching Couchdb stats at url: %s' % url)
|
||||||
req = urllib2.Request(url, None, headers(self.agent_config))
|
req = urllib2.Request(url, None, headers(self.agent_config))
|
||||||
|
|
||||||
|
|
|
@ -3,9 +3,9 @@ import urllib2
|
||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
from monagent.common.util import headers
|
|
||||||
from monagent.collector.checks import AgentCheck
|
from monagent.collector.checks import AgentCheck
|
||||||
from monagent.collector.checks.utils import add_basic_auth
|
from monagent.collector.checks.utils import add_basic_auth
|
||||||
|
from monagent.common.util import headers
|
||||||
|
|
||||||
|
|
||||||
# Constants
|
# Constants
|
||||||
|
@ -16,6 +16,7 @@ DEFAULT_TIMEOUT = 10
|
||||||
class Couchbase(AgentCheck):
|
class Couchbase(AgentCheck):
|
||||||
|
|
||||||
"""Extracts stats from Couchbase via its REST API
|
"""Extracts stats from Couchbase via its REST API
|
||||||
|
|
||||||
http://docs.couchbase.com/couchbase-manual-2.0/#using-the-rest-api
|
http://docs.couchbase.com/couchbase-manual-2.0/#using-the-rest-api
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
@ -49,7 +50,9 @@ class Couchbase(AgentCheck):
|
||||||
metric_name, val, dimensions=metric_dimensions, device_name=node_name)
|
metric_name, val, dimensions=metric_dimensions, device_name=node_name)
|
||||||
|
|
||||||
def _get_stats(self, url, instance):
|
def _get_stats(self, url, instance):
|
||||||
"Hit a given URL and return the parsed json"
|
"""Hit a given URL and return the parsed json.
|
||||||
|
|
||||||
|
"""
|
||||||
self.log.debug('Fetching Couchbase stats at url: %s' % url)
|
self.log.debug('Fetching Couchbase stats at url: %s' % url)
|
||||||
req = urllib2.Request(url, None, headers(self.agent_config))
|
req = urllib2.Request(url, None, headers(self.agent_config))
|
||||||
if 'user' in instance and 'password' in instance:
|
if 'user' in instance and 'password' in instance:
|
||||||
|
|
|
@ -1,6 +1,9 @@
|
||||||
from fnmatch import fnmatch
|
from fnmatch import fnmatch
|
||||||
from os import stat, walk
|
from os import stat
|
||||||
from os.path import abspath, exists, join
|
from os import walk
|
||||||
|
from os.path import abspath
|
||||||
|
from os.path import exists
|
||||||
|
from os.path import join
|
||||||
import time
|
import time
|
||||||
|
|
||||||
from monagent.collector.checks import AgentCheck
|
from monagent.collector.checks import AgentCheck
|
||||||
|
|
|
@ -1,10 +1,10 @@
|
||||||
import json
|
|
||||||
import urllib2
|
|
||||||
import urllib
|
|
||||||
import httplib
|
import httplib
|
||||||
import socket
|
import json
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
import socket
|
||||||
|
import urllib2
|
||||||
|
import urllib
|
||||||
from urlparse import urlsplit
|
from urlparse import urlsplit
|
||||||
|
|
||||||
from monagent.collector.checks import AgentCheck
|
from monagent.collector.checks import AgentCheck
|
||||||
|
@ -70,7 +70,9 @@ DOCKER_TAGS = [
|
||||||
class UnixHTTPConnection(httplib.HTTPConnection, object):
|
class UnixHTTPConnection(httplib.HTTPConnection, object):
|
||||||
|
|
||||||
"""Class used in conjuction with UnixSocketHandler to make urllib2
|
"""Class used in conjuction with UnixSocketHandler to make urllib2
|
||||||
compatible with Unix sockets."""
|
|
||||||
|
compatible with Unix sockets.
|
||||||
|
"""
|
||||||
|
|
||||||
def __init__(self, unix_socket):
|
def __init__(self, unix_socket):
|
||||||
self._unix_socket = unix_socket
|
self._unix_socket = unix_socket
|
||||||
|
@ -87,8 +89,9 @@ class UnixHTTPConnection(httplib.HTTPConnection, object):
|
||||||
|
|
||||||
class UnixSocketHandler(urllib2.AbstractHTTPHandler):
|
class UnixSocketHandler(urllib2.AbstractHTTPHandler):
|
||||||
|
|
||||||
"""Class that makes Unix sockets work with urllib2 without any additional
|
"""Class that makes Unix sockets work with urllib2 without any additional dependencies.
|
||||||
dependencies."""
|
|
||||||
|
"""
|
||||||
|
|
||||||
def unix_open(self, req):
|
def unix_open(self, req):
|
||||||
full_path = "%s%s" % urlsplit(req.get_full_url())[1:3]
|
full_path = "%s%s" % urlsplit(req.get_full_url())[1:3]
|
||||||
|
@ -180,15 +183,21 @@ class Docker(AgentCheck):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def _get_containers(self, instance):
|
def _get_containers(self, instance):
|
||||||
"""Gets the list of running containers in Docker."""
|
"""Gets the list of running containers in Docker.
|
||||||
|
|
||||||
|
"""
|
||||||
return self._get_json("%(url)s/containers/json" % instance, params={"size": 1})
|
return self._get_json("%(url)s/containers/json" % instance, params={"size": 1})
|
||||||
|
|
||||||
def _get_container(self, instance, cid):
|
def _get_container(self, instance, cid):
|
||||||
"""Get container information from Docker, gived a container Id."""
|
"""Get container information from Docker, gived a container Id.
|
||||||
|
|
||||||
|
"""
|
||||||
return self._get_json("%s/containers/%s/json" % (instance["url"], cid))
|
return self._get_json("%s/containers/%s/json" % (instance["url"], cid))
|
||||||
|
|
||||||
def _get_json(self, uri, params=None):
|
def _get_json(self, uri, params=None):
|
||||||
"""Utility method to get and parse JSON streams."""
|
"""Utility method to get and parse JSON streams.
|
||||||
|
|
||||||
|
"""
|
||||||
if params:
|
if params:
|
||||||
uri = "%s?%s" % (uri, urllib.urlencode(params))
|
uri = "%s?%s" % (uri, urllib.urlencode(params))
|
||||||
self.log.debug("Connecting to: %s" % uri)
|
self.log.debug("Connecting to: %s" % uri)
|
||||||
|
@ -205,8 +214,10 @@ class Docker(AgentCheck):
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _find_cgroup(hierarchy):
|
def _find_cgroup(hierarchy):
|
||||||
"""Finds the mount point for a specified cgroup hierarchy. Works with
|
"""Finds the mount point for a specified cgroup hierarchy.
|
||||||
old style and new style mounts."""
|
|
||||||
|
Works with old style and new style mounts.
|
||||||
|
"""
|
||||||
try:
|
try:
|
||||||
fp = open("/proc/mounts")
|
fp = open("/proc/mounts")
|
||||||
mounts = map(lambda x: x.split(), fp.read().splitlines())
|
mounts = map(lambda x: x.split(), fp.read().splitlines())
|
||||||
|
@ -221,7 +232,9 @@ class Docker(AgentCheck):
|
||||||
return mountpoint
|
return mountpoint
|
||||||
|
|
||||||
def _parse_cgroup_file(self, file_):
|
def _parse_cgroup_file(self, file_):
|
||||||
"""Parses a cgroup pseudo file for key/values."""
|
"""Parses a cgroup pseudo file for key/values.
|
||||||
|
|
||||||
|
"""
|
||||||
fp = None
|
fp = None
|
||||||
try:
|
try:
|
||||||
self.log.debug("Opening file: %s" % file_)
|
self.log.debug("Opening file: %s" % file_)
|
||||||
|
|
|
@ -3,12 +3,12 @@ import socket
|
||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
import urlparse
|
|
||||||
import urllib2
|
import urllib2
|
||||||
|
import urlparse
|
||||||
|
|
||||||
from monagent.common.util import headers
|
|
||||||
from monagent.collector.checks import AgentCheck
|
from monagent.collector.checks import AgentCheck
|
||||||
from monagent.collector.checks.utils import add_basic_auth
|
from monagent.collector.checks.utils import add_basic_auth
|
||||||
|
from monagent.common.util import headers
|
||||||
|
|
||||||
|
|
||||||
class NodeNotFound(Exception):
|
class NodeNotFound(Exception):
|
||||||
|
@ -154,8 +154,8 @@ class ElasticSearch(AgentCheck):
|
||||||
self._process_health_data(config_url, health_data, dimensions=dimensions)
|
self._process_health_data(config_url, health_data, dimensions=dimensions)
|
||||||
|
|
||||||
def _get_es_version(self, config_url, auth=None):
|
def _get_es_version(self, config_url, auth=None):
|
||||||
"""
|
"""Get the running version of Elastic Search.
|
||||||
Get the running version of Elastic Search
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
@ -170,8 +170,8 @@ class ElasticSearch(AgentCheck):
|
||||||
return version
|
return version
|
||||||
|
|
||||||
def _define_params(self, version):
|
def _define_params(self, version):
|
||||||
"""
|
"""Define the set of URLs and METRICS to use depending on the running ES version.
|
||||||
Define the set of URLs and METRICS to use depending on the running ES version
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if version >= [0, 90, 10]:
|
if version >= [0, 90, 10]:
|
||||||
|
@ -214,8 +214,9 @@ class ElasticSearch(AgentCheck):
|
||||||
self.METRICS.update(additional_metrics)
|
self.METRICS.update(additional_metrics)
|
||||||
|
|
||||||
def _get_data(self, url, auth=None):
|
def _get_data(self, url, auth=None):
|
||||||
""" Hit a given URL and return the parsed json
|
"""Hit a given URL and return the parsed json
|
||||||
`auth` is a tuple of (username, password) or None
|
|
||||||
|
`auth` is a tuple of (username, password) or None
|
||||||
"""
|
"""
|
||||||
req = urllib2.Request(url, None, headers(self.agent_config))
|
req = urllib2.Request(url, None, headers(self.agent_config))
|
||||||
if auth:
|
if auth:
|
||||||
|
@ -264,8 +265,9 @@ class ElasticSearch(AgentCheck):
|
||||||
process_metric(metric, *desc)
|
process_metric(metric, *desc)
|
||||||
|
|
||||||
def _get_primary_addr(self, url, node_name, auth):
|
def _get_primary_addr(self, url, node_name, auth):
|
||||||
""" Returns a list of primary interface addresses as seen by ES.
|
"""Returns a list of primary interface addresses as seen by ES.
|
||||||
Used in ES < 0.19
|
|
||||||
|
Used in ES < 0.19
|
||||||
"""
|
"""
|
||||||
req = urllib2.Request(url, None, headers(self.agent_config))
|
req = urllib2.Request(url, None, headers(self.agent_config))
|
||||||
# Load basic authentication configuration, if available.
|
# Load basic authentication configuration, if available.
|
||||||
|
@ -286,9 +288,10 @@ class ElasticSearch(AgentCheck):
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _host_matches_node(primary_addrs):
|
def _host_matches_node(primary_addrs):
|
||||||
""" For < 0.19, check if the current host matches the IP given in the
|
"""For < 0.19, check if the current host matches the IP given in the
|
||||||
cluster nodes check `/_cluster/nodes`. Uses `ip addr` on Linux and
|
|
||||||
`ifconfig` on Mac
|
cluster nodes check `/_cluster/nodes`. Uses `ip addr` on Linux and
|
||||||
|
`ifconfig` on Mac
|
||||||
"""
|
"""
|
||||||
if sys.platform == 'darwin':
|
if sys.platform == 'darwin':
|
||||||
ifaces = subprocess.Popen(['ifconfig'], stdout=subprocess.PIPE)
|
ifaces = subprocess.Popen(['ifconfig'], stdout=subprocess.PIPE)
|
||||||
|
@ -312,6 +315,7 @@ class ElasticSearch(AgentCheck):
|
||||||
|
|
||||||
def _process_metric(self, data, metric, path, xform=None, dimensions=None):
|
def _process_metric(self, data, metric, path, xform=None, dimensions=None):
|
||||||
"""data: dictionary containing all the stats
|
"""data: dictionary containing all the stats
|
||||||
|
|
||||||
metric: datadog metric
|
metric: datadog metric
|
||||||
path: corresponding path in data, flattened, e.g. thread_pool.bulk.queue
|
path: corresponding path in data, flattened, e.g. thread_pool.bulk.queue
|
||||||
xfom: a lambda to apply to the numerical value
|
xfom: a lambda to apply to the numerical value
|
||||||
|
|
|
@ -1,5 +1,4 @@
|
||||||
"""
|
"""Collects metrics from the gunicorn web server.
|
||||||
Collects metrics from the gunicorn web server.
|
|
||||||
|
|
||||||
http://gunicorn.org/
|
http://gunicorn.org/
|
||||||
"""
|
"""
|
||||||
|
@ -42,7 +41,9 @@ class GUnicornCheck(AgentCheck):
|
||||||
return {"psutil": version}
|
return {"psutil": version}
|
||||||
|
|
||||||
def check(self, instance):
|
def check(self, instance):
|
||||||
""" Collect metrics for the given gunicorn instance. """
|
"""Collect metrics for the given gunicorn instance.
|
||||||
|
|
||||||
|
"""
|
||||||
if not psutil:
|
if not psutil:
|
||||||
raise GUnicornCheckError("gunicorn check requires the psutil python package")
|
raise GUnicornCheckError("gunicorn check requires the psutil python package")
|
||||||
|
|
||||||
|
@ -107,7 +108,9 @@ class GUnicornCheck(AgentCheck):
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _get_master_proc_by_name(name):
|
def _get_master_proc_by_name(name):
|
||||||
""" Return a psutil process for the master gunicorn process with the given name. """
|
"""Return a psutil process for the master gunicorn process with the given name.
|
||||||
|
|
||||||
|
"""
|
||||||
master_name = GUnicornCheck._get_master_proc_name(name)
|
master_name = GUnicornCheck._get_master_proc_name(name)
|
||||||
master_procs = [
|
master_procs = [
|
||||||
p for p in psutil.process_iter() if p.cmdline and p.cmdline[0] == master_name]
|
p for p in psutil.process_iter() if p.cmdline and p.cmdline[0] == master_name]
|
||||||
|
@ -121,7 +124,9 @@ class GUnicornCheck(AgentCheck):
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _get_master_proc_name(name):
|
def _get_master_proc_name(name):
|
||||||
""" Return the name of the master gunicorn process for the given proc name. """
|
"""Return the name of the master gunicorn process for the given proc name.
|
||||||
|
|
||||||
|
"""
|
||||||
# Here's an example of a process list for a gunicorn box with name web1
|
# Here's an example of a process list for a gunicorn box with name web1
|
||||||
# root 22976 0.1 0.1 60364 13424 ? Ss 19:30 0:00 gunicorn: master [web1]
|
# root 22976 0.1 0.1 60364 13424 ? Ss 19:30 0:00 gunicorn: master [web1]
|
||||||
# web 22984 20.7 2.3 521924 176136 ? Sl 19:30 1:58 gunicorn: worker [web1]
|
# web 22984 20.7 2.3 521924 176136 ? Sl 19:30 1:58 gunicorn: worker [web1]
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
import urllib2
|
|
||||||
import time
|
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
|
import time
|
||||||
|
import urllib2
|
||||||
|
|
||||||
from monagent.collector.checks import AgentCheck
|
from monagent.collector.checks import AgentCheck
|
||||||
from monagent.common.util import headers
|
from monagent.common.util import headers
|
||||||
|
@ -65,7 +65,9 @@ class HAProxy(AgentCheck):
|
||||||
url=url, collect_status_metrics=collect_status_metrics)
|
url=url, collect_status_metrics=collect_status_metrics)
|
||||||
|
|
||||||
def _fetch_data(self, url, username, password):
|
def _fetch_data(self, url, username, password):
|
||||||
''' Hit a given URL and return the parsed json '''
|
"""Hit a given URL and return the parsed json.
|
||||||
|
|
||||||
|
"""
|
||||||
# Try to fetch data from the stats URL
|
# Try to fetch data from the stats URL
|
||||||
|
|
||||||
passman = urllib2.HTTPPasswordMgrWithDefaultRealm()
|
passman = urllib2.HTTPPasswordMgrWithDefaultRealm()
|
||||||
|
@ -85,8 +87,10 @@ class HAProxy(AgentCheck):
|
||||||
|
|
||||||
def _process_data(self, data, collect_aggregates_only, process_events,
|
def _process_data(self, data, collect_aggregates_only, process_events,
|
||||||
url=None, collect_status_metrics=False):
|
url=None, collect_status_metrics=False):
|
||||||
''' Main data-processing loop. For each piece of useful data, we'll
|
"""Main data-processing loop. For each piece of useful data, we'll
|
||||||
either save a metric, save an event or both. '''
|
|
||||||
|
either save a metric, save an event or both.
|
||||||
|
"""
|
||||||
|
|
||||||
# Split the first line into an index of fields
|
# Split the first line into an index of fields
|
||||||
# The line looks like:
|
# The line looks like:
|
||||||
|
@ -164,8 +168,8 @@ class HAProxy(AgentCheck):
|
||||||
|
|
||||||
def _process_metrics(self, data_list, service, url):
|
def _process_metrics(self, data_list, service, url):
|
||||||
for data in data_list:
|
for data in data_list:
|
||||||
"""
|
"""Each element of data_list is a dictionary related to one host
|
||||||
Each element of data_list is a dictionary related to one host
|
|
||||||
(one line) extracted from the csv. All of these elements should
|
(one line) extracted from the csv. All of these elements should
|
||||||
have the same value for 'pxname' key
|
have the same value for 'pxname' key
|
||||||
It should look like:
|
It should look like:
|
||||||
|
@ -193,8 +197,9 @@ class HAProxy(AgentCheck):
|
||||||
self.gauge(name, value, dimensions=dimensions)
|
self.gauge(name, value, dimensions=dimensions)
|
||||||
|
|
||||||
def _process_events(self, data_list, url):
|
def _process_events(self, data_list, url):
|
||||||
''' Main event processing loop. Events will be created for a service
|
"""Main event processing loop. Events will be created for a service status change.
|
||||||
status change '''
|
|
||||||
|
"""
|
||||||
for data in data_list:
|
for data in data_list:
|
||||||
hostname = data['svname']
|
hostname = data['svname']
|
||||||
service_name = data['pxname']
|
service_name = data['pxname']
|
||||||
|
|
|
@ -4,6 +4,7 @@ from monagent.collector.checks import AgentCheck
|
||||||
class HDFSCheck(AgentCheck):
|
class HDFSCheck(AgentCheck):
|
||||||
|
|
||||||
"""Report on free space and space used in HDFS.
|
"""Report on free space and space used in HDFS.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def check(self, instance):
|
def check(self, instance):
|
||||||
|
|
|
@ -1,22 +1,29 @@
|
||||||
#!/bin/env python
|
#!/bin/env python
|
||||||
"""Monitoring Agent remote host aliveness checker"""
|
"""Monitoring Agent remote host aliveness checker.
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
import socket
|
import socket
|
||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
from monagent.collector.checks.services_checks import ServicesCheck, Status
|
from monagent.collector.checks.services_checks import ServicesCheck
|
||||||
|
from monagent.collector.checks.services_checks import Status
|
||||||
|
|
||||||
|
|
||||||
class HostAlive(ServicesCheck):
|
class HostAlive(ServicesCheck):
|
||||||
|
|
||||||
"""Inherit ServicesCheck class to test if a host is alive or not"""
|
"""Inherit ServicesCheck class to test if a host is alive or not.
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
def __init__(self, name, init_config, agent_config, instances=None):
|
def __init__(self, name, init_config, agent_config, instances=None):
|
||||||
ServicesCheck.__init__(self, name, init_config, agent_config, instances)
|
ServicesCheck.__init__(self, name, init_config, agent_config, instances)
|
||||||
|
|
||||||
def _test_ssh(self, host, port, timeout=None):
|
def _test_ssh(self, host, port, timeout=None):
|
||||||
""" Connect to the SSH port (typically 22) and look for a banner """
|
"""Connect to the SSH port (typically 22) and look for a banner.
|
||||||
|
|
||||||
|
"""
|
||||||
if port is None:
|
if port is None:
|
||||||
port = 22
|
port = 22
|
||||||
try:
|
try:
|
||||||
|
@ -46,7 +53,9 @@ class HostAlive(ServicesCheck):
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _test_ping(host, timeout=None):
|
def _test_ping(host, timeout=None):
|
||||||
""" Attempt to ping the host """
|
"""Attempt to ping the host.
|
||||||
|
|
||||||
|
"""
|
||||||
ping_prefix = "ping -c 1 -q "
|
ping_prefix = "ping -c 1 -q "
|
||||||
if timeout is not None:
|
if timeout is not None:
|
||||||
ping_prefix += "-W " + str(timeout) + " "
|
ping_prefix += "-W " + str(timeout) + " "
|
||||||
|
@ -70,11 +79,15 @@ class HostAlive(ServicesCheck):
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def _create_status_event(self, status, msg, instance):
|
def _create_status_event(self, status, msg, instance):
|
||||||
"""Does nothing: status events are not yet supported by Mon API"""
|
"""Does nothing: status events are not yet supported by Mon API.
|
||||||
|
|
||||||
|
"""
|
||||||
return
|
return
|
||||||
|
|
||||||
def _check(self, instance):
|
def _check(self, instance):
|
||||||
"""Run the desired host-alive check againt this host"""
|
"""Run the desired host-alive check againt this host.
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
dimensions = {'target_host': instance['host_name'], 'observer_host': socket.getfqdn()}
|
dimensions = {'target_host': instance['host_name'], 'observer_host': socket.getfqdn()}
|
||||||
# Add per-instance dimensions, if any
|
# Add per-instance dimensions, if any
|
||||||
|
|
|
@ -1,15 +1,20 @@
|
||||||
#!/bin/env python
|
#!/bin/env python
|
||||||
"""Monitoring Agent plugin for HTTP/API checks"""
|
"""Monitoring Agent plugin for HTTP/API checks.
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
import socket
|
|
||||||
import time
|
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
|
import socket
|
||||||
|
import time
|
||||||
|
|
||||||
from httplib2 import Http, HttpLib2Error, httplib
|
from httplib2 import Http
|
||||||
|
from httplib2 import httplib
|
||||||
|
from httplib2 import HttpLib2Error
|
||||||
|
|
||||||
from monagent.collector.checks.services_checks import ServicesCheck, Status
|
|
||||||
from monagent.collector.checks.check import AgentCheck
|
from monagent.collector.checks.check import AgentCheck
|
||||||
|
from monagent.collector.checks.services_checks import ServicesCheck
|
||||||
|
from monagent.collector.checks.services_checks import Status
|
||||||
|
|
||||||
|
|
||||||
class HTTPCheck(ServicesCheck):
|
class HTTPCheck(ServicesCheck):
|
||||||
|
@ -38,7 +43,9 @@ class HTTPCheck(ServicesCheck):
|
||||||
return url, username, password, timeout, include_content, headers, response_time, dimensions, ssl, pattern, use_keystone, token
|
return url, username, password, timeout, include_content, headers, response_time, dimensions, ssl, pattern, use_keystone, token
|
||||||
|
|
||||||
def _create_status_event(self, status, msg, instance):
|
def _create_status_event(self, status, msg, instance):
|
||||||
"""Does nothing: status events are not yet supported by Mon API"""
|
"""Does nothing: status events are not yet supported by Mon API.
|
||||||
|
|
||||||
|
"""
|
||||||
return
|
return
|
||||||
|
|
||||||
def _check(self, instance):
|
def _check(self, instance):
|
||||||
|
|
|
@ -1,10 +1,8 @@
|
||||||
|
from collections import defaultdict
|
||||||
|
from glob import glob
|
||||||
import os
|
import os
|
||||||
import time
|
import time
|
||||||
|
|
||||||
from collections import defaultdict
|
|
||||||
|
|
||||||
from glob import glob
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from xml.etree.ElementTree import ElementTree
|
from xml.etree.ElementTree import ElementTree
|
||||||
except ImportError:
|
except ImportError:
|
||||||
|
@ -13,14 +11,14 @@ except ImportError:
|
||||||
except ImportError:
|
except ImportError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
from monagent.common.util import get_hostname
|
|
||||||
from monagent.collector.checks import AgentCheck
|
from monagent.collector.checks import AgentCheck
|
||||||
|
from monagent.common.util import get_hostname
|
||||||
|
|
||||||
|
|
||||||
class Skip(Exception):
|
class Skip(Exception):
|
||||||
|
|
||||||
"""
|
"""Raised by :class:`Jenkins` when it comes across
|
||||||
Raised by :class:`Jenkins` when it comes across
|
|
||||||
a build or job that should be excluded from being checked.
|
a build or job that should be excluded from being checked.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
|
@ -1,13 +1,12 @@
|
||||||
|
from collections import defaultdict
|
||||||
import sys
|
import sys
|
||||||
|
import random
|
||||||
if sys.version_info < (2, 6):
|
if sys.version_info < (2, 6):
|
||||||
# Normally we'd write our checks to be compatible with >= python 2.4 but
|
# Normally we'd write our checks to be compatible with >= python 2.4 but
|
||||||
# the dependencies of this check are not compatible with 2.4 and would
|
# the dependencies of this check are not compatible with 2.4 and would
|
||||||
# be too much work to rewrite, so raise an exception here.
|
# be too much work to rewrite, so raise an exception here.
|
||||||
raise Exception('kafka_consumer check requires at least Python 2.6')
|
raise Exception('kafka_consumer check requires at least Python 2.6')
|
||||||
|
|
||||||
from collections import defaultdict
|
|
||||||
from monagent.collector.checks import AgentCheck
|
|
||||||
try:
|
try:
|
||||||
from kafka.client import KafkaClient
|
from kafka.client import KafkaClient
|
||||||
from kafka.common import OffsetRequest
|
from kafka.common import OffsetRequest
|
||||||
|
@ -18,7 +17,8 @@ try:
|
||||||
from kazoo.exceptions import NoNodeError
|
from kazoo.exceptions import NoNodeError
|
||||||
except ImportError:
|
except ImportError:
|
||||||
raise Exception('Missing python dependency: kazoo (https://github.com/python-zk/kazoo)')
|
raise Exception('Missing python dependency: kazoo (https://github.com/python-zk/kazoo)')
|
||||||
import random
|
|
||||||
|
from monagent.collector.checks import AgentCheck
|
||||||
|
|
||||||
|
|
||||||
class KafkaCheck(AgentCheck):
|
class KafkaCheck(AgentCheck):
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
|
from collections import defaultdict
|
||||||
import re
|
import re
|
||||||
import urllib2
|
import urllib2
|
||||||
from collections import defaultdict
|
|
||||||
|
|
||||||
from monagent.collector.checks import AgentCheck
|
from monagent.collector.checks import AgentCheck
|
||||||
|
|
||||||
|
@ -12,6 +12,7 @@ whitespace = re.compile(r'\s')
|
||||||
class KyotoTycoonCheck(AgentCheck):
|
class KyotoTycoonCheck(AgentCheck):
|
||||||
|
|
||||||
"""Report statistics about the Kyoto Tycoon DBM-style
|
"""Report statistics about the Kyoto Tycoon DBM-style
|
||||||
|
|
||||||
database server (http://fallabs.com/kyototycoon/)
|
database server (http://fallabs.com/kyototycoon/)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
import urllib2
|
import urllib2
|
||||||
|
|
||||||
from monagent.common.util import headers
|
|
||||||
from monagent.collector.checks import AgentCheck
|
from monagent.collector.checks import AgentCheck
|
||||||
from monagent.collector.checks.utils import add_basic_auth
|
from monagent.collector.checks.utils import add_basic_auth
|
||||||
|
from monagent.common.util import headers
|
||||||
|
|
||||||
|
|
||||||
class Lighttpd(AgentCheck):
|
class Lighttpd(AgentCheck):
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
from monagent.collector.checks import *
|
from monagent.collector.checks import AgentCheck
|
||||||
|
|
||||||
# Reference: http://code.sixapart.com/svn/memcached/trunk/server/doc/protocol.txt
|
# Reference: http://code.sixapart.com/svn/memcached/trunk/server/doc/protocol.txt
|
||||||
# Name Type Meaning
|
# Name Type Meaning
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
import re
|
import re
|
||||||
import types
|
|
||||||
import time
|
import time
|
||||||
|
import types
|
||||||
|
|
||||||
from monagent.collector.checks import AgentCheck
|
from monagent.collector.checks import AgentCheck
|
||||||
from monagent.common.util import get_hostname
|
from monagent.common.util import get_hostname
|
||||||
|
@ -109,7 +109,9 @@ class MongoDb(AgentCheck):
|
||||||
|
|
||||||
def create_event(self, state, server, agentConfig):
|
def create_event(self, state, server, agentConfig):
|
||||||
"""Create an event with a message describing the replication
|
"""Create an event with a message describing the replication
|
||||||
state of a mongo node"""
|
|
||||||
|
state of a mongo node
|
||||||
|
"""
|
||||||
|
|
||||||
def get_state_description(state):
|
def get_state_description(state):
|
||||||
if state == 0:
|
if state == 0:
|
||||||
|
@ -148,8 +150,8 @@ class MongoDb(AgentCheck):
|
||||||
})
|
})
|
||||||
|
|
||||||
def check(self, instance):
|
def check(self, instance):
|
||||||
"""
|
"""Returns a dictionary that looks a lot like what's sent back by db.serverStatus().
|
||||||
Returns a dictionary that looks a lot like what's sent back by db.serverStatus()
|
|
||||||
"""
|
"""
|
||||||
if 'server' not in instance:
|
if 'server' not in instance:
|
||||||
self.log.warn("Missing 'server' in mongo config")
|
self.log.warn("Missing 'server' in mongo config")
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
import subprocess
|
|
||||||
import os
|
import os
|
||||||
import sys
|
|
||||||
import re
|
import re
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
import traceback
|
import traceback
|
||||||
|
|
||||||
from monagent.collector.checks import AgentCheck
|
from monagent.collector.checks import AgentCheck
|
||||||
|
@ -235,8 +235,8 @@ class MySql(AgentCheck):
|
||||||
return the_type(dict[key])
|
return the_type(dict[key])
|
||||||
|
|
||||||
def _collect_dict(self, metric_type, field_metric_map, query, db, dimensions):
|
def _collect_dict(self, metric_type, field_metric_map, query, db, dimensions):
|
||||||
"""
|
"""Query status and get a dictionary back.
|
||||||
Query status and get a dictionary back.
|
|
||||||
Extract each field out of the dictionary
|
Extract each field out of the dictionary
|
||||||
and stuff it in the corresponding metric.
|
and stuff it in the corresponding metric.
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,7 @@
|
||||||
#!/bin/env python
|
#!/bin/env python
|
||||||
"""Monitoring Agent wrapper for Nagios checks"""
|
"""Monitoring Agent wrapper for Nagios checks.
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
import hashlib
|
import hashlib
|
||||||
import json
|
import json
|
||||||
|
@ -14,16 +16,20 @@ from monagent.collector.checks.services_checks import ServicesCheck, Status
|
||||||
|
|
||||||
class WrapNagios(ServicesCheck):
|
class WrapNagios(ServicesCheck):
|
||||||
|
|
||||||
"""Inherit ServicesCheck class to process Nagios checks"""
|
"""Inherit ServicesCheck class to process Nagios checks.
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
def __init__(self, name, init_config, agent_config, instances=None):
|
def __init__(self, name, init_config, agent_config, instances=None):
|
||||||
ServicesCheck.__init__(self, name, init_config, agent_config, instances)
|
ServicesCheck.__init__(self, name, init_config, agent_config, instances)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _do_skip_check(instance, last_run_data):
|
def _do_skip_check(instance, last_run_data):
|
||||||
""" Determine whether or not to skip a check depending on
|
"""Determine whether or not to skip a check depending on
|
||||||
the checks's check_interval, if specified, and the last
|
|
||||||
time the check was run """
|
the checks's check_interval, if specified, and the last
|
||||||
|
time the check was run
|
||||||
|
"""
|
||||||
if instance['service_name'] in last_run_data and 'check_interval' in instance:
|
if instance['service_name'] in last_run_data and 'check_interval' in instance:
|
||||||
if time.time() < last_run_data[instance['service_name']] + instance['check_interval']:
|
if time.time() < last_run_data[instance['service_name']] + instance['check_interval']:
|
||||||
return True
|
return True
|
||||||
|
@ -31,11 +37,15 @@ class WrapNagios(ServicesCheck):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def _create_status_event(self, status, msg, instance):
|
def _create_status_event(self, status, msg, instance):
|
||||||
"""Does nothing: status events are not yet supported by Mon API"""
|
"""Does nothing: status events are not yet supported by Mon API.
|
||||||
|
|
||||||
|
"""
|
||||||
return
|
return
|
||||||
|
|
||||||
def _check(self, instance):
|
def _check(self, instance):
|
||||||
"""Run the command specified by check_command and capture the result"""
|
"""Run the command specified by check_command and capture the result.
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
dimensions = {'observer_host': socket.getfqdn()}
|
dimensions = {'observer_host': socket.getfqdn()}
|
||||||
# Add per-instance dimensions, if any
|
# Add per-instance dimensions, if any
|
||||||
|
|
|
@ -1,10 +1,10 @@
|
||||||
"""
|
"""Collects network metrics.
|
||||||
Collects network metrics.
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# stdlib
|
# stdlib
|
||||||
import subprocess
|
|
||||||
import re
|
import re
|
||||||
|
import subprocess
|
||||||
|
|
||||||
# project
|
# project
|
||||||
from monagent.collector.checks import AgentCheck
|
from monagent.collector.checks import AgentCheck
|
||||||
|
@ -254,8 +254,8 @@ class Network(AgentCheck):
|
||||||
self._submit_devicemetrics(interface, metrics)
|
self._submit_devicemetrics(interface, metrics)
|
||||||
|
|
||||||
def _parse_solaris_netstat(self, netstat_output):
|
def _parse_solaris_netstat(self, netstat_output):
|
||||||
"""
|
"""Return a mapping of network metrics by interface. For example:
|
||||||
Return a mapping of network metrics by interface. For example:
|
|
||||||
{ interface:
|
{ interface:
|
||||||
{'bytes_out': 0,
|
{'bytes_out': 0,
|
||||||
'bytes_in': 0,
|
'bytes_in': 0,
|
||||||
|
|
|
@ -1,14 +1,15 @@
|
||||||
import re
|
import re
|
||||||
import urllib2
|
import urllib2
|
||||||
|
|
||||||
from monagent.common.util import headers
|
|
||||||
from monagent.collector.checks import AgentCheck
|
from monagent.collector.checks import AgentCheck
|
||||||
from monagent.collector.checks.utils import add_basic_auth
|
from monagent.collector.checks.utils import add_basic_auth
|
||||||
|
from monagent.common.util import headers
|
||||||
|
|
||||||
|
|
||||||
class Nginx(AgentCheck):
|
class Nginx(AgentCheck):
|
||||||
|
|
||||||
"""Tracks basic nginx metrics via the status module
|
"""Tracks basic nginx metrics via the status module
|
||||||
|
|
||||||
* number of connections
|
* number of connections
|
||||||
* number of requets per second
|
* number of requets per second
|
||||||
|
|
||||||
|
@ -20,7 +21,6 @@ class Nginx(AgentCheck):
|
||||||
server accepts handled requests
|
server accepts handled requests
|
||||||
1156958 1156958 4491319
|
1156958 1156958 4491319
|
||||||
Reading: 0 Writing: 2 Waiting: 6
|
Reading: 0 Writing: 2 Waiting: 6
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def check(self, instance):
|
def check(self, instance):
|
||||||
|
|
|
@ -8,7 +8,8 @@ class ShouldRestartException(Exception):
|
||||||
|
|
||||||
class PostgreSql(AgentCheck):
|
class PostgreSql(AgentCheck):
|
||||||
|
|
||||||
"""Collects per-database, and optionally per-relation metrics
|
"""Collects per-database, and optionally per-relation metrics.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
RATE = AgentCheck.rate
|
RATE = AgentCheck.rate
|
||||||
|
@ -120,6 +121,7 @@ SELECT relname,
|
||||||
|
|
||||||
def _collect_stats(self, key, db, dimensions, relations):
|
def _collect_stats(self, key, db, dimensions, relations):
|
||||||
"""Query pg_stat_* for various metrics
|
"""Query pg_stat_* for various metrics
|
||||||
|
|
||||||
If relations is not an empty list, gather per-relation metrics
|
If relations is not an empty list, gather per-relation metrics
|
||||||
on top of that.
|
on top of that.
|
||||||
"""
|
"""
|
||||||
|
@ -190,7 +192,9 @@ SELECT relname,
|
||||||
[v[0][1](self, v[0][0], v[1], dimensions=dimensions) for v in values]
|
[v[0][1](self, v[0][0], v[1], dimensions=dimensions) for v in values]
|
||||||
|
|
||||||
def get_connection(self, key, host, port, user, password, dbname, use_cached=True):
|
def get_connection(self, key, host, port, user, password, dbname, use_cached=True):
|
||||||
"Get and memoize connections to instances"
|
"""Get and memorize connections to instances.
|
||||||
|
|
||||||
|
"""
|
||||||
if key in self.dbs and use_cached:
|
if key in self.dbs and use_cached:
|
||||||
return self.dbs[key]
|
return self.dbs[key]
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,6 @@
|
||||||
"""Gather metrics on specific processes"""
|
"""Gather metrics on specific processes.
|
||||||
|
|
||||||
|
"""
|
||||||
from monagent.collector.checks import AgentCheck
|
from monagent.collector.checks import AgentCheck
|
||||||
from monagent.common.util import Platform
|
from monagent.common.util import Platform
|
||||||
|
|
||||||
|
@ -28,8 +30,8 @@ class ProcessCheck(AgentCheck):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def find_pids(self, search_string, psutil, exact_match=True):
|
def find_pids(self, search_string, psutil, exact_match=True):
|
||||||
"""
|
"""Create a set of pids of selected processes.
|
||||||
Create a set of pids of selected processes.
|
|
||||||
Search for search_string
|
Search for search_string
|
||||||
"""
|
"""
|
||||||
found_process_list = []
|
found_process_list = []
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
import json
|
import json
|
||||||
|
import time
|
||||||
import urllib2
|
import urllib2
|
||||||
import urlparse
|
import urlparse
|
||||||
import time
|
|
||||||
|
|
||||||
from monagent.collector.checks import AgentCheck
|
from monagent.collector.checks import AgentCheck
|
||||||
|
|
||||||
|
@ -42,6 +42,7 @@ METRIC_SUFFIX = {QUEUE_TYPE: "queue", NODE_TYPE: "node"}
|
||||||
class RabbitMQ(AgentCheck):
|
class RabbitMQ(AgentCheck):
|
||||||
|
|
||||||
"""This check is for gathering statistics from the RabbitMQ
|
"""This check is for gathering statistics from the RabbitMQ
|
||||||
|
|
||||||
Management Plugin (http://www.rabbitmq.com/management.html)
|
Management Plugin (http://www.rabbitmq.com/management.html)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
@ -104,8 +105,8 @@ class RabbitMQ(AgentCheck):
|
||||||
return data
|
return data
|
||||||
|
|
||||||
def get_stats(self, instance, base_url, object_type, max_detailed, specified_list):
|
def get_stats(self, instance, base_url, object_type, max_detailed, specified_list):
|
||||||
"""
|
"""instance: the check instance
|
||||||
instance: the check instance
|
|
||||||
base_url: the url of the rabbitmq management api (e.g. http://localhost:15672/api)
|
base_url: the url of the rabbitmq management api (e.g. http://localhost:15672/api)
|
||||||
object_type: either QUEUE_TYPE or NODE_TYPE
|
object_type: either QUEUE_TYPE or NODE_TYPE
|
||||||
max_detailed: the limit of objects to collect for this type
|
max_detailed: the limit of objects to collect for this type
|
||||||
|
@ -116,7 +117,8 @@ class RabbitMQ(AgentCheck):
|
||||||
# Make a copy of this list as we will remove items from it at each iteration
|
# Make a copy of this list as we will remove items from it at each iteration
|
||||||
specified_items = list(specified_list)
|
specified_items = list(specified_list)
|
||||||
|
|
||||||
""" data is a list of nodes or queues:
|
"""data is a list of nodes or queues:
|
||||||
|
|
||||||
data = [
|
data = [
|
||||||
{'status': 'running', 'node': 'rabbit@host', 'name': 'queue1', 'consumers': 0, 'vhost': '/', 'backing_queue_status': {'q1': 0, 'q3': 0, 'q2': 0, 'q4': 0, 'avg_ack_egress_rate': 0.0, 'ram_msg_count': 0, 'ram_ack_count': 0, 'len': 0, 'persistent_count': 0, 'target_ram_count': 'infinity', 'next_seq_id': 0, 'delta': ['delta', 'undefined', 0, 'undefined'], 'pending_acks': 0, 'avg_ack_ingress_rate': 0.0, 'avg_egress_rate': 0.0, 'avg_ingress_rate': 0.0}, 'durable': True, 'idle_since': '2013-10-03 13:38:18', 'exclusive_consumer_tag': '', 'arguments': {}, 'memory': 10956, 'policy': '', 'auto_delete': False},
|
{'status': 'running', 'node': 'rabbit@host', 'name': 'queue1', 'consumers': 0, 'vhost': '/', 'backing_queue_status': {'q1': 0, 'q3': 0, 'q2': 0, 'q4': 0, 'avg_ack_egress_rate': 0.0, 'ram_msg_count': 0, 'ram_ack_count': 0, 'len': 0, 'persistent_count': 0, 'target_ram_count': 'infinity', 'next_seq_id': 0, 'delta': ['delta', 'undefined', 0, 'undefined'], 'pending_acks': 0, 'avg_ack_ingress_rate': 0.0, 'avg_egress_rate': 0.0, 'avg_ingress_rate': 0.0}, 'durable': True, 'idle_since': '2013-10-03 13:38:18', 'exclusive_consumer_tag': '', 'arguments': {}, 'memory': 10956, 'policy': '', 'auto_delete': False},
|
||||||
{'status': 'running', 'node': 'rabbit@host, 'name': 'queue10', 'consumers': 0, 'vhost': '/', 'backing_queue_status': {'q1': 0, 'q3': 0, 'q2': 0, 'q4': 0, 'avg_ack_egress_rate': 0.0, 'ram_msg_count': 0, 'ram_ack_count': 0, 'len': 0, 'persistent_count': 0, 'target_ram_count': 'infinity', 'next_seq_id': 0, 'delta': ['delta', 'undefined', 0, 'undefined'], 'pending_acks': 0, 'avg_ack_ingress_rate': 0.0, 'avg_egress_rate': 0.0, 'avg_ingress_rate': 0.0}, 'durable': True, 'idle_since': '2013-10-03 13:38:18', 'exclusive_consumer_tag': '', 'arguments': {}, 'memory': 10956, 'policy': '', 'auto_delete': False},
|
{'status': 'running', 'node': 'rabbit@host, 'name': 'queue10', 'consumers': 0, 'vhost': '/', 'backing_queue_status': {'q1': 0, 'q3': 0, 'q2': 0, 'q4': 0, 'avg_ack_egress_rate': 0.0, 'ram_msg_count': 0, 'ram_ack_count': 0, 'len': 0, 'persistent_count': 0, 'target_ram_count': 'infinity', 'next_seq_id': 0, 'delta': ['delta', 'undefined', 0, 'undefined'], 'pending_acks': 0, 'avg_ack_ingress_rate': 0.0, 'avg_egress_rate': 0.0, 'avg_ingress_rate': 0.0}, 'durable': True, 'idle_since': '2013-10-03 13:38:18', 'exclusive_consumer_tag': '', 'arguments': {}, 'memory': 10956, 'policy': '', 'auto_delete': False},
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
'''
|
"""Redis checks.
|
||||||
Redis checks
|
|
||||||
'''
|
"""
|
||||||
import re
|
import re
|
||||||
import time
|
import time
|
||||||
|
|
||||||
|
@ -84,7 +84,9 @@ class Redis(AgentCheck):
|
||||||
return {"redis": version}
|
return {"redis": version}
|
||||||
|
|
||||||
def _parse_dict_string(self, string, key, default):
|
def _parse_dict_string(self, string, key, default):
|
||||||
"""Take from a more recent redis.py, parse_info"""
|
"""Take from a more recent redis.py, parse_info.
|
||||||
|
|
||||||
|
"""
|
||||||
try:
|
try:
|
||||||
for item in string.split(','):
|
for item in string.split(','):
|
||||||
k, v = item.rsplit('=', 1)
|
k, v = item.rsplit('=', 1)
|
||||||
|
@ -94,7 +96,7 @@ class Redis(AgentCheck):
|
||||||
except ValueError:
|
except ValueError:
|
||||||
return v
|
return v
|
||||||
return default
|
return default
|
||||||
except Exception as e:
|
except Exception:
|
||||||
self.log.exception("Cannot parse dictionary string: %s" % string)
|
self.log.exception("Cannot parse dictionary string: %s" % string)
|
||||||
return default
|
return default
|
||||||
|
|
||||||
|
@ -144,7 +146,7 @@ class Redis(AgentCheck):
|
||||||
start = time.time()
|
start = time.time()
|
||||||
try:
|
try:
|
||||||
info = conn.info()
|
info = conn.info()
|
||||||
except ValueError as e:
|
except ValueError:
|
||||||
# This is likely a know issue with redis library 2.0.0
|
# This is likely a know issue with redis library 2.0.0
|
||||||
# See https://github.com/DataDog/dd-agent/issues/374 for details
|
# See https://github.com/DataDog/dd-agent/issues/374 for details
|
||||||
import redis
|
import redis
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
from hashlib import md5
|
from hashlib import md5
|
||||||
|
from httplib2 import Http
|
||||||
|
from httplib2 import HttpLib2Error
|
||||||
import json
|
import json
|
||||||
import time
|
|
||||||
import socket
|
import socket
|
||||||
|
import time
|
||||||
from httplib2 import Http, HttpLib2Error
|
|
||||||
|
|
||||||
from monagent.collector.checks import AgentCheck
|
from monagent.collector.checks import AgentCheck
|
||||||
|
|
||||||
|
@ -60,15 +60,15 @@ class Riak(AgentCheck):
|
||||||
h = Http(timeout=timeout)
|
h = Http(timeout=timeout)
|
||||||
resp, content = h.request(url, "GET")
|
resp, content = h.request(url, "GET")
|
||||||
|
|
||||||
except socket.timeout as e:
|
except socket.timeout:
|
||||||
self.timeout_event(url, timeout, aggregation_key)
|
self.timeout_event(url, timeout, aggregation_key)
|
||||||
return
|
return
|
||||||
|
|
||||||
except socket.error as e:
|
except socket.error:
|
||||||
self.timeout_event(url, timeout, aggregation_key)
|
self.timeout_event(url, timeout, aggregation_key)
|
||||||
return
|
return
|
||||||
|
|
||||||
except HttpLib2Error as e:
|
except HttpLib2Error:
|
||||||
self.timeout_event(url, timeout, aggregation_key)
|
self.timeout_event(url, timeout, aggregation_key)
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
'''
|
"""Check the performance counters from SQL Server.
|
||||||
Check the performance counters from SQL Server
|
|
||||||
'''
|
"""
|
||||||
import traceback
|
import traceback
|
||||||
|
|
||||||
from monagent.collector.checks import AgentCheck
|
from monagent.collector.checks import AgentCheck
|
||||||
|
@ -39,14 +39,16 @@ class SQLServer(AgentCheck):
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _conn_key(host, username, password, database):
|
def _conn_key(host, username, password, database):
|
||||||
''' Return a key to use for the connection cache
|
"""Return a key to use for the connection cache.
|
||||||
'''
|
|
||||||
|
"""
|
||||||
return '%s:%s:%s:%s' % (host, username, password, database)
|
return '%s:%s:%s:%s' % (host, username, password, database)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _conn_string(host, username, password, database):
|
def _conn_string(host, username, password, database):
|
||||||
''' Return a connection string to use with adodbapi
|
"""Return a connection string to use with adodbapi.
|
||||||
'''
|
|
||||||
|
"""
|
||||||
conn_str = 'Provider=SQLOLEDB;Data Source=%s;Initial Catalog=%s;' % (host, database)
|
conn_str = 'Provider=SQLOLEDB;Data Source=%s;Initial Catalog=%s;' % (host, database)
|
||||||
if username:
|
if username:
|
||||||
conn_str += 'User ID=%s;' % (username)
|
conn_str += 'User ID=%s;' % (username)
|
||||||
|
@ -74,7 +76,7 @@ class SQLServer(AgentCheck):
|
||||||
conn_str = self._conn_string(host, username, password, database)
|
conn_str = self._conn_string(host, username, password, database)
|
||||||
conn = adodbapi.connect(conn_str)
|
conn = adodbapi.connect(conn_str)
|
||||||
self.connections[conn_key] = conn
|
self.connections[conn_key] = conn
|
||||||
except Exception as e:
|
except Exception:
|
||||||
cx = "%s - %s" % (host, database)
|
cx = "%s - %s" % (host, database)
|
||||||
raise Exception("Unable to connect to SQL Server for instance %s.\n %s"
|
raise Exception("Unable to connect to SQL Server for instance %s.\n %s"
|
||||||
% (cx, traceback.format_exc()))
|
% (cx, traceback.format_exc()))
|
||||||
|
@ -84,8 +86,9 @@ class SQLServer(AgentCheck):
|
||||||
self._fetch_metrics(cursor, dimensions)
|
self._fetch_metrics(cursor, dimensions)
|
||||||
|
|
||||||
def _fetch_metrics(self, cursor, custom_dimensions):
|
def _fetch_metrics(self, cursor, custom_dimensions):
|
||||||
''' Fetch the metrics from the sys.dm_os_performance_counters table
|
"""Fetch the metrics from the sys.dm_os_performance_counters table.
|
||||||
'''
|
|
||||||
|
"""
|
||||||
for metric in self.METRICS:
|
for metric in self.METRICS:
|
||||||
# Normalize all rows to the same size for easy of use
|
# Normalize all rows to the same size for easy of use
|
||||||
if len(metric) == 3:
|
if len(metric) == 3:
|
||||||
|
@ -100,7 +103,7 @@ class SQLServer(AgentCheck):
|
||||||
if instance_n == ALL_INSTANCES:
|
if instance_n == ALL_INSTANCES:
|
||||||
try:
|
try:
|
||||||
self._fetch_all_instances(metric, cursor, custom_dimensions)
|
self._fetch_all_instances(metric, cursor, custom_dimensions)
|
||||||
except Exception as e:
|
except Exception:
|
||||||
self.log.exception('Unable to fetch metric: %s' % mname)
|
self.log.exception('Unable to fetch metric: %s' % mname)
|
||||||
self.warning('Unable to fetch metric: %s' % mname)
|
self.warning('Unable to fetch metric: %s' % mname)
|
||||||
else:
|
else:
|
||||||
|
@ -112,7 +115,7 @@ class SQLServer(AgentCheck):
|
||||||
and instance_name = ?
|
and instance_name = ?
|
||||||
""", (counter, instance_n))
|
""", (counter, instance_n))
|
||||||
(value,) = cursor.fetchone()
|
(value,) = cursor.fetchone()
|
||||||
except Exception as e:
|
except Exception:
|
||||||
self.log.exception('Unable to fetch metric: %s' % mname)
|
self.log.exception('Unable to fetch metric: %s' % mname)
|
||||||
self.warning('Unable to fetch metric: %s' % mname)
|
self.warning('Unable to fetch metric: %s' % mname)
|
||||||
continue
|
continue
|
||||||
|
|
|
@ -1,7 +1,9 @@
|
||||||
import socket
|
import socket
|
||||||
import time
|
import time
|
||||||
|
|
||||||
from monagent.collector.checks.services_checks import ServicesCheck, Status, EventType
|
from monagent.collector.checks.services_checks import EventType
|
||||||
|
from monagent.collector.checks.services_checks import ServicesCheck
|
||||||
|
from monagent.collector.checks.services_checks import Status
|
||||||
|
|
||||||
|
|
||||||
class BadConfException(Exception):
|
class BadConfException(Exception):
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
import xml.parsers.expat # python 2.4 compatible
|
|
||||||
import re
|
import re
|
||||||
import subprocess
|
import subprocess
|
||||||
|
import xml.parsers.expat # python 2.4 compatible
|
||||||
|
|
||||||
from monagent.collector.checks import AgentCheck
|
from monagent.collector.checks import AgentCheck
|
||||||
|
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
'''
|
"""Monitor the Windows Event Log.
|
||||||
Monitor the Windows Event Log
|
|
||||||
'''
|
"""
|
||||||
from datetime import datetime
|
|
||||||
import calendar
|
import calendar
|
||||||
|
from datetime import datetime
|
||||||
try:
|
try:
|
||||||
import wmi
|
import wmi
|
||||||
except Exception:
|
except Exception:
|
||||||
|
@ -75,9 +75,10 @@ class Win32EventLog(AgentCheck):
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _instance_key(instance):
|
def _instance_key(instance):
|
||||||
''' Generate a unique key per instance for use with keeping track of
|
"""Generate a unique key per instance for use with keeping track of
|
||||||
state for each instance.
|
|
||||||
'''
|
state for each instance.
|
||||||
|
"""
|
||||||
return '%s' % (instance)
|
return '%s' % (instance)
|
||||||
|
|
||||||
|
|
||||||
|
@ -95,7 +96,9 @@ class EventLogQuery(object):
|
||||||
self.start_ts = start_ts
|
self.start_ts = start_ts
|
||||||
|
|
||||||
def to_wql(self):
|
def to_wql(self):
|
||||||
''' Return this query as a WQL string. '''
|
"""Return this query as a WQL string.
|
||||||
|
|
||||||
|
"""
|
||||||
wql = """
|
wql = """
|
||||||
SELECT Message, SourceName, TimeGenerated, Type, User, InsertionStrings
|
SELECT Message, SourceName, TimeGenerated, Type, User, InsertionStrings
|
||||||
FROM Win32_NTLogEvent
|
FROM Win32_NTLogEvent
|
||||||
|
@ -125,9 +128,10 @@ class EventLogQuery(object):
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _add_message_filter(msg_filter, q):
|
def _add_message_filter(msg_filter, q):
|
||||||
''' Filter on the message text using a LIKE query. If the filter starts
|
"""Filter on the message text using a LIKE query. If the filter starts
|
||||||
with '-' then we'll assume that it's a NOT LIKE filter.
|
|
||||||
'''
|
with '-' then we'll assume that it's a NOT LIKE filter.
|
||||||
|
"""
|
||||||
if msg_filter.startswith('-'):
|
if msg_filter.startswith('-'):
|
||||||
msg_filter = msg_filter[1:]
|
msg_filter = msg_filter[1:]
|
||||||
q += '\nAND NOT Message LIKE "%s"' % msg_filter
|
q += '\nAND NOT Message LIKE "%s"' % msg_filter
|
||||||
|
@ -137,18 +141,19 @@ class EventLogQuery(object):
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _dt_to_wmi(dt):
|
def _dt_to_wmi(dt):
|
||||||
''' A wrapper around wmi.from_time to get a WMI-formatted time from a
|
"""A wrapper around wmi.from_time to get a WMI-formatted time from a time struct.
|
||||||
time struct.
|
|
||||||
'''
|
"""
|
||||||
return wmi.from_time(year=dt.year, month=dt.month, day=dt.day,
|
return wmi.from_time(year=dt.year, month=dt.month, day=dt.day,
|
||||||
hours=dt.hour, minutes=dt.minute, seconds=dt.second, microseconds=0,
|
hours=dt.hour, minutes=dt.minute, seconds=dt.second, microseconds=0,
|
||||||
timezone=0)
|
timezone=0)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _convert_event_types(types):
|
def _convert_event_types(types):
|
||||||
''' Detect if we are running on <= Server 2003. If so, we should convert
|
"""Detect if we are running on <= Server 2003. If so, we should convert
|
||||||
|
|
||||||
the EventType values to integers
|
the EventType values to integers
|
||||||
'''
|
"""
|
||||||
return types
|
return types
|
||||||
|
|
||||||
|
|
||||||
|
@ -177,15 +182,18 @@ class LogEvent(object):
|
||||||
}
|
}
|
||||||
|
|
||||||
def is_after(self, ts):
|
def is_after(self, ts):
|
||||||
''' Compare this event's timestamp to a give timestamp. '''
|
"""Compare this event's timestamp to a give timestamp.
|
||||||
|
|
||||||
|
"""
|
||||||
if self.timestamp >= int(calendar.timegm(ts.timetuple())):
|
if self.timestamp >= int(calendar.timegm(ts.timetuple())):
|
||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _wmi_to_ts(wmi_ts):
|
def _wmi_to_ts(wmi_ts):
|
||||||
''' Convert a wmi formatted timestamp into an epoch using wmi.to_time().
|
"""Convert a wmi formatted timestamp into an epoch using wmi.to_time().
|
||||||
'''
|
|
||||||
|
"""
|
||||||
year, month, day, hour, minute, second, microsecond, tz = \
|
year, month, day, hour, minute, second, microsecond, tz = \
|
||||||
wmi.to_time(wmi_ts)
|
wmi.to_time(wmi_ts)
|
||||||
dt = datetime(year=year, month=month, day=day, hour=hour, minute=minute,
|
dt = datetime(year=year, month=month, day=day, hour=hour, minute=minute,
|
||||||
|
|
|
@ -1,10 +1,9 @@
|
||||||
'''
|
"""Windows Only.
|
||||||
Windows Only.
|
|
||||||
|
|
||||||
Generic WMI check. This check allows you to specify particular metrics that you
|
Generic WMI check. This check allows you to specify particular metrics that you
|
||||||
want from WMI in your configuration. Check wmi.yaml.example in your conf.d
|
want from WMI in your configuration. Check wmi.yaml.example in your conf.d
|
||||||
directory for more details on configuration.
|
directory for more details on configuration.
|
||||||
'''
|
"""
|
||||||
try:
|
try:
|
||||||
import wmi
|
import wmi
|
||||||
except Exception:
|
except Exception:
|
||||||
|
|
|
@ -1,5 +1,4 @@
|
||||||
'''
|
"""Parses the response from zookeeper's `stat` admin command, which looks like:
|
||||||
Parses the response from zookeeper's `stat` admin command, which looks like:
|
|
||||||
|
|
||||||
```
|
```
|
||||||
Zookeeper version: 3.2.2--1, built on 03/16/2010 07:31 GMT
|
Zookeeper version: 3.2.2--1, built on 03/16/2010 07:31 GMT
|
||||||
|
@ -21,13 +20,12 @@ Node count: 487
|
||||||
```
|
```
|
||||||
|
|
||||||
Tested with Zookeeper versions 3.0.0 to 3.4.5
|
Tested with Zookeeper versions 3.0.0 to 3.4.5
|
||||||
|
"""
|
||||||
'''
|
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import socket
|
import socket
|
||||||
import struct
|
|
||||||
from StringIO import StringIO
|
from StringIO import StringIO
|
||||||
|
import struct
|
||||||
|
|
||||||
from monagent.collector.checks import AgentCheck
|
from monagent.collector.checks import AgentCheck
|
||||||
|
|
||||||
|
@ -87,9 +85,10 @@ class Zookeeper(AgentCheck):
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def parse_stat(cls, buf):
|
def parse_stat(cls, buf):
|
||||||
''' `buf` is a readable file-like object
|
"""`buf` is a readable file-like object
|
||||||
returns a tuple: ([(metric_name, value)], dimensions)
|
|
||||||
'''
|
returns a tuple: ([(metric_name, value)], dimensions)
|
||||||
|
"""
|
||||||
metrics = []
|
metrics = []
|
||||||
buf.seek(0)
|
buf.seek(0)
|
||||||
|
|
||||||
|
|
|
@ -1,19 +1,25 @@
|
||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
# set up logging before importing any other components
|
|
||||||
from monagent.common.config import get_version, initialize_logging
|
|
||||||
initialize_logging('collector')
|
|
||||||
|
|
||||||
import os
|
|
||||||
os.umask(0o22)
|
|
||||||
|
|
||||||
# Core modules
|
# Core modules
|
||||||
|
import glob
|
||||||
import logging
|
import logging
|
||||||
import os.path
|
import os
|
||||||
import signal
|
import signal
|
||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
import glob
|
|
||||||
|
# Custom modules
|
||||||
|
import checks.collector
|
||||||
|
import jmxfetch
|
||||||
|
import monagent.common.check_status
|
||||||
|
import monagent.common.config
|
||||||
|
import monagent.common.daemon
|
||||||
|
import monagent.common.emitter
|
||||||
|
import monagent.common.util
|
||||||
|
|
||||||
|
# set up logging before importing any other components
|
||||||
|
monagent.common.config.initialize_logging('collector')
|
||||||
|
os.umask(0o22)
|
||||||
|
|
||||||
# Check we're not using an old version of Python. We need 2.4 above because
|
# Check we're not using an old version of Python. We need 2.4 above because
|
||||||
# some modules (like subprocess) were only introduced in 2.4.
|
# some modules (like subprocess) were only introduced in 2.4.
|
||||||
|
@ -21,16 +27,6 @@ if int(sys.version_info[1]) <= 3:
|
||||||
sys.stderr.write("Monasca Agent requires python 2.4 or later.\n")
|
sys.stderr.write("Monasca Agent requires python 2.4 or later.\n")
|
||||||
sys.exit(2)
|
sys.exit(2)
|
||||||
|
|
||||||
# Custom modules
|
|
||||||
from checks.collector import Collector
|
|
||||||
from monagent.common.check_status import CollectorStatus, ForwarderStatus
|
|
||||||
from monagent.common.config import get_config, get_parsed_args, load_check_directory, get_confd_path, check_yaml, get_logging_config
|
|
||||||
from monagent.common.daemon import Daemon, AgentSupervisor
|
|
||||||
from monagent.common.emitter import http_emitter
|
|
||||||
from monagent.common.util import Watchdog, PidFile, get_os
|
|
||||||
from jmxfetch import JMXFetch, JMX_LIST_COMMANDS
|
|
||||||
|
|
||||||
|
|
||||||
# Constants
|
# Constants
|
||||||
PID_NAME = "monasca-agent"
|
PID_NAME = "monasca-agent"
|
||||||
WATCHDOG_MULTIPLIER = 10
|
WATCHDOG_MULTIPLIER = 10
|
||||||
|
@ -43,14 +39,14 @@ log = logging.getLogger('collector')
|
||||||
|
|
||||||
# todo the collector has daemon code but is always run in foreground mode
|
# todo the collector has daemon code but is always run in foreground mode
|
||||||
# from the supervisor, is there a reason for the daemon code then?
|
# from the supervisor, is there a reason for the daemon code then?
|
||||||
class CollectorDaemon(Daemon):
|
class CollectorDaemon(monagent.common.daemon.Daemon):
|
||||||
|
|
||||||
|
"""The agent class is a daemon that runs the collector in a background process.
|
||||||
|
|
||||||
"""
|
|
||||||
The agent class is a daemon that runs the collector in a background process.
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, pidfile, autorestart, start_event=True):
|
def __init__(self, pidfile, autorestart, start_event=True):
|
||||||
Daemon.__init__(self, pidfile, autorestart=autorestart)
|
monagent.common.daemon.Daemon.__init__(self, pidfile, autorestart=autorestart)
|
||||||
self.run_forever = True
|
self.run_forever = True
|
||||||
self.collector = None
|
self.collector = None
|
||||||
self.start_event = start_event
|
self.start_event = start_event
|
||||||
|
@ -59,8 +55,8 @@ class CollectorDaemon(Daemon):
|
||||||
log.debug("Caught sigterm. Stopping run loop.")
|
log.debug("Caught sigterm. Stopping run loop.")
|
||||||
self.run_forever = False
|
self.run_forever = False
|
||||||
|
|
||||||
if JMXFetch.is_running():
|
if jmxfetch.JMXFetch.is_running():
|
||||||
JMXFetch.stop()
|
jmxfetch.JMXFetch.stop()
|
||||||
|
|
||||||
if self.collector:
|
if self.collector:
|
||||||
self.collector.stop()
|
self.collector.stop()
|
||||||
|
@ -72,10 +68,12 @@ class CollectorDaemon(Daemon):
|
||||||
|
|
||||||
def info(self, verbose=None):
|
def info(self, verbose=None):
|
||||||
logging.getLogger().setLevel(logging.ERROR)
|
logging.getLogger().setLevel(logging.ERROR)
|
||||||
return CollectorStatus.print_latest_status(verbose=verbose)
|
return monagent.common.check_status.CollectorStatus.print_latest_status(verbose=verbose)
|
||||||
|
|
||||||
def run(self, config=None):
|
def run(self, config=None):
|
||||||
"""Main loop of the collector"""
|
"""Main loop of the collector.
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
# Gracefully exit on sigterm.
|
# Gracefully exit on sigterm.
|
||||||
signal.signal(signal.SIGTERM, self._handle_sigterm)
|
signal.signal(signal.SIGTERM, self._handle_sigterm)
|
||||||
|
@ -87,15 +85,15 @@ class CollectorDaemon(Daemon):
|
||||||
signal.signal(signal.SIGINT, self._handle_sigterm)
|
signal.signal(signal.SIGINT, self._handle_sigterm)
|
||||||
|
|
||||||
# Save the agent start-up stats.
|
# Save the agent start-up stats.
|
||||||
CollectorStatus().persist()
|
monagent.common.check_status.CollectorStatus().persist()
|
||||||
|
|
||||||
# Intialize the collector.
|
# Intialize the collector.
|
||||||
if config is None:
|
if config is None:
|
||||||
config = get_config(parse_args=True)
|
config = monagent.common.config.get_config(parse_args=True)
|
||||||
|
|
||||||
# Load the checks_d checks
|
# Load the checks_d checks
|
||||||
checksd = load_check_directory(config)
|
checksd = monagent.common.config.load_check_directory(config)
|
||||||
self.collector = Collector(config, http_emitter, checksd)
|
self.collector = checks.collector.Collector(config, monagent.common.emitter.http_emitter, checksd)
|
||||||
|
|
||||||
# Configure the watchdog.
|
# Configure the watchdog.
|
||||||
check_frequency = int(config['check_freq'])
|
check_frequency = int(config['check_freq'])
|
||||||
|
@ -127,9 +125,9 @@ class CollectorDaemon(Daemon):
|
||||||
if config.get('profile', False) and config.get('profile').lower() == 'yes' and profiled:
|
if config.get('profile', False) and config.get('profile').lower() == 'yes' and profiled:
|
||||||
try:
|
try:
|
||||||
profiler.disable()
|
profiler.disable()
|
||||||
|
import cStringIO
|
||||||
import pstats
|
import pstats
|
||||||
from cStringIO import StringIO
|
s = cStringIO.StringIO()
|
||||||
s = StringIO()
|
|
||||||
ps = pstats.Stats(profiler, stream=s).sort_stats("cumulative")
|
ps = pstats.Stats(profiler, stream=s).sort_stats("cumulative")
|
||||||
ps.print_stats()
|
ps.print_stats()
|
||||||
log.debug(s.getvalue())
|
log.debug(s.getvalue())
|
||||||
|
@ -149,7 +147,7 @@ class CollectorDaemon(Daemon):
|
||||||
|
|
||||||
# Now clean-up.
|
# Now clean-up.
|
||||||
try:
|
try:
|
||||||
CollectorStatus.remove_latest_status()
|
monagent.common.check_status.CollectorStatus.remove_latest_status()
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
@ -162,8 +160,9 @@ class CollectorDaemon(Daemon):
|
||||||
def _get_watchdog(check_freq, agentConfig):
|
def _get_watchdog(check_freq, agentConfig):
|
||||||
watchdog = None
|
watchdog = None
|
||||||
if agentConfig.get("watchdog", True):
|
if agentConfig.get("watchdog", True):
|
||||||
watchdog = Watchdog(check_freq * WATCHDOG_MULTIPLIER,
|
watchdog = monagent.common.util.Watchdog(check_freq * WATCHDOG_MULTIPLIER,
|
||||||
max_mem_mb=agentConfig.get('limit_memory_consumption', None))
|
max_mem_mb=agentConfig.get('limit_memory_consumption',
|
||||||
|
None))
|
||||||
watchdog.reset()
|
watchdog.reset()
|
||||||
return watchdog
|
return watchdog
|
||||||
|
|
||||||
|
@ -176,12 +175,12 @@ class CollectorDaemon(Daemon):
|
||||||
log.info("Running an auto-restart.")
|
log.info("Running an auto-restart.")
|
||||||
if self.collector:
|
if self.collector:
|
||||||
self.collector.stop()
|
self.collector.stop()
|
||||||
sys.exit(AgentSupervisor.RESTART_EXIT_STATUS)
|
sys.exit(monagent.common.daemon.AgentSupervisor.RESTART_EXIT_STATUS)
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
options, args = get_parsed_args()
|
options, args = monagent.common.config.get_parsed_args()
|
||||||
agentConfig = get_config(options=options)
|
agentConfig = monagent.common.config.get_config(options=options)
|
||||||
# todo autorestart isn't used remove
|
# todo autorestart isn't used remove
|
||||||
autorestart = agentConfig.get('autorestart', False)
|
autorestart = agentConfig.get('autorestart', False)
|
||||||
|
|
||||||
|
@ -207,7 +206,7 @@ def main():
|
||||||
sys.stderr.write("Unknown command: %s\n" % command)
|
sys.stderr.write("Unknown command: %s\n" % command)
|
||||||
return 3
|
return 3
|
||||||
|
|
||||||
pid_file = PidFile('monasca-agent')
|
pid_file = monagent.common.util.PidFile('monasca-agent')
|
||||||
|
|
||||||
if options.clean:
|
if options.clean:
|
||||||
pid_file.clean()
|
pid_file.clean()
|
||||||
|
@ -215,7 +214,7 @@ def main():
|
||||||
agent = CollectorDaemon(pid_file.get_path(), autorestart)
|
agent = CollectorDaemon(pid_file.get_path(), autorestart)
|
||||||
|
|
||||||
if command in START_COMMANDS:
|
if command in START_COMMANDS:
|
||||||
log.info('Agent version %s' % get_version())
|
log.info('Agent version %s' % monagent.common.config.get_version())
|
||||||
|
|
||||||
if 'start' == command:
|
if 'start' == command:
|
||||||
log.info('Start daemon')
|
log.info('Start daemon')
|
||||||
|
@ -247,34 +246,29 @@ def main():
|
||||||
def parent_func():
|
def parent_func():
|
||||||
agent.start_event = False
|
agent.start_event = False
|
||||||
|
|
||||||
AgentSupervisor.start(parent_func, child_func)
|
monagent.common.daemon.AgentSupervisor.start(parent_func, child_func)
|
||||||
else:
|
else:
|
||||||
# Run in the standard foreground.
|
# Run in the standard foreground.
|
||||||
agent.run(config=agentConfig)
|
agent.run(config=agentConfig)
|
||||||
|
|
||||||
elif 'check' == command:
|
elif 'check' == command:
|
||||||
check_name = args[1]
|
check_name = args[1]
|
||||||
try:
|
checks = monagent.common.config.load_check_directory(agentConfig)
|
||||||
# Try the old-style check first
|
for check in checks['initialized_checks']:
|
||||||
print(getattr(collector.checks.collector, check_name)(log).check(agentConfig))
|
if check.name == check_name:
|
||||||
except Exception:
|
check.run()
|
||||||
# If not an old-style check, try checks_d
|
print("Metrics: ")
|
||||||
checks = load_check_directory(agentConfig)
|
check.get_metrics(prettyprint=True)
|
||||||
for check in checks['initialized_checks']:
|
if len(args) == 3 and args[2] == 'check_rate':
|
||||||
if check.name == check_name:
|
print("Running 2nd iteration to capture rate metrics")
|
||||||
|
time.sleep(1)
|
||||||
check.run()
|
check.run()
|
||||||
print("Metrics: ")
|
print("Metrics: ")
|
||||||
check.get_metrics(prettyprint=True)
|
check.get_metrics(prettyprint=True)
|
||||||
if len(args) == 3 and args[2] == 'check_rate':
|
|
||||||
print("Running 2nd iteration to capture rate metrics")
|
|
||||||
time.sleep(1)
|
|
||||||
check.run()
|
|
||||||
print("Metrics: ")
|
|
||||||
check.get_metrics(prettyprint=True)
|
|
||||||
|
|
||||||
elif 'check_all' == command:
|
elif 'check_all' == command:
|
||||||
print("Loading check directory...")
|
print("Loading check directory...")
|
||||||
checks = load_check_directory(agentConfig)
|
checks = monagent.common.config.load_check_directory(agentConfig)
|
||||||
print("...directory loaded.\n")
|
print("...directory loaded.\n")
|
||||||
for check in checks['initialized_checks']:
|
for check in checks['initialized_checks']:
|
||||||
print("#" * 80)
|
print("#" * 80)
|
||||||
|
@ -285,12 +279,12 @@ def main():
|
||||||
print("#" * 80 + "\n\n")
|
print("#" * 80 + "\n\n")
|
||||||
|
|
||||||
elif 'configcheck' == command or 'configtest' == command:
|
elif 'configcheck' == command or 'configtest' == command:
|
||||||
osname = get_os()
|
osname = monagent.common.util.get_os()
|
||||||
all_valid = True
|
all_valid = True
|
||||||
for conf_path in glob.glob(os.path.join(get_confd_path(osname), "*.yaml")):
|
for conf_path in glob.glob(os.path.join(monagent.common.config.get_confd_path(osname), "*.yaml")):
|
||||||
basename = os.path.basename(conf_path)
|
basename = os.path.basename(conf_path)
|
||||||
try:
|
try:
|
||||||
check_yaml(conf_path)
|
monagent.common.config.check_yaml(conf_path)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
all_valid = False
|
all_valid = False
|
||||||
print("%s contains errors:\n %s" % (basename, e))
|
print("%s contains errors:\n %s" % (basename, e))
|
||||||
|
@ -307,14 +301,14 @@ def main():
|
||||||
|
|
||||||
elif 'jmx' == command:
|
elif 'jmx' == command:
|
||||||
|
|
||||||
if len(args) < 2 or args[1] not in JMX_LIST_COMMANDS.keys():
|
if len(args) < 2 or args[1] not in jmxfetch.JMX_LIST_COMMANDS.keys():
|
||||||
print("#" * 80)
|
print("#" * 80)
|
||||||
print("JMX tool to be used to help configuring your JMX checks.")
|
print("JMX tool to be used to help configuring your JMX checks.")
|
||||||
print("See http://docs.datadoghq.com/integrations/java/ for more information")
|
print("See http://docs.datadoghq.com/integrations/java/ for more information")
|
||||||
print("#" * 80)
|
print("#" * 80)
|
||||||
print("\n")
|
print("\n")
|
||||||
print("You have to specify one of the following command:")
|
print("You have to specify one of the following command:")
|
||||||
for command, desc in JMX_LIST_COMMANDS.iteritems():
|
for command, desc in jmxfetch.JMX_LIST_COMMANDS.iteritems():
|
||||||
print(" - %s [OPTIONAL: LIST OF CHECKS]: %s" % (command, desc))
|
print(" - %s [OPTIONAL: LIST OF CHECKS]: %s" % (command, desc))
|
||||||
print("Example: sudo /etc/init.d/monasca-agent jmx list_matching_attributes tomcat jmx solr")
|
print("Example: sudo /etc/init.d/monasca-agent jmx list_matching_attributes tomcat jmx solr")
|
||||||
print("\n")
|
print("\n")
|
||||||
|
@ -322,11 +316,11 @@ def main():
|
||||||
else:
|
else:
|
||||||
jmx_command = args[1]
|
jmx_command = args[1]
|
||||||
checks_list = args[2:]
|
checks_list = args[2:]
|
||||||
confd_directory = get_confd_path(get_os())
|
confd_directory = monagent.common.config.get_confd_path(monagent.common.util.get_os())
|
||||||
should_run = JMXFetch.init(
|
should_run = jmxfetch.JMXFetch.init(
|
||||||
confd_directory,
|
confd_directory,
|
||||||
agentConfig,
|
agentConfig,
|
||||||
get_logging_config(),
|
monagent.common.config.get_logging_config(),
|
||||||
15,
|
15,
|
||||||
jmx_command,
|
jmx_command,
|
||||||
checks_list,
|
checks_list,
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from collector.dogstream import common
|
from monagent.collector.dogstream import common
|
||||||
|
|
||||||
|
|
||||||
LOG4J_PRIORITY = [
|
LOG4J_PRIORITY = [
|
||||||
|
|
|
@ -1,14 +1,11 @@
|
||||||
"""
|
"""Custom parser for supervisord log suitable for use by Datadog 'dogstreams'
|
||||||
Custom parser for supervisord log suitable for use by Datadog 'dogstreams'
|
|
||||||
|
|
||||||
Add to datadog.conf as follows:
|
Add to datadog.conf as follows:
|
||||||
|
dogstreams: [path_to_supervisord.log]:datadog.streams.supervisord:parse_supervisord
|
||||||
dogstreams: [path_to_supervisord.log]:datadog.streams.supervisord:parse_supervisord
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
import time
|
|
||||||
import re
|
import re
|
||||||
|
import time
|
||||||
|
|
||||||
EVENT_TYPE = "supervisor"
|
EVENT_TYPE = "supervisor"
|
||||||
|
|
||||||
|
@ -37,8 +34,8 @@ program_matcher = re.compile("^\w+:? '?(?P<program>\w+)'?")
|
||||||
|
|
||||||
|
|
||||||
def parse_supervisord(log, line):
|
def parse_supervisord(log, line):
|
||||||
"""
|
"""Parse the supervisord.log line into a dogstream event.
|
||||||
Parse the supervisord.log line into a dogstream event
|
|
||||||
"""
|
"""
|
||||||
if len(line) == 0:
|
if len(line) == 0:
|
||||||
log.info("Skipping empty line of supervisord.log")
|
log.info("Skipping empty line of supervisord.log")
|
||||||
|
@ -71,9 +68,9 @@ def parse_supervisord(log, line):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
import sys
|
|
||||||
import pprint
|
|
||||||
import logging
|
import logging
|
||||||
|
import pprint
|
||||||
|
import sys
|
||||||
logging.basicConfig()
|
logging.basicConfig()
|
||||||
log = logging.getLogger()
|
log = logging.getLogger()
|
||||||
lines = open(sys.argv[1]).readlines()
|
lines = open(sys.argv[1]).readlines()
|
||||||
|
|
|
@ -1,19 +1,14 @@
|
||||||
# std
|
|
||||||
import yaml
|
|
||||||
|
|
||||||
try:
|
|
||||||
from yaml import CLoader as Loader
|
|
||||||
except ImportError:
|
|
||||||
from yaml import Loader
|
|
||||||
import os
|
|
||||||
import logging
|
|
||||||
import glob
|
import glob
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
import signal
|
import signal
|
||||||
import subprocess
|
import subprocess
|
||||||
import tempfile
|
import tempfile
|
||||||
import time
|
import time
|
||||||
|
|
||||||
from monagent.common.util import PidFile, get_os
|
import yaml
|
||||||
|
|
||||||
|
import monagent.common.util
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
@ -55,7 +50,7 @@ class InvalidJMXConfiguration(Exception):
|
||||||
|
|
||||||
|
|
||||||
class JMXFetch(object):
|
class JMXFetch(object):
|
||||||
pid_file = PidFile("jmxfetch")
|
pid_file = monagent.common.util.PidFile("jmxfetch")
|
||||||
pid_file_path = pid_file.get_path()
|
pid_file_path = pid_file.get_path()
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
@ -95,27 +90,26 @@ class JMXFetch(object):
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def should_run(cls, confd_path, checks_list):
|
def should_run(cls, confd_path, checks_list):
|
||||||
"""
|
'''Return a tuple (jmx_checks, invalid_checks, java_bin_path, java_options).
|
||||||
Return a tuple (jmx_checks, invalid_checks, java_bin_path, java_options)
|
|
||||||
|
|
||||||
jmx_checks: list of yaml files that are jmx checks
|
jmx_checks: list of yaml files that are jmx checks
|
||||||
(they have the is_jmx flag enabled or they are in JMX_CHECKS)
|
(they have the is_jmx flag enabled or they are in JMX_CHECKS)
|
||||||
and that have at least one instance configured
|
and that have at least one instance configured
|
||||||
|
|
||||||
invalid_checks: dictionary whose keys are check names that are JMX checks but
|
invalid_checks: dictionary whose keys are check names that are JMX checks but
|
||||||
they have a bad configuration. Values of the dictionary are exceptions generated
|
they have a bad configuration. Values of the dictionary are exceptions generated
|
||||||
when checking the configuration
|
when checking the configuration
|
||||||
|
|
||||||
java_bin_path: is the path to the java executable. It was
|
java_bin_path: is the path to the java executable. It was
|
||||||
previously set in the "instance" part of the yaml file of the
|
previously set in the "instance" part of the yaml file of the
|
||||||
jmx check. So we need to parse yaml files to get it.
|
jmx check. So we need to parse yaml files to get it.
|
||||||
We assume that this value is alwayws the same for every jmx check
|
We assume that this value is alwayws the same for every jmx check
|
||||||
so we can return the first value returned
|
so we can return the first value returned
|
||||||
|
|
||||||
java_options: is string contains options that will be passed to java_bin_path
|
java_options: is string contains options that will be passed to java_bin_path
|
||||||
We assume that this value is alwayws the same for every jmx check
|
We assume that this value is alwayws the same for every jmx check
|
||||||
so we can return the first value returned
|
so we can return the first value returned
|
||||||
"""
|
'''
|
||||||
|
|
||||||
jmx_checks = []
|
jmx_checks = []
|
||||||
java_bin_path = None
|
java_bin_path = None
|
||||||
|
@ -129,6 +123,10 @@ class JMXFetch(object):
|
||||||
if os.path.exists(conf):
|
if os.path.exists(conf):
|
||||||
f = open(conf)
|
f = open(conf)
|
||||||
try:
|
try:
|
||||||
|
if hasattr(yaml, 'CLoader'):
|
||||||
|
Loader = yaml.CLoader
|
||||||
|
else:
|
||||||
|
Loader = yaml.Loader
|
||||||
check_config = yaml.load(f.read(), Loader=Loader)
|
check_config = yaml.load(f.read(), Loader=Loader)
|
||||||
assert check_config is not None
|
assert check_config is not None
|
||||||
f.close()
|
f.close()
|
||||||
|
@ -236,7 +234,7 @@ class JMXFetch(object):
|
||||||
except Exception:
|
except Exception:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
if get_os() != 'windows':
|
if monagent.common.util.get_os() != 'windows':
|
||||||
try:
|
try:
|
||||||
os.kill(pid, 0)
|
os.kill(pid, 0)
|
||||||
# os.kill(pid, 0) will throw an exception if pid is not running
|
# os.kill(pid, 0) will throw an exception if pid is not running
|
||||||
|
@ -292,7 +290,7 @@ class JMXFetch(object):
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_path_to_jmxfetch(cls):
|
def get_path_to_jmxfetch(cls):
|
||||||
if get_os() != 'windows':
|
if monagent.common.util.get_os() != 'windows':
|
||||||
return os.path.realpath(
|
return os.path.realpath(
|
||||||
os.path.join(os.path.abspath(__file__), "..", "../collector/checks", "libs",
|
os.path.join(os.path.abspath(__file__), "..", "../collector/checks", "libs",
|
||||||
JMX_FETCH_JAR_NAME))
|
JMX_FETCH_JAR_NAME))
|
||||||
|
|
|
@ -1,16 +1,18 @@
|
||||||
""" Tools for loading Python modules from arbitrary locations.
|
"""Tools for loading Python modules from arbitrary locations.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import os
|
|
||||||
import imp
|
import imp
|
||||||
|
import os
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
# todo seems to be only used by dogstream at this point, possibly remove?
|
# todo seems to be only used by dogstream at this point, possibly remove?
|
||||||
|
|
||||||
|
|
||||||
def imp_type_for_filename(filename):
|
def imp_type_for_filename(filename):
|
||||||
"""Given the name of a Python module, return a type description suitable to
|
"""Given the name of a Python module, return a type description suitable to be passed to imp.load_module().
|
||||||
be passed to imp.load_module()"""
|
|
||||||
|
"""
|
||||||
for type_data in imp.get_suffixes():
|
for type_data in imp.get_suffixes():
|
||||||
extension = type_data[0]
|
extension = type_data[0]
|
||||||
if filename.endswith(extension):
|
if filename.endswith(extension):
|
||||||
|
@ -19,7 +21,9 @@ def imp_type_for_filename(filename):
|
||||||
|
|
||||||
|
|
||||||
def load_qualified_module(full_module_name, path=None):
|
def load_qualified_module(full_module_name, path=None):
|
||||||
"""Load a module which may be within a package"""
|
"""Load a module which may be within a package.
|
||||||
|
|
||||||
|
"""
|
||||||
remaining_pieces = full_module_name.split('.')
|
remaining_pieces = full_module_name.split('.')
|
||||||
done_pieces = []
|
done_pieces = []
|
||||||
file_obj = None
|
file_obj = None
|
||||||
|
@ -40,7 +44,8 @@ def module_name_for_filename(filename):
|
||||||
"""Given the name of a Python file, find an appropropriate module name.
|
"""Given the name of a Python file, find an appropropriate module name.
|
||||||
|
|
||||||
This involves determining whether the file is within a package, and
|
This involves determining whether the file is within a package, and
|
||||||
determining the name of same."""
|
determining the name of same.
|
||||||
|
"""
|
||||||
all_segments = filename.split(os.sep)
|
all_segments = filename.split(os.sep)
|
||||||
path_elements = all_segments[:-1]
|
path_elements = all_segments[:-1]
|
||||||
module_elements = [all_segments[-1].rsplit('.', 1)[0]]
|
module_elements = [all_segments[-1].rsplit('.', 1)[0]]
|
||||||
|
@ -52,10 +57,10 @@ def module_name_for_filename(filename):
|
||||||
|
|
||||||
|
|
||||||
def get_module(name):
|
def get_module(name):
|
||||||
"""Given either an absolute path to a Python file or a module name, load
|
"""Given either an absolute path to a Python file or a module name, load and return a Python module.
|
||||||
and return a Python module.
|
|
||||||
|
|
||||||
If the module is already loaded, takes no action."""
|
If the module is already loaded, takes no action.
|
||||||
|
"""
|
||||||
if name.startswith('/'):
|
if name.startswith('/'):
|
||||||
basename, modulename = module_name_for_filename(name)
|
basename, modulename = module_name_for_filename(name)
|
||||||
path = [basename]
|
path = [basename]
|
||||||
|
@ -68,8 +73,9 @@ def get_module(name):
|
||||||
|
|
||||||
|
|
||||||
def load(config_string, default_name=None):
|
def load(config_string, default_name=None):
|
||||||
"""Given a module name and an object expected to be contained within,
|
"""Given a module name and an object expected to be contained within, return said object.
|
||||||
return said object"""
|
|
||||||
|
"""
|
||||||
(module_name, object_name) = (config_string.rsplit(':', 1) + [default_name])[:2]
|
(module_name, object_name) = (config_string.rsplit(':', 1) + [default_name])[:2]
|
||||||
module = get_module(module_name)
|
module = get_module(module_name)
|
||||||
if object_name:
|
if object_name:
|
||||||
|
|
|
@ -709,6 +709,6 @@ def get_jmx_status():
|
||||||
|
|
||||||
return check_statuses
|
return check_statuses
|
||||||
|
|
||||||
except Exception as e:
|
except Exception:
|
||||||
log.exception("Couldn't load latest jmx status")
|
log.exception("Couldn't load latest jmx status")
|
||||||
return []
|
return []
|
||||||
|
|
|
@ -75,8 +75,6 @@ def _windows_commondata_path():
|
||||||
import ctypes
|
import ctypes
|
||||||
from ctypes import wintypes, windll
|
from ctypes import wintypes, windll
|
||||||
|
|
||||||
CSIDL_COMMON_APPDATA = 35
|
|
||||||
|
|
||||||
_SHGetFolderPath = windll.shell32.SHGetFolderPathW
|
_SHGetFolderPath = windll.shell32.SHGetFolderPathW
|
||||||
_SHGetFolderPath.argtypes = [wintypes.HWND,
|
_SHGetFolderPath.argtypes = [wintypes.HWND,
|
||||||
ctypes.c_int,
|
ctypes.c_int,
|
||||||
|
@ -84,7 +82,6 @@ def _windows_commondata_path():
|
||||||
wintypes.DWORD, wintypes.LPCWSTR]
|
wintypes.DWORD, wintypes.LPCWSTR]
|
||||||
|
|
||||||
path_buf = wintypes.create_unicode_buffer(wintypes.MAX_PATH)
|
path_buf = wintypes.create_unicode_buffer(wintypes.MAX_PATH)
|
||||||
result = _SHGetFolderPath(0, CSIDL_COMMON_APPDATA, 0, 0, path_buf)
|
|
||||||
return path_buf.value
|
return path_buf.value
|
||||||
|
|
||||||
|
|
||||||
|
@ -488,7 +485,6 @@ def get_win32service_file(osname, filename):
|
||||||
|
|
||||||
def check_yaml(conf_path):
|
def check_yaml(conf_path):
|
||||||
f = open(conf_path)
|
f = open(conf_path)
|
||||||
check_name = os.path.basename(conf_path).split('.')[0]
|
|
||||||
try:
|
try:
|
||||||
check_config = yaml.load(f.read(), Loader=Loader)
|
check_config = yaml.load(f.read(), Loader=Loader)
|
||||||
assert 'init_config' in check_config, "No 'init_config' section found"
|
assert 'init_config' in check_config, "No 'init_config' section found"
|
||||||
|
@ -588,7 +584,6 @@ def load_check_directory(agent_config):
|
||||||
# Check if the config exists OR we match the old-style config
|
# Check if the config exists OR we match the old-style config
|
||||||
conf_path = os.path.join(confd_path, '%s.yaml' % check_name)
|
conf_path = os.path.join(confd_path, '%s.yaml' % check_name)
|
||||||
if os.path.exists(conf_path):
|
if os.path.exists(conf_path):
|
||||||
f = open(conf_path)
|
|
||||||
try:
|
try:
|
||||||
check_config = check_yaml(conf_path)
|
check_config = check_yaml(conf_path)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
|
@ -35,7 +35,6 @@ class AgentSupervisor(object):
|
||||||
`child_func` is a function that should be run by the forked child
|
`child_func` is a function that should be run by the forked child
|
||||||
that will auto-restart with the RESTART_EXIT_STATUS.
|
that will auto-restart with the RESTART_EXIT_STATUS.
|
||||||
'''
|
'''
|
||||||
exit_code = cls.RESTART_EXIT_STATUS
|
|
||||||
|
|
||||||
# Allow the child process to die on SIGTERM
|
# Allow the child process to die on SIGTERM
|
||||||
signal.signal(signal.SIGTERM, cls._handle_sigterm)
|
signal.signal(signal.SIGTERM, cls._handle_sigterm)
|
||||||
|
@ -47,7 +46,6 @@ class AgentSupervisor(object):
|
||||||
# The parent waits on the child.
|
# The parent waits on the child.
|
||||||
cls.child_pid = pid
|
cls.child_pid = pid
|
||||||
_, status = os.waitpid(pid, 0)
|
_, status = os.waitpid(pid, 0)
|
||||||
exit_code = status >> 8
|
|
||||||
if parent_func is not None:
|
if parent_func is not None:
|
||||||
parent_func()
|
parent_func()
|
||||||
else:
|
else:
|
||||||
|
|
|
@ -172,7 +172,7 @@ def get_hostname(config=None):
|
||||||
if hostname is None:
|
if hostname is None:
|
||||||
try:
|
try:
|
||||||
socket_hostname = socket.gethostname()
|
socket_hostname = socket.gethostname()
|
||||||
except socket.error as e:
|
except socket.error:
|
||||||
socket_hostname = None
|
socket_hostname = None
|
||||||
if socket_hostname and is_valid_hostname(socket_hostname):
|
if socket_hostname and is_valid_hostname(socket_hostname):
|
||||||
hostname = socket_hostname
|
hostname = socket_hostname
|
||||||
|
|
|
@ -114,5 +114,5 @@ class Reporter(threading.Thread):
|
||||||
event_count=event_count,
|
event_count=event_count,
|
||||||
).persist()
|
).persist()
|
||||||
|
|
||||||
except Exception as e:
|
except Exception:
|
||||||
log.exception("Error flushing metrics")
|
log.exception("Error flushing metrics")
|
||||||
|
|
|
@ -38,7 +38,7 @@ class Server(object):
|
||||||
try:
|
try:
|
||||||
self.forward_udp_sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
|
self.forward_udp_sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
|
||||||
self.forward_udp_sock.connect((forward_to_host, forward_to_port))
|
self.forward_udp_sock.connect((forward_to_host, forward_to_port))
|
||||||
except Exception as e:
|
except Exception:
|
||||||
log.exception("Error while setting up connection to external statsd server")
|
log.exception("Error while setting up connection to external statsd server")
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
@ -83,7 +83,7 @@ class Server(object):
|
||||||
elif m[0] == u'#':
|
elif m[0] == u'#':
|
||||||
event['dimensions'] = sorted(m[1:].split(u','))
|
event['dimensions'] = sorted(m[1:].split(u','))
|
||||||
return event
|
return event
|
||||||
except IndexError as ValueError:
|
except IndexError:
|
||||||
raise Exception(u'Unparseable event packet: %s' % packet)
|
raise Exception(u'Unparseable event packet: %s' % packet)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
@ -194,7 +194,7 @@ class Server(object):
|
||||||
raise
|
raise
|
||||||
except (KeyboardInterrupt, SystemExit):
|
except (KeyboardInterrupt, SystemExit):
|
||||||
break
|
break
|
||||||
except Exception as e:
|
except Exception:
|
||||||
log.exception('Error receiving datagram')
|
log.exception('Error receiving datagram')
|
||||||
|
|
||||||
def stop(self):
|
def stop(self):
|
||||||
|
|
|
@ -21,6 +21,8 @@ from ddagent import Application
|
||||||
from win32.common import handle_exe_click
|
from win32.common import handle_exe_click
|
||||||
from collector.jmxfetch import JMXFetch
|
from collector.jmxfetch import JMXFetch
|
||||||
|
|
||||||
|
from monagent.common.config import get_config, load_check_directory, set_win32_cert_path
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
RESTART_INTERVAL = 24 * 60 * 60 # Defaults to 1 day
|
RESTART_INTERVAL = 24 * 60 * 60 # Defaults to 1 day
|
||||||
|
|
||||||
|
@ -118,8 +120,7 @@ class DDAgent(multiprocessing.Process):
|
||||||
def run(self):
|
def run(self):
|
||||||
log.debug("Windows Service - Starting collector")
|
log.debug("Windows Service - Starting collector")
|
||||||
emitters = self.get_emitters()
|
emitters = self.get_emitters()
|
||||||
systemStats = get_system_stats()
|
self.collector = Collector(self.config, emitters)
|
||||||
self.collector = Collector(self.config, emitters, systemStats)
|
|
||||||
|
|
||||||
# Load the checks_d checks
|
# Load the checks_d checks
|
||||||
checksd = load_check_directory(self.config)
|
checksd = load_check_directory(self.config)
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
# Copyright © 2009-2010 CEA
|
# Copyright <EFBFBD><EFBFBD> 2009-2010 CEA
|
||||||
# Pierre Raybaut
|
# Pierre Raybaut
|
||||||
# Licensed under the terms of the CECILL License
|
# Licensed under the terms of the CECILL License
|
||||||
# Modified for Datadog
|
# Modified for Datadog
|
||||||
|
@ -19,6 +19,8 @@ import win32service
|
||||||
|
|
||||||
# GUI Imports
|
# GUI Imports
|
||||||
from guidata.qt.QtCore import SIGNAL, Qt, QSize, QPoint, QTimer
|
from guidata.qt.QtCore import SIGNAL, Qt, QSize, QPoint, QTimer
|
||||||
|
from guidata.qt.QtGui import QInputDialog, QWidget, QFont, QLabel, QGroupBox, QHBoxLayout, QSystemTrayIcon
|
||||||
|
from guidata.qt.QtGui import QVBoxLayout, QPushButton, QSplitter, QListWidget, QMenu, QMessageBox
|
||||||
|
|
||||||
from guidata.configtools import get_icon, get_family, MONOSPACE
|
from guidata.configtools import get_icon, get_family, MONOSPACE
|
||||||
from guidata.qthelpers import get_std_icon
|
from guidata.qthelpers import get_std_icon
|
||||||
|
|
|
@ -6,6 +6,7 @@ import collections
|
||||||
class Plugins(collections.defaultdict):
|
class Plugins(collections.defaultdict):
|
||||||
|
|
||||||
"""A container for the plugin configurations used by the monasca-agent.
|
"""A container for the plugin configurations used by the monasca-agent.
|
||||||
|
|
||||||
This is essentially a defaultdict(dict) but put into a class primarily to make the interface clear, also
|
This is essentially a defaultdict(dict) but put into a class primarily to make the interface clear, also
|
||||||
to add a couple of helper methods.
|
to add a couple of helper methods.
|
||||||
Each plugin config is stored with the key being its config name (excluding .yaml).
|
Each plugin config is stored with the key being its config name (excluding .yaml).
|
||||||
|
@ -21,7 +22,8 @@ class Plugins(collections.defaultdict):
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
def merge(self, other):
|
def merge(self, other):
|
||||||
"""Do a deep merge with precedence going to other (as is the case with update)
|
"""Do a deep merge with precedence going to other (as is the case with update).
|
||||||
|
|
||||||
"""
|
"""
|
||||||
# Implemented as a function so it can be used for arbitrary dictionaries not just self, this is needed
|
# Implemented as a function so it can be used for arbitrary dictionaries not just self, this is needed
|
||||||
# for the recursive nature of the merge.
|
# for the recursive nature of the merge.
|
||||||
|
@ -29,7 +31,8 @@ class Plugins(collections.defaultdict):
|
||||||
|
|
||||||
|
|
||||||
def deep_merge(adict, other):
|
def deep_merge(adict, other):
|
||||||
"""A recursive merge of two dictionaries including combining of any lists within the data structure
|
"""A recursive merge of two dictionaries including combining of any lists within the data structure.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
for key, value in other.iteritems():
|
for key, value in other.iteritems():
|
||||||
if key in adict:
|
if key in adict:
|
||||||
|
|
|
@ -1,3 +1,6 @@
|
||||||
from plugin import Plugin
|
from plugin import Plugin
|
||||||
from utils import find_process_cmdline, find_process_name, watch_process, service_api_check
|
from utils import find_process_cmdline
|
||||||
|
from utils import find_process_name
|
||||||
|
from utils import watch_process
|
||||||
|
from utils import service_api_check
|
||||||
from service_plugin import ServicePlugin
|
from service_plugin import ServicePlugin
|
||||||
|
|
|
@ -1,11 +1,13 @@
|
||||||
"""Classes for detection of running resources to be monitored.
|
"""Classes for detection of running resources to be monitored.
|
||||||
|
|
||||||
Detection classes should be platform independent
|
Detection classes should be platform independent
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
class Plugin(object):
|
class Plugin(object):
|
||||||
|
|
||||||
"""Abstract class implemented by the monasca-agent plugin detection classes
|
"""Abstract class implemented by the monasca-agent plugin detection classes.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
# todo these should include dependency detection
|
# todo these should include dependency detection
|
||||||
|
|
||||||
|
@ -18,22 +20,28 @@ class Plugin(object):
|
||||||
self._detect()
|
self._detect()
|
||||||
|
|
||||||
def _detect(self):
|
def _detect(self):
|
||||||
"""Run detection, set self.available True if the service is detected."""
|
"""Run detection, set self.available True if the service is detected.
|
||||||
|
|
||||||
|
"""
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
def build_config(self):
|
def build_config(self):
|
||||||
"""Build the config as a Plugins object and return.
|
"""Build the config as a Plugins object and return.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
def dependencies_installed(self):
|
def dependencies_installed(self):
|
||||||
"""return True if dependencies are installed
|
"""Return True if dependencies are installed.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def name(self):
|
def name(self):
|
||||||
"""Return _name if set otherwise the class name"""
|
"""Return _name if set otherwise the class name.
|
||||||
|
|
||||||
|
"""
|
||||||
if '_name' in self.__dict__:
|
if '_name' in self.__dict__:
|
||||||
return self._name
|
return self._name
|
||||||
else:
|
else:
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
from monsetup.detection import ServicePlugin
|
import monsetup.detection
|
||||||
|
|
||||||
|
|
||||||
class Ceilometer(ServicePlugin):
|
class Ceilometer(monsetup.detection.ServicePlugin):
|
||||||
|
|
||||||
"""Detect Ceilometer daemons and setup configuration to monitor them."""
|
"""Detect Ceilometer daemons and setup configuration to monitor them."""
|
||||||
|
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
from monsetup.detection import ServicePlugin
|
import monsetup.detection
|
||||||
|
|
||||||
|
|
||||||
class Cinder(ServicePlugin):
|
class Cinder(monsetup.detection.ServicePlugin):
|
||||||
|
|
||||||
"""Detect Cinder daemons and setup configuration to monitor them."""
|
"""Detect Cinder daemons and setup configuration to monitor them."""
|
||||||
|
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
from monsetup.detection import ServicePlugin
|
import monsetup.detection
|
||||||
|
|
||||||
|
|
||||||
class Glance(ServicePlugin):
|
class Glance(monsetup.detection.ServicePlugin):
|
||||||
|
|
||||||
"""Detect Glance daemons and setup configuration to monitor them."""
|
"""Detect Glance daemons and setup configuration to monitor them."""
|
||||||
|
|
||||||
|
|
|
@ -1,30 +1,34 @@
|
||||||
import collections
|
import collections
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
from monsetup.detection import Plugin, find_process_cmdline, watch_process
|
import monsetup.agent_config
|
||||||
from monsetup import agent_config
|
import monsetup.detection
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class Kafka(Plugin):
|
class Kafka(monsetup.detection.Plugin):
|
||||||
|
|
||||||
"""Detect Kafka daemons and sets up configuration to monitor them.
|
"""Detect Kafka daemons and sets up configuration to monitor them.
|
||||||
|
|
||||||
This plugin configures the kafka_consumer plugin and does not configure any jmx based checks against kafka.
|
This plugin configures the kafka_consumer plugin and does not configure any jmx based checks against kafka.
|
||||||
Note this plugin will pull the same information from kafka on each node in the cluster it runs on.
|
Note this plugin will pull the same information from kafka on each node in the cluster it runs on.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def _detect(self):
|
def _detect(self):
|
||||||
"""Run detection, set self.available True if the service is detected."""
|
"""Run detection, set self.available True if the service is detected.
|
||||||
if find_process_cmdline('kafka') is not None:
|
|
||||||
|
"""
|
||||||
|
if monsetup.detection.find_process_cmdline('kafka') is not None:
|
||||||
self.available = True
|
self.available = True
|
||||||
|
|
||||||
def build_config(self):
|
def build_config(self):
|
||||||
"""Build the config as a Plugins object and return.
|
"""Build the config as a Plugins object and return.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
config = agent_config.Plugins()
|
config = monsetup.agent_config.Plugins()
|
||||||
# First watch the process
|
# First watch the process
|
||||||
config.merge(watch_process(['kafka']))
|
config.merge(monsetup.detection.watch_process(['kafka']))
|
||||||
log.info("\tWatching the kafka process.")
|
log.info("\tWatching the kafka process.")
|
||||||
|
|
||||||
if self.dependencies_installed():
|
if self.dependencies_installed():
|
||||||
|
|
|
@ -1,9 +1,11 @@
|
||||||
from monsetup.detection import ServicePlugin
|
import monsetup.detection
|
||||||
|
|
||||||
|
|
||||||
class Keystone(ServicePlugin):
|
class Keystone(monsetup.detection.ServicePlugin):
|
||||||
|
|
||||||
"""Detect Keystone daemons and setup configuration to monitor them."""
|
"""Detect Keystone daemons and setup configuration to monitor them.
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
def __init__(self, template_dir, overwrite=True):
|
def __init__(self, template_dir, overwrite=True):
|
||||||
service_params = {
|
service_params = {
|
||||||
|
|
|
@ -1,28 +1,33 @@
|
||||||
"""Classes for monitoring the monitoring server stack.
|
"""Classes for monitoring the monitoring server stack.
|
||||||
|
|
||||||
Covering mon-persister, mon-api and mon-thresh.
|
Covering mon-persister, mon-api and mon-thresh.
|
||||||
Kafka, mysql, vertica and influxdb are covered by other detection plugins. Mon-notification uses statsd.
|
Kafka, mysql, vertica and influxdb are covered by other detection plugins. Mon-notification uses statsd.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
from monsetup.detection import Plugin, find_process_cmdline, watch_process
|
import monsetup.agent_config
|
||||||
from monsetup import agent_config
|
import monsetup.detection
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class MonPersister(Plugin):
|
class MonPersister(monsetup.detection.Plugin):
|
||||||
|
|
||||||
"""Detect mon_persister and setup monitoring.
|
"""Detect mon_persister and setup monitoring.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def _detect(self):
|
def _detect(self):
|
||||||
"""Run detection, set self.available True if the service is detected."""
|
"""Run detection, set self.available True if the service is detected.
|
||||||
if find_process_cmdline('mon-persister') is not None:
|
|
||||||
|
"""
|
||||||
|
if monsetup.detection.find_process_cmdline('mon-persister') is not None:
|
||||||
self.available = True
|
self.available = True
|
||||||
|
|
||||||
def build_config(self):
|
def build_config(self):
|
||||||
"""Build the config as a Plugins object and return.
|
"""Build the config as a Plugins object and return.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
log.info("\tEnabling the mon persister healthcheck")
|
log.info("\tEnabling the mon persister healthcheck")
|
||||||
return dropwizard_health_check('mon-persister', 'http://localhost:8091/healthcheck')
|
return dropwizard_health_check('mon-persister', 'http://localhost:8091/healthcheck')
|
||||||
|
@ -35,14 +40,15 @@ class MonPersister(Plugin):
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
class MonAPI(Plugin):
|
class MonAPI(monsetup.detection.Plugin):
|
||||||
|
|
||||||
"""Detect mon_api and setup monitoring.
|
"""Detect mon_api and setup monitoring.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def _detect(self):
|
def _detect(self):
|
||||||
"""Run detection, set self.available True if the service is detected."""
|
"""Run detection, set self.available True if the service is detected."""
|
||||||
if find_process_cmdline('mon-api') is not None:
|
if monsetup.detection.find_process_cmdline('mon-api') is not None:
|
||||||
self.available = True
|
self.available = True
|
||||||
|
|
||||||
def build_config(self):
|
def build_config(self):
|
||||||
|
@ -59,28 +65,35 @@ class MonAPI(Plugin):
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
class MonThresh(Plugin):
|
class MonThresh(monsetup.detection.Plugin):
|
||||||
|
|
||||||
"""Detect the running mon-thresh and monitor"""
|
"""Detect the running mon-thresh and monitor.
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
def _detect(self):
|
def _detect(self):
|
||||||
"""Run detection, set self.available True if the service is detected."""
|
"""Run detection, set self.available True if the service is detected.
|
||||||
if find_process_cmdline('mon-thresh') is not None:
|
|
||||||
|
"""
|
||||||
|
if monsetup.detection.find_process_cmdline('mon-thresh') is not None:
|
||||||
self.available = True
|
self.available = True
|
||||||
|
|
||||||
def build_config(self):
|
def build_config(self):
|
||||||
"""Build the config as a Plugins object and return.
|
"""Build the config as a Plugins object and return.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
log.info("\tWatching the mon-thresh process.")
|
log.info("\tWatching the mon-thresh process.")
|
||||||
return watch_process(['mon-thresh'])
|
return monsetup.detection.watch_process(['mon-thresh'])
|
||||||
|
|
||||||
def dependencies_installed(self):
|
def dependencies_installed(self):
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
def dropwizard_health_check(name, url):
|
def dropwizard_health_check(name, url):
|
||||||
"""Setup a dropwizard heathcheck to be watched by the http_check plugin."""
|
"""Setup a dropwizard heathcheck to be watched by the http_check plugin.
|
||||||
config = agent_config.Plugins()
|
|
||||||
|
"""
|
||||||
|
config = monsetup.agent_config.Plugins()
|
||||||
config['http_check'] = {'init_config': None,
|
config['http_check'] = {'init_config': None,
|
||||||
'instances': [{'name': name,
|
'instances': [{'name': name,
|
||||||
'url': url,
|
'url': url,
|
||||||
|
|
|
@ -1,16 +1,17 @@
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
from monsetup.detection import Plugin, find_process_name, watch_process
|
import monsetup.agent_config
|
||||||
from monsetup import agent_config
|
import monsetup.detection
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
mysql_conf = '/root/.my.cnf'
|
mysql_conf = '/root/.my.cnf'
|
||||||
|
|
||||||
|
|
||||||
class MySQL(Plugin):
|
class MySQL(monsetup.detection.Plugin):
|
||||||
|
|
||||||
"""Detect MySQL daemons and setup configuration to monitor them.
|
"""Detect MySQL daemons and setup configuration to monitor them.
|
||||||
|
|
||||||
This plugin needs user/pass infor for mysql setup, this is
|
This plugin needs user/pass infor for mysql setup, this is
|
||||||
best placed in /root/.my.cnf in a format such as
|
best placed in /root/.my.cnf in a format such as
|
||||||
[client]
|
[client]
|
||||||
|
@ -19,16 +20,19 @@ class MySQL(Plugin):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def _detect(self):
|
def _detect(self):
|
||||||
"""Run detection, set self.available True if the service is detected"""
|
"""Run detection, set self.available True if the service is detected.
|
||||||
if find_process_name('mysqld') is not None:
|
|
||||||
|
"""
|
||||||
|
if monsetup.detection.find_process_name('mysqld') is not None:
|
||||||
self.available = True
|
self.available = True
|
||||||
|
|
||||||
def build_config(self):
|
def build_config(self):
|
||||||
"""Build the config as a Plugins object and return.
|
"""Build the config as a Plugins object and return.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
config = agent_config.Plugins()
|
config = monsetup.agent_config.Plugins()
|
||||||
# First watch the process
|
# First watch the process
|
||||||
config.merge(watch_process(['mysqld']))
|
config.merge(monsetup.detection.watch_process(['mysqld']))
|
||||||
log.info("\tWatching the mysqld process.")
|
log.info("\tWatching the mysqld process.")
|
||||||
|
|
||||||
# Attempt login, requires either an empty root password from localhost
|
# Attempt login, requires either an empty root password from localhost
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
import os
|
import os
|
||||||
|
|
||||||
import yaml
|
import yaml
|
||||||
|
|
||||||
from monsetup.detection import Plugin
|
from monsetup.detection import Plugin
|
||||||
|
@ -8,14 +9,18 @@ from monsetup import agent_config
|
||||||
class Network(Plugin):
|
class Network(Plugin):
|
||||||
|
|
||||||
"""No configuration here, working networking is assumed so this is either on or off.
|
"""No configuration here, working networking is assumed so this is either on or off.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def _detect(self):
|
def _detect(self):
|
||||||
"""Run detection, set self.available True if the service is detected."""
|
"""Run detection, set self.available True if the service is detected.
|
||||||
|
|
||||||
|
"""
|
||||||
self.available = True
|
self.available = True
|
||||||
|
|
||||||
def build_config(self):
|
def build_config(self):
|
||||||
"""Build the config as a Plugins object and return.
|
"""Build the config as a Plugins object and return.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
# A bit silly to parse the yaml only for it to be converted back but this
|
# A bit silly to parse the yaml only for it to be converted back but this
|
||||||
# plugin is the exception not the rule
|
# plugin is the exception not the rule
|
||||||
|
|
|
@ -1,9 +1,11 @@
|
||||||
from monsetup.detection import ServicePlugin
|
import monsetup.detection
|
||||||
|
|
||||||
|
|
||||||
class Neutron(ServicePlugin):
|
class Neutron(monsetup.detection.ServicePlugin):
|
||||||
|
|
||||||
"""Detect Neutron daemons and setup configuration to monitor them."""
|
"""Detect Neutron daemons and setup configuration to monitor them.
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
def __init__(self, template_dir, overwrite=True):
|
def __init__(self, template_dir, overwrite=True):
|
||||||
service_params = {
|
service_params = {
|
||||||
|
|
|
@ -1,9 +1,11 @@
|
||||||
from monsetup.detection import ServicePlugin
|
import monsetup.detection
|
||||||
|
|
||||||
|
|
||||||
class Nova(ServicePlugin):
|
class Nova(monsetup.detection.ServicePlugin):
|
||||||
|
|
||||||
"""Detect Nova daemons and setup configuration to monitor them."""
|
"""Detect Nova daemons and setup configuration to monitor them.
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
def __init__(self, template_dir, overwrite=True):
|
def __init__(self, template_dir, overwrite=True):
|
||||||
service_params = {
|
service_params = {
|
||||||
|
|
|
@ -1,30 +1,35 @@
|
||||||
import os
|
import os
|
||||||
|
|
||||||
import yaml
|
import yaml
|
||||||
|
|
||||||
from monsetup.detection import Plugin, find_process_name
|
import monsetup.agent_config
|
||||||
from monsetup import agent_config
|
import monsetup.detection
|
||||||
|
|
||||||
|
|
||||||
class Postfix(Plugin):
|
class Postfix(monsetup.detection.Plugin):
|
||||||
|
|
||||||
|
"""If postfix is running install the default config.
|
||||||
|
|
||||||
"""If postfix is running install the default config
|
|
||||||
"""
|
"""
|
||||||
# todo this is is disabled as postfix requires passwordless sudo for the
|
# todo this is is disabled as postfix requires passwordless sudo for the
|
||||||
# monasca-agent user, a bad practice
|
# monasca-agent user, a bad practice
|
||||||
|
|
||||||
def _detect(self):
|
def _detect(self):
|
||||||
"""Run detection, set self.available True if the service is detected."""
|
"""Run detection, set self.available True if the service is detected.
|
||||||
if find_process_name('postfix') is not None:
|
|
||||||
|
"""
|
||||||
|
if monsetup.detection.find_process_name('postfix') is not None:
|
||||||
self.available = True
|
self.available = True
|
||||||
|
|
||||||
def build_config(self):
|
def build_config(self):
|
||||||
"""Build the config as a Plugins object and return.
|
"""Build the config as a Plugins object and return.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
# A bit silly to parse the yaml only for it to be converted back but this
|
# A bit silly to parse the yaml only for it to be converted back but this
|
||||||
# plugin is the exception not the rule
|
# plugin is the exception not the rule
|
||||||
with open(os.path.join(self.template_dir, 'conf.d/postfix.yaml.example'), 'r') as postfix_template:
|
with open(os.path.join(self.template_dir, 'conf.d/postfix.yaml.example'), 'r') as postfix_template:
|
||||||
default_net_config = yaml.load(postfix_template.read())
|
default_net_config = yaml.load(postfix_template.read())
|
||||||
config = agent_config.Plugins()
|
config = monsetup.agent_config.Plugins()
|
||||||
config['postfix'] = default_net_config
|
config['postfix'] = default_net_config
|
||||||
return config
|
return config
|
||||||
|
|
||||||
|
|
|
@ -1,9 +1,11 @@
|
||||||
from monsetup.detection import ServicePlugin
|
import monsetup.detection
|
||||||
|
|
||||||
|
|
||||||
class Swift(ServicePlugin):
|
class Swift(monsetup.detection.ServicePlugin):
|
||||||
|
|
||||||
"""Detect Swift daemons and setup configuration to monitor them."""
|
"""Detect Swift daemons and setup configuration to monitor them.
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
def __init__(self, template_dir, overwrite=True):
|
def __init__(self, template_dir, overwrite=True):
|
||||||
service_params = {
|
service_params = {
|
||||||
|
|
|
@ -1,30 +1,35 @@
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
|
|
||||||
import yaml
|
import yaml
|
||||||
|
|
||||||
from monsetup.detection import Plugin, find_process_cmdline, watch_process
|
import monsetup.agent_config
|
||||||
from monsetup import agent_config
|
import monsetup.detection
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class Zookeeper(Plugin):
|
class Zookeeper(monsetup.detection.Plugin):
|
||||||
|
|
||||||
"""Detect Zookeeper daemons and setup configuration to monitor them.
|
"""Detect Zookeeper daemons and setup configuration to monitor them.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def _detect(self):
|
def _detect(self):
|
||||||
"""Run detection, set self.available True if the service is detected."""
|
"""Run detection, set self.available True if the service is detected.
|
||||||
if find_process_cmdline('zookeeper') is not None:
|
|
||||||
|
"""
|
||||||
|
if monsetup.detection.find_process_cmdline('zookeeper') is not None:
|
||||||
self.available = True
|
self.available = True
|
||||||
|
|
||||||
def build_config(self):
|
def build_config(self):
|
||||||
"""Build the config as a Plugins object and return.
|
"""Build the config as a Plugins object and return.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
config = agent_config.Plugins()
|
config = monsetup.agent_config.Plugins()
|
||||||
# First watch the process
|
# First watch the process
|
||||||
log.info("\tWatching the zookeeper process.")
|
log.info("\tWatching the zookeeper process.")
|
||||||
config.merge(watch_process(['zookeeper']))
|
config.merge(monsetup.detection.watch_process(['zookeeper']))
|
||||||
|
|
||||||
log.info("\tEnabling the zookeeper plugin")
|
log.info("\tEnabling the zookeeper plugin")
|
||||||
with open(os.path.join(self.template_dir, 'conf.d/zk.yaml.example'), 'r') as zk_template:
|
with open(os.path.join(self.template_dir, 'conf.d/zk.yaml.example'), 'r') as zk_template:
|
||||||
|
|
|
@ -1,8 +1,11 @@
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
from plugin import Plugin
|
from plugin import Plugin
|
||||||
|
|
||||||
from monsetup import agent_config
|
from monsetup import agent_config
|
||||||
from monsetup.detection import find_process_cmdline, watch_process, service_api_check
|
from monsetup.detection import find_process_cmdline
|
||||||
|
from monsetup.detection import service_api_check
|
||||||
|
from monsetup.detection import watch_process
|
||||||
|
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
|
@ -11,6 +14,7 @@ log = logging.getLogger(__name__)
|
||||||
class ServicePlugin(Plugin):
|
class ServicePlugin(Plugin):
|
||||||
|
|
||||||
"""Base class implemented by the monasca-agent plugin detection classes
|
"""Base class implemented by the monasca-agent plugin detection classes
|
||||||
|
|
||||||
for OpenStack Services
|
for OpenStack Services
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
@ -23,7 +27,9 @@ class ServicePlugin(Plugin):
|
||||||
super(ServicePlugin, self).__init__(kwargs['template_dir'], kwargs['overwrite'])
|
super(ServicePlugin, self).__init__(kwargs['template_dir'], kwargs['overwrite'])
|
||||||
|
|
||||||
def _detect(self):
|
def _detect(self):
|
||||||
"""Run detection"""
|
"""Run detection.
|
||||||
|
|
||||||
|
"""
|
||||||
self.found_processes = []
|
self.found_processes = []
|
||||||
|
|
||||||
for process in self.process_names:
|
for process in self.process_names:
|
||||||
|
@ -34,6 +40,7 @@ class ServicePlugin(Plugin):
|
||||||
|
|
||||||
def build_config(self):
|
def build_config(self):
|
||||||
"""Build the config as a Plugins object and return.
|
"""Build the config as a Plugins object and return.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
config = agent_config.Plugins()
|
config = agent_config.Plugins()
|
||||||
for process in self.found_processes:
|
for process in self.found_processes:
|
||||||
|
@ -50,6 +57,7 @@ class ServicePlugin(Plugin):
|
||||||
return config
|
return config
|
||||||
|
|
||||||
def dependencies_installed(self):
|
def dependencies_installed(self):
|
||||||
"""return True if dependencies are installed
|
"""Return True if dependencies are installed.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
return True
|
return True
|
||||||
|
|
|
@ -6,7 +6,8 @@ from monsetup import agent_config
|
||||||
|
|
||||||
|
|
||||||
def find_process_cmdline(search_string):
|
def find_process_cmdline(search_string):
|
||||||
"""Simple function to search running process for one with cmdline containing
|
"""Simple function to search running process for one with cmdline containing.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
for process in psutil.process_iter():
|
for process in psutil.process_iter():
|
||||||
for arg in process.cmdline():
|
for arg in process.cmdline():
|
||||||
|
@ -18,6 +19,7 @@ def find_process_cmdline(search_string):
|
||||||
|
|
||||||
def find_process_name(pname):
|
def find_process_name(pname):
|
||||||
"""Simple function to search running process for one with pname.
|
"""Simple function to search running process for one with pname.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
for process in psutil.process_iter():
|
for process in psutil.process_iter():
|
||||||
if pname == process.name():
|
if pname == process.name():
|
||||||
|
@ -28,6 +30,7 @@ def find_process_name(pname):
|
||||||
|
|
||||||
def watch_process(search_strings, service=None, component=None):
|
def watch_process(search_strings, service=None, component=None):
|
||||||
"""Takes a list of process search strings and returns a Plugins object with the config set.
|
"""Takes a list of process search strings and returns a Plugins object with the config set.
|
||||||
|
|
||||||
This was built as a helper as many plugins setup process watching
|
This was built as a helper as many plugins setup process watching
|
||||||
"""
|
"""
|
||||||
config = agent_config.Plugins()
|
config = agent_config.Plugins()
|
||||||
|
@ -44,7 +47,9 @@ def watch_process(search_strings, service=None, component=None):
|
||||||
|
|
||||||
|
|
||||||
def service_api_check(name, url, pattern, service=None, component=None):
|
def service_api_check(name, url, pattern, service=None, component=None):
|
||||||
"""Setup a service api to be watched by the http_check plugin."""
|
"""Setup a service api to be watched by the http_check plugin.
|
||||||
|
|
||||||
|
"""
|
||||||
config = agent_config.Plugins()
|
config = agent_config.Plugins()
|
||||||
parameters = {'name': name,
|
parameters = {'name': name,
|
||||||
'url': url,
|
'url': url,
|
||||||
|
|
|
@ -9,13 +9,22 @@ import pwd
|
||||||
import socket
|
import socket
|
||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
import yaml
|
|
||||||
import platform
|
import platform
|
||||||
|
|
||||||
|
import yaml
|
||||||
import agent_config
|
import agent_config
|
||||||
from detection.plugins import kafka, mon, mysql, network, zookeeper
|
from detection.plugins import kafka
|
||||||
from detection.plugins import nova, glance, cinder, neutron, swift
|
from detection.plugins import mon
|
||||||
from detection.plugins import keystone, ceilometer
|
from detection.plugins import mysql
|
||||||
|
from detection.plugins import network
|
||||||
|
from detection.plugins import zookeeper
|
||||||
|
from detection.plugins import nova
|
||||||
|
from detection.plugins import glance
|
||||||
|
from detection.plugins import cinder
|
||||||
|
from detection.plugins import neutron
|
||||||
|
from detection.plugins import swift
|
||||||
|
from detection.plugins import keystone
|
||||||
|
from detection.plugins import ceilometer
|
||||||
from service import sysv
|
from service import sysv
|
||||||
|
|
||||||
# List of all detection plugins to run
|
# List of all detection plugins to run
|
||||||
|
|
|
@ -1,11 +1,14 @@
|
||||||
"""Classes implementing different methods for running monasca-agent on startup as well as starting the process immediately
|
"""Classes implementing different methods for running monasca-agent on startup as well as starting the process immediately.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
import psutil
|
import psutil
|
||||||
|
|
||||||
|
|
||||||
class Service(object):
|
class Service(object):
|
||||||
|
|
||||||
"""Abstract base class implementing the interface for various service types."""
|
"""Abstract base class implementing the interface for various service types.
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
def __init__(self, config_dir, log_dir, name='monasca-agent'):
|
def __init__(self, config_dir, log_dir, name='monasca-agent'):
|
||||||
self.config_dir = config_dir
|
self.config_dir = config_dir
|
||||||
|
@ -14,29 +17,34 @@ class Service(object):
|
||||||
|
|
||||||
def enable(self):
|
def enable(self):
|
||||||
"""Sets monasca-agent to start on boot.
|
"""Sets monasca-agent to start on boot.
|
||||||
|
|
||||||
Generally this requires running as super user
|
Generally this requires running as super user
|
||||||
"""
|
"""
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
def start(self, restart=True):
|
def start(self, restart=True):
|
||||||
"""Starts monasca-agent
|
"""Starts monasca-agent.
|
||||||
|
|
||||||
If the agent is running and restart is True, restart
|
If the agent is running and restart is True, restart
|
||||||
"""
|
"""
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
def stop(self):
|
def stop(self):
|
||||||
"""Stops monasca-agent
|
"""Stops monasca-agent.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
def is_enabled(self):
|
def is_enabled(self):
|
||||||
"""Returns True if monasca-agent is setup to start on boot, false otherwise
|
"""Returns True if monasca-agent is setup to start on boot, false otherwise.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def is_running():
|
def is_running():
|
||||||
"""Returns True if monasca-agent is running, false otherwise
|
"""Returns True if monasca-agent is running, false otherwise.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
# Looking for the supervisor process not the individual components
|
# Looking for the supervisor process not the individual components
|
||||||
for process in psutil.process_iter():
|
for process in psutil.process_iter():
|
||||||
|
|
|
@ -1,20 +1,23 @@
|
||||||
"""System V style service.
|
"""System V style service.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
from glob import glob
|
import glob
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import pwd
|
import pwd
|
||||||
import subprocess
|
import subprocess
|
||||||
|
|
||||||
from . import Service
|
import service
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class SysV(Service):
|
class SysV(service.Service):
|
||||||
|
|
||||||
def __init__(self, init_template, config_dir, log_dir, name='monasca-agent', username='monasca-agent'):
|
def __init__(self, init_template, config_dir, log_dir, name='monasca-agent', username='monasca-agent'):
|
||||||
"""Setup this service with the given init template"""
|
"""Setup this service with the given init template.
|
||||||
|
|
||||||
|
"""
|
||||||
super(SysV, self).__init__(config_dir, log_dir, name)
|
super(SysV, self).__init__(config_dir, log_dir, name)
|
||||||
self.init_script = '/etc/init.d/%s' % self.name
|
self.init_script = '/etc/init.d/%s' % self.name
|
||||||
self.init_template = init_template
|
self.init_template = init_template
|
||||||
|
@ -22,6 +25,7 @@ class SysV(Service):
|
||||||
|
|
||||||
def enable(self):
|
def enable(self):
|
||||||
"""Sets monasca-agent to start on boot.
|
"""Sets monasca-agent to start on boot.
|
||||||
|
|
||||||
Generally this requires running as super user
|
Generally this requires running as super user
|
||||||
"""
|
"""
|
||||||
# Create monasca-agent user/group if needed
|
# Create monasca-agent user/group if needed
|
||||||
|
@ -53,7 +57,8 @@ class SysV(Service):
|
||||||
log.info('Enabled {0} service via SysV init script'.format(self.name))
|
log.info('Enabled {0} service via SysV init script'.format(self.name))
|
||||||
|
|
||||||
def start(self, restart=True):
|
def start(self, restart=True):
|
||||||
"""Starts monasca-agent
|
"""Starts monasca-agent.
|
||||||
|
|
||||||
If the agent is running and restart is True, restart
|
If the agent is running and restart is True, restart
|
||||||
"""
|
"""
|
||||||
if not self.is_enabled():
|
if not self.is_enabled():
|
||||||
|
@ -65,7 +70,8 @@ class SysV(Service):
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def stop(self):
|
def stop(self):
|
||||||
"""Stops monasca-agent
|
"""Stops monasca-agent.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
if not self.is_enabled():
|
if not self.is_enabled():
|
||||||
log.error('The service is not enabled')
|
log.error('The service is not enabled')
|
||||||
|
@ -76,12 +82,13 @@ class SysV(Service):
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def is_enabled(self):
|
def is_enabled(self):
|
||||||
"""Returns True if monasca-agent is setup to start on boot, false otherwise
|
"""Returns True if monasca-agent is setup to start on boot, false otherwise.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
if not os.path.exists(self.init_script):
|
if not os.path.exists(self.init_script):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
if len(glob('/etc/rc?.d/S??monasca-agent')) > 0:
|
if len(glob.glob('/etc/rc?.d/S??monasca-agent')) > 0:
|
||||||
return True
|
return True
|
||||||
else:
|
else:
|
||||||
return False
|
return False
|
||||||
|
|
24
setup.py
24
setup.py
|
@ -1,6 +1,22 @@
|
||||||
from setuptools import setup
|
#!/usr/bin/env python
|
||||||
|
|
||||||
setup(
|
# Copyright (c) 2014 Hewlett-Packard Development Company, L.P.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||||
|
# implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
import setuptools
|
||||||
|
|
||||||
|
setuptools.setup(
|
||||||
setup_requires=['pbr'],
|
setup_requires=['pbr'],
|
||||||
pbr=True
|
pbr=True)
|
||||||
)
|
|
||||||
|
|
|
@ -75,9 +75,4 @@ def get_check(name, config_str):
|
||||||
raise Exception(
|
raise Exception(
|
||||||
"Unable to import check %s. Missing a class that inherits AgentCheck" % name)
|
"Unable to import check %s. Missing a class that inherits AgentCheck" % name)
|
||||||
|
|
||||||
agent_config = {
|
|
||||||
'version': '0.1',
|
|
||||||
'api_key': 'tota'
|
|
||||||
}
|
|
||||||
|
|
||||||
return check_class.from_yaml(yaml_text=config_str, check_name=name)
|
return check_class.from_yaml(yaml_text=config_str, check_name=name)
|
||||||
|
|
|
@ -73,7 +73,6 @@ class TestCacti(unittest.TestCase):
|
||||||
# Check once more to make sure last_ts ignores None vals when calculating
|
# Check once more to make sure last_ts ignores None vals when calculating
|
||||||
# where to start from
|
# where to start from
|
||||||
check.check(instances[0])
|
check.check(instances[0])
|
||||||
results3 = check.get_metrics()
|
|
||||||
last_ts2 = check.last_ts[rrd_dir + '/localhost_hdd_free_10.rrd.AVERAGE']
|
last_ts2 = check.last_ts[rrd_dir + '/localhost_hdd_free_10.rrd.AVERAGE']
|
||||||
|
|
||||||
self.assertEqual(last_ts1, last_ts2)
|
self.assertEqual(last_ts1, last_ts2)
|
||||||
|
|
|
@ -42,7 +42,6 @@ class JMXTestCase(unittest.TestCase):
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
aggregator = MetricsAggregator("test_host")
|
aggregator = MetricsAggregator("test_host")
|
||||||
self.server = Server(aggregator, "localhost", STATSD_PORT)
|
self.server = Server(aggregator, "localhost", STATSD_PORT)
|
||||||
pid_file = PidFile('dogstatsd')
|
|
||||||
self.reporter = DummyReporter(aggregator)
|
self.reporter = DummyReporter(aggregator)
|
||||||
|
|
||||||
self.t1 = threading.Thread(target=self.server.start)
|
self.t1 = threading.Thread(target=self.server.start)
|
||||||
|
|
|
@ -212,6 +212,7 @@ class TestDogstream(TailTestCase):
|
||||||
'dogstreams': '%s:tests.test_datadog:parse_ancient_function_plugin' %
|
'dogstreams': '%s:tests.test_datadog:parse_ancient_function_plugin' %
|
||||||
self.log_file.name})
|
self.log_file.name})
|
||||||
actual_output = plugdog.check(self.config, move_end=False)
|
actual_output = plugdog.check(self.config, move_end=False)
|
||||||
|
self.assertEqual(expected_output, actual_output)
|
||||||
|
|
||||||
def test_dogstream_function_plugin(self):
|
def test_dogstream_function_plugin(self):
|
||||||
"""Ensure that non-class-based stateful plugins work"""
|
"""Ensure that non-class-based stateful plugins work"""
|
||||||
|
|
|
@ -26,6 +26,7 @@ class TestElastic(unittest.TestCase):
|
||||||
loop += 1
|
loop += 1
|
||||||
if loop >= MAX_WAIT:
|
if loop >= MAX_WAIT:
|
||||||
break
|
break
|
||||||
|
return request
|
||||||
|
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
self.process = None
|
self.process = None
|
||||||
|
|
|
@ -38,6 +38,7 @@ class HaproxyTestCase(unittest.TestCase):
|
||||||
loop += 1
|
loop += 1
|
||||||
if loop >= MAX_WAIT:
|
if loop >= MAX_WAIT:
|
||||||
break
|
break
|
||||||
|
return request
|
||||||
|
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
self.process = None
|
self.process = None
|
||||||
|
|
|
@ -42,7 +42,6 @@ class JMXTestCase(unittest.TestCase):
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
aggregator = MetricsAggregator("test_host")
|
aggregator = MetricsAggregator("test_host")
|
||||||
self.server = Server(aggregator, "localhost", STATSD_PORT)
|
self.server = Server(aggregator, "localhost", STATSD_PORT)
|
||||||
pid_file = PidFile('dogstatsd')
|
|
||||||
self.reporter = DummyReporter(aggregator)
|
self.reporter = DummyReporter(aggregator)
|
||||||
|
|
||||||
self.t1 = threading.Thread(target=self.server.start)
|
self.t1 = threading.Thread(target=self.server.start)
|
||||||
|
|
|
@ -52,7 +52,7 @@ class TestMemCache(unittest.TestCase):
|
||||||
|
|
||||||
# Check that we got 21 metrics for a specific host
|
# Check that we got 21 metrics for a specific host
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
len([t for t in r if t[3].get('dimensions') == {"instance": mythirdtag}]), 21, r)
|
len([t for t in r if t[3].get('dimensions') == {"instance": "mythirdtag"}]), 21, r)
|
||||||
|
|
||||||
def testDimensions(self):
|
def testDimensions(self):
|
||||||
raise SkipTest('Requires mcache')
|
raise SkipTest('Requires mcache')
|
||||||
|
|
|
@ -79,7 +79,6 @@ class TestMongo(unittest.TestCase):
|
||||||
c1.admin.command("replSetInitiate")
|
c1.admin.command("replSetInitiate")
|
||||||
# Sleep for 15s until replication is stable
|
# Sleep for 15s until replication is stable
|
||||||
time.sleep(30)
|
time.sleep(30)
|
||||||
x = c1.admin.command("replSetGetStatus")
|
|
||||||
assert pymongo.Connection('localhost:%s' % PORT2)
|
assert pymongo.Connection('localhost:%s' % PORT2)
|
||||||
except Exception:
|
except Exception:
|
||||||
logging.getLogger().exception("Cannot instantiate mongod properly")
|
logging.getLogger().exception("Cannot instantiate mongod properly")
|
||||||
|
|
|
@ -42,7 +42,6 @@ class JMXTestCase(unittest.TestCase):
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
aggregator = MetricsAggregator("test_host")
|
aggregator = MetricsAggregator("test_host")
|
||||||
self.server = Server(aggregator, "localhost", STATSD_PORT)
|
self.server = Server(aggregator, "localhost", STATSD_PORT)
|
||||||
pid_file = PidFile('dogstatsd')
|
|
||||||
self.reporter = DummyReporter(aggregator)
|
self.reporter = DummyReporter(aggregator)
|
||||||
|
|
||||||
self.t1 = threading.Thread(target=self.server.start)
|
self.t1 = threading.Thread(target=self.server.start)
|
||||||
|
|
|
@ -1,11 +1,17 @@
|
||||||
import unittest
|
|
||||||
from functools import reduce
|
from functools import reduce
|
||||||
|
import logging
|
||||||
|
import platform
|
||||||
|
import re
|
||||||
|
import unittest
|
||||||
|
|
||||||
logging.basicConfig(level=logging.DEBUG)
|
logging.basicConfig(level=logging.DEBUG)
|
||||||
logger = logging.getLogger(__file__)
|
logger = logging.getLogger(__file__)
|
||||||
|
|
||||||
from monagent.collector.checks.system.unix import *
|
|
||||||
from common import get_check
|
from common import get_check
|
||||||
|
from monagent.collector.checks.system.unix import Cpu
|
||||||
|
from monagent.collector.checks.system.unix import Disk
|
||||||
|
from monagent.collector.checks.system.unix import IO
|
||||||
|
from monagent.collector.checks.system.unix import Memory
|
||||||
|
|
||||||
|
|
||||||
class TestSystem(unittest.TestCase):
|
class TestSystem(unittest.TestCase):
|
||||||
|
@ -115,7 +121,7 @@ none 985964 1 985963 1% /lib/init/rw
|
||||||
|
|
||||||
def test_collecting_disk_metrics(self):
|
def test_collecting_disk_metrics(self):
|
||||||
"""Testing disk stats gathering"""
|
"""Testing disk stats gathering"""
|
||||||
if Platform.is_unix():
|
if platform.system() == 'Linux':
|
||||||
disk = Disk(logger, {})
|
disk = Disk(logger, {})
|
||||||
res = disk.check()
|
res = disk.check()
|
||||||
# Assert we have disk & inode stats
|
# Assert we have disk & inode stats
|
||||||
|
@ -126,7 +132,7 @@ none 985964 1 985963 1% /lib/init/rw
|
||||||
def testMemory(self):
|
def testMemory(self):
|
||||||
global logger
|
global logger
|
||||||
res = Memory(logger).check()
|
res = Memory(logger).check()
|
||||||
if Platform.is_linux():
|
if platform.system() == 'Linux':
|
||||||
for k in (
|
for k in (
|
||||||
"swapTotal", "swapFree", "swapPctFree", "swapUsed", "physTotal", "physFree",
|
"swapTotal", "swapFree", "swapPctFree", "swapUsed", "physTotal", "physFree",
|
||||||
"physUsed", "physBuffers", "physCached", "physUsable", "physPctUsable",
|
"physUsed", "physBuffers", "physCached", "physUsable", "physPctUsable",
|
||||||
|
@ -134,7 +140,7 @@ none 985964 1 985963 1% /lib/init/rw
|
||||||
assert k in res, res
|
assert k in res, res
|
||||||
assert res["swapTotal"] == res["swapFree"] + res["swapUsed"]
|
assert res["swapTotal"] == res["swapFree"] + res["swapUsed"]
|
||||||
assert res["physTotal"] == res["physFree"] + res["physUsed"]
|
assert res["physTotal"] == res["physFree"] + res["physUsed"]
|
||||||
elif sys.platform == 'darwin':
|
elif platform.system() == 'Darwin':
|
||||||
for k in ("swapFree", "swapUsed", "physFree", "physUsed"):
|
for k in ("swapFree", "swapUsed", "physFree", "physUsed"):
|
||||||
assert k in res, res
|
assert k in res, res
|
||||||
|
|
||||||
|
|
|
@ -42,7 +42,6 @@ class JMXTestCase(unittest.TestCase):
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
aggregator = MetricsAggregator("test_host")
|
aggregator = MetricsAggregator("test_host")
|
||||||
self.server = Server(aggregator, "localhost", STATSD_PORT)
|
self.server = Server(aggregator, "localhost", STATSD_PORT)
|
||||||
pid_file = PidFile('dogstatsd')
|
|
||||||
self.reporter = DummyReporter(aggregator)
|
self.reporter = DummyReporter(aggregator)
|
||||||
|
|
||||||
self.t1 = threading.Thread(target=self.server.start)
|
self.t1 = threading.Thread(target=self.server.start)
|
||||||
|
|
|
@ -102,6 +102,7 @@ class PseudoAgent(object):
|
||||||
x = 0
|
x = 0
|
||||||
while True:
|
while True:
|
||||||
x = random()
|
x = random()
|
||||||
|
return x
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def hanging_net():
|
def hanging_net():
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
import unittest
|
import unittest
|
||||||
import logging
|
import logging
|
||||||
|
import win32evtlog
|
||||||
from nose.plugins.attrib import attr
|
from nose.plugins.attrib import attr
|
||||||
from nose.plugins.skip import SkipTest
|
from nose.plugins.skip import SkipTest
|
||||||
|
|
||||||
|
@ -43,7 +44,6 @@ class WinEventLogTest(unittest.TestCase):
|
||||||
|
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
raise SkipTest("Requires win32evtlog module")
|
raise SkipTest("Requires win32evtlog module")
|
||||||
import win32evtlog
|
|
||||||
self.LOG_EVENTS = [
|
self.LOG_EVENTS = [
|
||||||
('Test 1', win32evtlog.EVENTLOG_WARNING_TYPE),
|
('Test 1', win32evtlog.EVENTLOG_WARNING_TYPE),
|
||||||
('Test 2', win32evtlog.EVENTLOG_ERROR_TYPE),
|
('Test 2', win32evtlog.EVENTLOG_ERROR_TYPE),
|
||||||
|
|
20
tox.ini
20
tox.ini
|
@ -20,22 +20,12 @@ commands = flake8
|
||||||
commands = {posargs}
|
commands = {posargs}
|
||||||
|
|
||||||
[flake8]
|
[flake8]
|
||||||
|
max-line-length = 120
|
||||||
# TODO: ignored checks should be enabled in the future
|
# TODO: ignored checks should be enabled in the future
|
||||||
# H201 no 'except:' at least use 'except Exception:'
|
# E501 Line length > 80 characters
|
||||||
# H202 assertRaises Exception too broad
|
|
||||||
# H237 module is removed in Python
|
|
||||||
# H301 one import per line
|
|
||||||
# H305 imports not grouped correctly
|
|
||||||
# H306 imports not in alphabetical order
|
|
||||||
# H307 like imports should be grouped together
|
|
||||||
# H401 docstring should not start with a space
|
|
||||||
# H402 one line docstring needs punctuation.
|
|
||||||
# H403 multi line docstrings should end on a new line
|
|
||||||
# H404 multi line docstring should start without a leading new line
|
|
||||||
# H405 multi line docstring summary not separated with an empty line
|
|
||||||
# F401 module imported but unused
|
# F401 module imported but unused
|
||||||
# F821 undefined name
|
# H302 import only modules
|
||||||
# F841 local variable is assigned to but never used
|
# H904 Wrap long lines in parentheses instead of a backslash (DEPRECATED)
|
||||||
ignore = E501,H201,H202,H237,H301,H305,H306,H307,H401,H402,H403,H404,H405,H904,F401,F403,F821,F841
|
ignore = E501, F401, H302, H904,
|
||||||
show-source = True
|
show-source = True
|
||||||
exclude=.venv,.git,.tox,dist,*egg,build
|
exclude=.venv,.git,.tox,dist,*egg,build
|
||||||
|
|
Loading…
Reference in New Issue