Cleaned up a lot of the PEP8 violations

Cleaned up all but four of the PEP8 violation types:

E501 line length > 80 characters
F401 module imported but unused
H302 import only modules (DEPRECATED)
H904 wrap long lines in parentheses instead of a backslash (DEPRECATED)
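For readers who don't have the hacking checks memorized, a minimal sketch of what the two deprecated ones flag and the style this commit moves toward (the variable names below are invented for illustration):

    # H302: import only modules, not names from inside modules.
    # Flagged:
    #     from os.path import join
    # Preferred:
    import os.path

    path = os.path.join('/etc', 'monasca')

    # H904: wrap long lines in parentheses rather than ending them
    # with a backslash continuation.
    # Flagged:
    #     total = first_long_variable_name + \
    #         second_long_variable_name
    # Preferred:
    first_long_variable_name = 1
    second_long_variable_name = 2
    total = (first_long_variable_name +
             second_long_variable_name)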

Change-Id: Id24bff6c5f8b8630a9495f49983324342841866f
gary-hessler 2014-08-21 17:33:29 -06:00
parent d704b17d97
commit 45b156b9fe
91 changed files with 1026 additions and 795 deletions


@@ -8,37 +8,38 @@ from __future__ import print_function
 import logging
 import os
-from pprint import pprint
+import pprint
 import re
 import time
 import traceback
-from monagent.common import check_status
-from monagent.common.keystone import Keystone
-from monagent.common.config import get_confd_path
-from monagent.common.exceptions import CheckException, NaN, Infinity, UnknownValue
-from monagent.common.util import LaconicFilter, get_hostname, get_os
+import yaml
+
+import monagent.common.aggregator
+import monagent.common.config
+import monagent.common.exceptions
+import monagent.common.keystone
+import monagent.common.util

 log = logging.getLogger(__name__)

 # todo convert all checks to the new interface then remove this.
 # Is the LaconicFilter on logs used elsewhere?

-# ==============================================================================
+# =============================================================================
 # DEPRECATED
 # ------------------------------
 # If you are writing your own check, you should inherit from AgentCheck
 # and not this class. This class will be removed in a future version
 # of the agent.
-# ==============================================================================
+# =============================================================================
 class Check(object):

-    """
-    (Abstract) class for all checks with the ability to:
+    """(Abstract) class for all checks with the ability to:
+
     * store 1 (and only 1) sample for gauges per metric/tag combination
     * compute rates for counters
     * only log error messages once (instead of each time they occur)
     """

     def __init__(self, logger, agent_config=None):
@@ -52,13 +53,14 @@ class Check(object):
         self._counters = {}  # metric_name: bool
         self.logger = logger
         try:
-            self.logger.addFilter(LaconicFilter())
+            self.logger.addFilter(monagent.common.util.LaconicFilter())
         except Exception:
             self.logger.exception("Trying to install laconic log filter and failed")

     @staticmethod
     def normalize(metric, prefix=None):
         """Turn a metric into a well-formed metric name
+
         prefix.b.c
         """
         name = re.sub(r"[,\+\*\-/()\[\]{}]", "_", metric)
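As a rough standalone illustration of what that re.sub() line does (only the substitution itself appears in this hunk; the prefix handling below is a guess):

    import re

    def normalize(metric, prefix=None):
        # Replace punctuation that is awkward in metric names with '_',
        # using the same pattern as the diff above.
        name = re.sub(r"[,\+\*\-/()\[\]{}]", "_", metric)
        if prefix:
            name = prefix + "." + name
        return name

    print(normalize("disk/usage(bytes)", prefix="system"))
    # -> system.disk_usage_bytes_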
@@ -81,20 +83,21 @@ class Check(object):
         return device_name.strip().lower().replace(' ', '_')

     def counter(self, metric):
-        """
-        Treats the metric as a counter, i.e. computes its per second derivative
+        """Treats the metric as a counter, i.e. computes its per second derivative
+
         ACHTUNG: Resets previous values associated with this metric.
         """
         self._counters[metric] = True
         self._sample_store[metric] = {}

     def is_counter(self, metric):
-        "Is this metric a counter?"
+        """Is this metric a counter?
+        """
         return metric in self._counters

     def gauge(self, metric):
-        """
-        Treats the metric as a gauge, i.e. keep the data as is
+        """Treats the metric as a gauge, i.e. keep the data as is
+
         ACHTUNG: Resets previous values associated with this metric.
         """
         self._sample_store[metric] = {}
@@ -106,36 +109,36 @@ class Check(object):
         return self.is_metric(metric) and not self.is_counter(metric)

     def get_metric_names(self):
-        "Get all metric names"
+        """Get all metric names.
+        """
         return self._sample_store.keys()

     def save_gauge(self, metric, value, timestamp=None,
                    dimensions=None, hostname=None, device_name=None):
-        """ Save a gauge value. """
+        """Save a gauge value.
+        """
         if not self.is_gauge(metric):
             self.gauge(metric)
         self.save_sample(metric, value, timestamp, dimensions, hostname, device_name)

     def save_sample(self, metric, value, timestamp=None,
                     dimensions=None, hostname=None, device_name=None):
-        """Save a simple sample, evict old values if needed
+        """Save a simple sample, evict old values if needed.
         """
         if dimensions is None:
             dimensions = {}
-        from common.util import cast_metric_val
         if timestamp is None:
             timestamp = time.time()
         if metric not in self._sample_store:
-            raise CheckException("Saving a sample for an undefined metric: %s" % metric)
+            raise monagent.common.exceptions.CheckException("Saving a sample for an undefined metric: %s" % metric)
         try:
-            value = cast_metric_val(value)
+            value = monagent.common.util.cast_metric_val(value)
         except ValueError as ve:
-            raise NaN(ve)
+            raise monagent.common.exceptions.NaN(ve)

         # Sort and validate dimensions
         if dimensions is not None and not isinstance(dimensions, dict):
-            raise CheckException("Dimensions must be a dictionary")
+            raise monagent.common.exceptions.CheckException("Dimensions must be a dictionary")

         # Data eviction rules
         key = (tuple(sorted(dimensions.items())), device_name)
@@ -148,8 +151,8 @@ class Check(object):
             self._sample_store[metric][key] = self._sample_store[metric][key][-1:] + \
                 [(timestamp, value, hostname, device_name)]
         else:
-            raise CheckException("%s must be either gauge or counter, skipping sample at %s" %
-                                 (metric, time.ctime(timestamp)))
+            raise monagent.common.exceptions.CheckException("%s must be either gauge or counter, skipping sample at %s" %
                                                             (metric, time.ctime(timestamp)))

         if self.is_gauge(metric):
             # store[metric][dimensions] = (ts, val) - only 1 value allowed
@@ -159,26 +162,27 @@ class Check(object):
     @classmethod
     def _rate(cls, sample1, sample2):
-        "Simple rate"
+        """Simple rate.
+        """
         try:
             interval = sample2[0] - sample1[0]
             if interval == 0:
-                raise Infinity()
+                raise monagent.common.exceptions.Infinity()

             delta = sample2[1] - sample1[1]
             if delta < 0:
-                raise UnknownValue()
+                raise monagent.common.exceptions.UnknownValue()

             return (sample2[0], delta / interval, sample2[2], sample2[3])
-        except Infinity:
+        except monagent.common.exceptions.Infinity:
             raise
-        except UnknownValue:
+        except monagent.common.exceptions.UnknownValue:
             raise
         except Exception as e:
-            raise NaN(e)
+            raise monagent.common.exceptions.NaN(e)

     def get_sample_with_timestamp(self, metric, dimensions=None, device_name=None, expire=True):
-        """Get (timestamp-epoch-style, value)
+        """Get (timestamp-epoch-style, value).
         """
         if dimensions is None:
             dimensions = {}
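_rate() above turns two stored samples of the form (timestamp, value, hostname, device_name) into a per-second rate; the same arithmetic in a standalone sketch (plain exceptions stand in for the agent's Infinity/UnknownValue types):

    sample1 = (1408660000.0, 100.0, 'host-1', None)   # (ts, value, hostname, device_name)
    sample2 = (1408660015.0, 400.0, 'host-1', None)

    interval = sample2[0] - sample1[0]   # 15 seconds between samples
    if interval == 0:
        raise ZeroDivisionError("the agent raises Infinity() here")
    delta = sample2[1] - sample1[1]      # counter grew by 300
    if delta < 0:
        raise ValueError("counter went backwards; the agent raises UnknownValue()")
    rate = (sample2[0], delta / interval, sample2[2], sample2[3])
    print(rate)  # (1408660015.0, 20.0, 'host-1', None)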
@@ -188,11 +192,11 @@ class Check(object):
         # Never seen this metric
         if metric not in self._sample_store:
-            raise UnknownValue()
+            raise monagent.common.exceptions.UnknownValue()

         # Not enough value to compute rate
         elif self.is_counter(metric) and len(self._sample_store[metric][key]) < 2:
-            raise UnknownValue()
+            raise monagent.common.exceptions.UnknownValue()

         elif self.is_counter(metric) and len(self._sample_store[metric][key]) >= 2:
             res = self._rate(
@@ -205,16 +209,18 @@ class Check(object):
             return self._sample_store[metric][key][-1]
         else:
-            raise UnknownValue()
+            raise monagent.common.exceptions.UnknownValue()

     def get_sample(self, metric, dimensions=None, device_name=None, expire=True):
-        "Return the last value for that metric"
+        """Return the last value for that metric.
+        """
         x = self.get_sample_with_timestamp(metric, dimensions, device_name, expire)
         assert isinstance(x, tuple) and len(x) == 4, x
         return x[1]

     def get_samples_with_timestamps(self, expire=True):
-        "Return all values {metric: (ts, value)} for non-tagged metrics"
+        """Return all values {metric: (ts, value)} for non-tagged metrics.
+        """
         values = {}
         for m in self._sample_store:
             try:
@@ -224,7 +230,8 @@ class Check(object):
         return values

     def get_samples(self, expire=True):
-        "Return all values {metric: value} for non-tagged metrics"
+        """Return all values {metric: value} for non-tagged metrics.
+        """
         values = {}
         for m in self._sample_store:
             try:
@@ -234,8 +241,9 @@ class Check(object):
                 pass
         return values

-    def get_metrics(self, expire=True):
+    def get_metrics(self, expire=True, prettyprint=False):
         """Get all metrics, including the ones that are tagged.
+
         This is the preferred method to retrieve metrics

         @return the list of samples
@@ -251,7 +259,7 @@ class Check(object):
                 try:
                     ts, val, hostname, device_name = self.get_sample_with_timestamp(
                         m, dimensions, device_name, expire)
-                except UnknownValue:
+                except monagent.common.exceptions.UnknownValue:
                     continue
                 attributes = {}
                 if dimensions_list:
@@ -273,27 +281,22 @@ class AgentCheck(object):
     keystone = None

     def __init__(self, name, init_config, agent_config, instances=None):
-        """
-        Initialize a new check.
+        """Initialize a new check.
+
         :param name: The name of the check
         :param init_config: The config for initializing the check
         :param agent_config: The global configuration for the agent
         :param instances: A list of configuration objects for each instance.
         """
-        from monagent.common.aggregator import MetricsAggregator
         self.name = name
         self.init_config = init_config
         self.agent_config = agent_config
-        self.hostname = get_hostname(agent_config)
+        self.hostname = monagent.common.util.get_hostname(agent_config)
         self.log = logging.getLogger('%s.%s' % (__name__, name))

-        self.aggregator = MetricsAggregator(
-            self.hostname,
-            recent_point_threshold=agent_config.get(
-                'recent_point_threshold',
-                None))
+        self.aggregator = monagent.common.aggregator.MetricsAggregator(self.hostname,
+                                                                       recent_point_threshold=agent_config.get('recent_point_threshold',
+                                                                                                               None))

         self.events = []
         self.instances = instances or []
@@ -301,20 +304,19 @@ class AgentCheck(object):
         self.library_versions = None

         api_config = self.agent_config['Api']
-        AgentCheck.keystone = Keystone(api_config['keystone_url'],
-                                       api_config['username'],
-                                       api_config['password'],
-                                       api_config['project_name'])
+        AgentCheck.keystone = monagent.common.keystone.Keystone(api_config['keystone_url'],
+                                                                api_config['username'],
+                                                                api_config['password'],
+                                                                api_config['project_name'])

     def instance_count(self):
-        """ Return the number of instances that are configured for this check. """
+        """Return the number of instances that are configured for this check.
+        """
         return len(self.instances)

     def gauge(self, metric, value, dimensions=None,
               hostname=None, device_name=None, timestamp=None):
-        """
-        Record the value of a gauge, with optional dimensions, hostname and device
-        name.
+        """Record the value of a gauge, with optional dimensions, hostname and device name.
+
         :param metric: The name of the metric
         :param value: The value of the gauge
@@ -326,8 +328,7 @@ class AgentCheck(object):
         self.aggregator.gauge(metric, value, dimensions, hostname, device_name, timestamp)

     def increment(self, metric, value=1, dimensions=None, hostname=None, device_name=None):
-        """
-        Increment a counter with optional dimensions, hostname and device name.
+        """Increment a counter with optional dimensions, hostname and device name.
+
         :param metric: The name of the metric
         :param value: The value to increment by
@@ -338,8 +339,7 @@ class AgentCheck(object):
         self.aggregator.increment(metric, value, dimensions, hostname, device_name)

     def decrement(self, metric, value=-1, dimensions=None, hostname=None, device_name=None):
-        """
-        Increment a counter with optional dimensions, hostname and device name.
+        """Decrement a counter with optional dimensions, hostname and device name.
+
         :param metric: The name of the metric
         :param value: The value to decrement by
@@ -350,8 +350,8 @@ class AgentCheck(object):
         self.aggregator.decrement(metric, value, dimensions, hostname, device_name)

     def rate(self, metric, value, dimensions=None, hostname=None, device_name=None):
-        """
-        Submit a point for a metric that will be calculated as a rate on flush.
+        """Submit a point for a metric that will be calculated as a rate on flush.
+
         Values will persist across each call to `check` if there is not enough
         point to generate a rate on the flush.
@@ -364,8 +364,7 @@ class AgentCheck(object):
         self.aggregator.rate(metric, value, dimensions, hostname, device_name)

     def histogram(self, metric, value, dimensions=None, hostname=None, device_name=None):
-        """
-        Sample a histogram value, with optional dimensions, hostname and device name.
+        """Sample a histogram value, with optional dimensions, hostname and device name.
+
         :param metric: The name of the metric
         :param value: The value to sample for the histogram
@@ -376,8 +375,7 @@ class AgentCheck(object):
         self.aggregator.histogram(metric, value, dimensions, hostname, device_name)

     def set(self, metric, value, dimensions=None, hostname=None, device_name=None):
-        """
-        Sample a set value, with optional dimensions, hostname and device name.
+        """Sample a set value, with optional dimensions, hostname and device name.
+
         :param metric: The name of the metric
         :param value: The value for the set
@@ -388,8 +386,7 @@ class AgentCheck(object):
         self.aggregator.set(metric, value, dimensions, hostname, device_name)

     def event(self, event):
-        """
-        Save an event.
+        """Save an event.
+
         :param event: The event payload as a dictionary. Has the following
         structure:
@@ -412,8 +409,7 @@ class AgentCheck(object):
         self.events.append(event)

     def has_events(self):
-        """
-        Check whether the check has saved any events
+        """Check whether the check has saved any events
+
         @return whether or not the check has saved any events
         @rtype boolean
@@ -421,8 +417,7 @@ class AgentCheck(object):
         return len(self.events) > 0

     def get_metrics(self, prettyprint=False):
-        """
-        Get all metrics, including the ones that are tagged.
+        """Get all metrics, including the ones that are tagged.
+
         @return the list of samples
         @rtype list of Measurement objects from monagent.common.metrics
@@ -444,8 +439,7 @@ class AgentCheck(object):
         return self.aggregator.flush()

     def get_events(self):
-        """
-        Return a list of the events saved by the check, if any
+        """Return a list of the events saved by the check, if any
+
         @return the list of events saved by this check
         @rtype list of event dictionaries
@@ -455,13 +449,13 @@ class AgentCheck(object):
         return events

     def has_warnings(self):
-        """
-        Check whether the instance run created any warnings
+        """Check whether the instance run created any warnings.
         """
         return len(self.warnings) > 0

     def warning(self, warning_message):
-        """ Add a warning message that will be printed in the info page
+        """Add a warning message that will be printed in the info page
+
         :param warning_message: String. Warning message to be displayed
         """
         self.warnings.append(warning_message)
@@ -475,43 +469,45 @@ class AgentCheck(object):
         pass

     def get_library_versions(self):
-        """ Should return a string that shows which version
-        of the needed libraries are used """
+        """Should return a string that shows which version
+
+        of the needed libraries are used
+        """
         raise NotImplementedError

     def get_warnings(self):
-        """
-        Return the list of warnings messages to be displayed in the info page
+        """Return the list of warnings messages to be displayed in the info page.
         """
         warnings = self.warnings
         self.warnings = []
         return warnings

     def run(self):
-        """ Run all instances. """
+        """Run all instances.
+        """
         instance_statuses = []
         for i, instance in enumerate(self.instances):
             try:
                 instance['keystone'] = AgentCheck.keystone
                 self.check(instance)
                 if self.has_warnings():
-                    instance_status = check_status.InstanceStatus(i,
-                                                                  check_status.STATUS_WARNING,
-                                                                  warnings=self.get_warnings())
+                    instance_status = monagent.common.check_status.InstanceStatus(i,
+                                                                                  monagent.common.check_status.STATUS_WARNING,
+                                                                                  warnings=self.get_warnings())
                 else:
-                    instance_status = check_status.InstanceStatus(i, check_status.STATUS_OK)
+                    instance_status = monagent.common.check_status.InstanceStatus(i,
+                                                                                  monagent.common.check_status.STATUS_OK)
             except Exception as e:
                 self.log.exception("Check '%s' instance #%s failed" % (self.name, i))
-                instance_status = check_status.InstanceStatus(i,
-                                                              check_status.STATUS_ERROR,
-                                                              error=e,
-                                                              tb=traceback.format_exc())
+                instance_status = monagent.common.check_status.InstanceStatus(i,
+                                                                              monagent.common.check_status.STATUS_ERROR,
+                                                                              error=e,
+                                                                              tb=traceback.format_exc())
             instance_statuses.append(instance_status)
         return instance_statuses

     def check(self, instance):
-        """
-        Overriden by the check class. This will be called to run the check.
+        """Overriden by the check class. This will be called to run the check.
+
         :param instance: A dict with the instance information. This will vary
         depending on your config structure.
@@ -520,21 +516,19 @@ class AgentCheck(object):
     @staticmethod
     def stop():
-        """
-        To be executed when the agent is being stopped to clean ressources
+        """To be executed when the agent is being stopped to clean ressources.
         """
         pass

     @classmethod
     def from_yaml(cls, path_to_yaml=None, agentConfig=None, yaml_text=None, check_name=None):
-        """
-        A method used for testing your check without running the agent.
+        """A method used for testing your check without running the agent.
         """
-        import yaml
-        try:
-            from yaml import CLoader as Loader
-        except ImportError:
-            from yaml import Loader
+        if hasattr(yaml, 'CLoader'):
+            Loader = yaml.CLoader
+        else:
+            Loader = yaml.Loader

         if path_to_yaml:
             check_name = os.path.basename(path_to_yaml).split('.')[0]
         try:
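The hasattr() form above replaces the old try/except import dance; both pick PyYAML's C-accelerated loader when the library was built against libyaml. A standalone sketch of the pattern:

    import yaml

    # Prefer the C loader when PyYAML was compiled with libyaml bindings;
    # fall back to the pure-Python loader otherwise.
    if hasattr(yaml, 'CLoader'):
        Loader = yaml.CLoader
    else:
        Loader = yaml.Loader

    config = yaml.load("init_config:\ninstances:\n  - {}", Loader=Loader)
    print(config)  # {'init_config': None, 'instances': [{}]}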
@@ -551,9 +545,7 @@ class AgentCheck(object):
     @staticmethod
     def normalize(metric, prefix=None):
-        """
-        Turn a metric into a well-formed metric name
-        prefix.b.c
+        """Turn a metric into a well-formed metric name prefix.b.c

         :param metric The metric name to normalize
         :param prefix A prefix to to add to the normalized name, default None
@@ -587,10 +579,11 @@ class AgentCheck(object):

 def run_check(name, path=None):
-    from tests.common import get_check
+    import tests.common

     # Read the config file
-    confd_path = path or os.path.join(get_confd_path(get_os()), '%s.yaml' % name)
+    confd_path = path or os.path.join(monagent.common.config.get_confd_path(monagent.common.util.get_os()),
+                                      '%s.yaml' % name)

     try:
         f = open(confd_path)
@@ -601,13 +594,13 @@ def run_check(name, path=None):
     f.close()

     # Run the check
-    check, instances = get_check(name, config_str)
+    check, instances = tests.common.get_check(name, config_str)
     if not instances:
         raise Exception('YAML configuration returned no instances.')
     for instance in instances:
         check.check(instance)
         if check.has_events():
             print("Events:\n")
-            pprint(check.get_events(), indent=4)
+            pprint.pprint(check.get_events(), indent=4)
         print("Metrics:\n")
-        pprint(check.get_metrics(), indent=4)
+        pprint.pprint(check.get_metrics(), indent=4)
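With the agent installed, the test helper above would be driven roughly like this ('mysql' is a hypothetical check name, not something this diff defines):

    # Run a check named 'mysql' against the default confd path...
    run_check('mysql')
    # ...or against an explicit config file.
    run_check('mysql', path='/etc/monasca/agent/conf.d/mysql.yaml')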


@@ -1,15 +1,14 @@
 # Core modules
 import logging
-import threading
-import time
 import socket
-
-from monagent.common.metrics import Measurement
-from monagent.common.util import get_os, Timer
 import system.unix as u
 import system.win32 as w32
-from datadog import Dogstreams
-from monagent.common.check_status import CheckStatus, CollectorStatus, EmitterStatus
+import threading
+import time
+
+import monagent.common.check_status
+import monagent.common.metrics
+import monagent.common.util

 log = logging.getLogger(__name__)
@@ -25,15 +24,15 @@ FLUSH_LOGGING_INITIAL = 5
 class Collector(object):

-    """
-    The collector is responsible for collecting data from each check and
+    """The collector is responsible for collecting data from each check and
     passing it along to the emitters, who send it to their final destination.
     """

     def __init__(self, agent_config, emitter, checksd=None):
         self.emit_duration = None
         self.agent_config = agent_config
-        self.os = get_os()
+        self.os = monagent.common.util.get_os()
         self.plugins = None
         self.emitter = emitter
         socket.setdefaulttimeout(15)
@@ -74,23 +73,24 @@ class Collector(object):
         self.init_failed_checks_d = checksd['init_failed_checks']

     def _emit(self, payload):
-        """ Send the payload via the emitter. """
+        """Send the payload via the emitter.
+        """
         statuses = []
         # Don't try to send to an emitter if we're stopping/
         if self.continue_running:
             name = self.emitter.__name__
-            emitter_status = EmitterStatus(name)
+            emitter_status = monagent.common.check_status.EmitterStatus(name)
             try:
                 self.emitter(payload, log, self.agent_config['forwarder_url'])
             except Exception as e:
                 log.exception("Error running emitter: %s" % self.emitter.__name__)
-                emitter_status = EmitterStatus(name, e)
+                emitter_status = monagent.common.check_status.EmitterStatus(name, e)
             statuses.append(emitter_status)
         return statuses

     def _set_status(self, check_statuses, emitter_statuses, collect_duration):
         try:
-            CollectorStatus(check_statuses, emitter_statuses).persist()
+            monagent.common.check_status.CollectorStatus(check_statuses, emitter_statuses).persist()
         except Exception:
             log.exception("Error persisting collector status")
@@ -125,13 +125,11 @@ class Collector(object):
         return metrics

     def run(self):
-        """
-        Collect data from each check and submit their data.
+        """Collect data from each check and submit their data.
+
         There are currently two types of checks the system checks and the configured ones from checks_d
         """
-        timer = Timer()
-        if self.os != 'windows':
-            cpu_clock = time.clock()
+        timer = monagent.common.util.Timer()
         self.run_count += 1
         log.debug("Starting collection run #%s" % self.run_count)
@@ -144,7 +142,7 @@ class Collector(object):
         for check_type in self._legacy_checks:
             try:
                 for name, value in check_type.check().iteritems():
-                    metrics_list.append(Measurement(name, timestamp, value, {}))
+                    metrics_list.append(monagent.common.metrics.Measurement(name, timestamp, value, {}))
             except Exception:
                 log.exception('Error running check.')
@@ -163,10 +161,10 @@ class Collector(object):
         # Add in metrics on the collector run, emit_duration is from the previous run
         for name, value in self.collector_stats(len(metrics_list), len(events),
                                                 collect_duration, self.emit_duration).iteritems():
-            metrics_list.append(Measurement(name,
-                                            timestamp,
-                                            value,
-                                            {'service': 'monasca', 'component': 'collector'}))
+            metrics_list.append(monagent.common.metrics.Measurement(name,
+                                                                    timestamp,
+                                                                    value,
+                                                                    {'service': 'monasca', 'component': 'collector'}))

         emitter_statuses = self._emit(metrics_list)
         self.emit_duration = timer.step()
@@ -175,8 +173,9 @@ class Collector(object):
         self._set_status(checks_statuses, emitter_statuses, collect_duration)

     def run_checks_d(self):
-        """ Run defined checks_d checks.
-        returns a list of Measurements, a dictionary of events and a list of check statuses.
+        """Run defined checks_d checks.
+
+        returns a list of Measurements, a dictionary of events and a list of check statuses.
         """
         measurements = []
         events = {}
@@ -210,23 +209,22 @@ class Collector(object):
             except Exception:
                 log.exception("Error running check %s" % check.name)

-            check_status = CheckStatus(check.name, instance_statuses, metric_count, event_count,
-                                       library_versions=check.get_library_info())
+            check_status = monagent.common.check_status.CheckStatus(check.name, instance_statuses, metric_count, event_count,
+                                                                    library_versions=check.get_library_info())
             check_statuses.append(check_status)

         for check_name, info in self.init_failed_checks_d.iteritems():
             if not self.continue_running:
                 return
-            check_status = CheckStatus(check_name, None, None, None,
-                                       init_failed_error=info['error'],
-                                       init_failed_traceback=info['traceback'])
+            check_status = monagent.common.check_status.CheckStatus(check_name, None, None, None,
+                                                                    init_failed_error=info['error'],
+                                                                    init_failed_traceback=info['traceback'])
             check_statuses.append(check_status)

         return measurements, events, check_statuses

     def stop(self):
-        """
-        Tell the collector to stop at the next logical point.
+        """Tell the collector to stop at the next logical point.
         """
         # This is called when the process is being killed, so
         # try to stop the collector as soon as possible.


@@ -1,13 +1,13 @@
+import datetime
+import itertools
 import os
-import traceback
 import re
 import time
-from datetime import datetime
-from itertools import groupby  # >= python 2.4
-from utils import TailFile
-from monagent.common.util import LaconicFilter
-from monagent.collector import modules
+import traceback
+
+import monagent.collector
+import monagent.common.util
+import utils

 if hasattr('some string', 'partition'):
@@ -111,7 +111,7 @@ class Dogstream(object):
         if parser_spec:
             try:
-                parse_func = modules.load(parser_spec, 'parser')
+                parse_func = monagent.collector.modules.load(parser_spec, 'parser')
                 if isinstance(parse_func, type):
                     logger.info('Instantiating class-based dogstream')
                     parse_func = parse_func(
@@ -142,7 +142,7 @@ class Dogstream(object):
         self.class_based = class_based

         # Apply LaconicFilter to avoid log flooding
-        self.logger.addFilter(LaconicFilter("dogstream"))
+        self.logger.addFilter(monagent.common.util.LaconicFilter("dogstream"))

         self.log_path = log_path
         self.parse_func = parse_func or self._default_line_parser
@@ -163,7 +163,7 @@ class Dogstream(object):
         # Build our tail -f
         if self._gen is None:
-            self._gen = TailFile(
+            self._gen = utils.TailFile(
                 self.logger,
                 self.log_path,
                 self._line_parser).tail(
@@ -202,7 +202,7 @@ class Dogstream(object):
             else:
                 try:
                     parsed = self.parse_func(self.logger, line, self.parser_state, *self.parse_args)
-                except TypeError as e:
+                except TypeError:
                     # Arity of parse_func is 3 (old-style), not 4
                     parsed = self.parse_func(self.logger, line)
@@ -250,7 +250,7 @@ class Dogstream(object):
                 try:
                     # Bucket points into 15 second buckets
                     ts = (int(float(ts)) / self._freq) * self._freq
-                    date = datetime.fromtimestamp(ts)
+                    date = datetime.datetime.fromtimestamp(ts)
                     assert date.year > 1990
                 except Exception:
                     invalid_reasons.append('invalid timestamp')
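The bucketing line above leans on Python 2 integer division; the same arithmetic written with explicit floor division so it reads unambiguously:

    freq = 15                                    # plays the role of self._freq
    ts = 1408660007.8                            # raw timestamp parsed from a log line
    bucketed = (int(float(ts)) // freq) * freq   # snap down to the 15-second boundary
    print(bucketed)                              # 1408660005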
@@ -265,14 +265,13 @@ class Dogstream(object):
                         repr(datum), ', '.join(invalid_reasons), line)
                 else:
                     self._values.append((metric, ts, value, attrs))
-        except Exception as e:
+        except Exception:
             self.logger.debug("Error while parsing line %s" % line, exc_info=True)
             self._error_count += 1
             self.logger.error("Parser error: %s out of %s" % (self._error_count, self._line_count))

     @staticmethod
     def _default_line_parser(logger, line):
-        original_line = line
         sep = ' '
         metric, _, line = partition(line.strip(), sep)
         timestamp, _, line = partition(line.strip(), sep)
@@ -284,13 +283,14 @@ class Dogstream(object):
                 keyval, _, line = partition(line.strip(), sep)
                 key, val = keyval.split('=', 1)
                 attributes[key] = val
-        except Exception as e:
+        except Exception:
             logger.debug(traceback.format_exc())

         return metric, timestamp, value, attributes

     def _aggregate(self, values):
-        """ Aggregate values down to the second and store as:
+        """Aggregate values down to the second and store as:
+
         {
             "dogstream": [(metric, timestamp, value, {key: val})]
         }
@@ -300,7 +300,7 @@ class Dogstream(object):
         values.sort(key=point_sorter)

-        for (timestamp, metric, host_name, device_name), val_attrs in groupby(values, key=point_sorter):
+        for (timestamp, metric, host_name, device_name), val_attrs in itertools.groupby(values, key=point_sorter):
             attributes = {}
             vals = []
             for _metric, _timestamp, v, a in val_attrs:
@@ -519,6 +519,3 @@ class NagiosServicePerfData(NagiosPerfData):
         if middle_name:
             metric.append(middle_name.replace(' ', '_').lower())
         return metric
-
-if __name__ == '__main__':
-    testddForwarder()


@@ -32,28 +32,34 @@ SENTINEL = "QUIT"
 def is_sentinel(obj):
     """Predicate to determine whether an item from the queue is the
-    signal to stop"""
+    signal to stop
+    """
     return isinstance(obj, str) and obj == SENTINEL

 class TimeoutError(Exception):
-    """Raised when a result is not available within the given timeout"""
+    """Raised when a result is not available within the given timeout.
+    """
     pass

 class PoolWorker(threading.Thread):
-    """Thread that consumes WorkUnits from a queue to process them"""
+    """Thread that consumes WorkUnits from a queue to process them.
+    """

     def __init__(self, workq, *args, **kwds):
-        """\param workq: Queue object to consume the work units from"""
+        """\param workq: Queue object to consume the work units from.
+        """
         threading.Thread.__init__(self, *args, **kwds)
         self._workq = workq
         self.running = False

     def run(self):
-        """Process the work unit, or wait for sentinel to exit"""
+        """Process the work unit, or wait for sentinel to exit.
+        """
         while True:
             self.running = True
             workunit = self._workq.get()
@@ -69,14 +75,15 @@ class PoolWorker(threading.Thread):
 class Pool(object):

     """
-    The Pool class represents a pool of worker threads. It has methods
-    which allows tasks to be offloaded to the worker processes in a
-    few different ways
+    The Pool class represents a pool of worker threads.
+
+    It has methods which allows tasks to be offloaded to the
+    worker processes in a few different ways.
     """

     def __init__(self, nworkers, name="Pool"):
-        """
-        \param nworkers (integer) number of worker threads to start
+        """\param nworkers (integer) number of worker threads to start
         \param name (string) prefix for the worker threads' name
         """
         self._workq = Queue.Queue()
@@ -86,7 +93,7 @@ class Pool(object):
             thr = PoolWorker(self._workq, name="Worker-%s-%d" % (name, idx))
             try:
                 thr.start()
-            except:
+            except Exception:
                 # If one thread has a problem, undo everything
                 self.terminate()
                 raise
@@ -97,25 +104,28 @@ class Pool(object):
         return len([w for w in self._workers if w.running])

     def apply(self, func, args=(), kwds=None):
-        """Equivalent of the apply() builtin function. It blocks till
-        the result is ready."""
+        """Equivalent of the apply() builtin function.
+
+        It blocks till the result is ready.
+        """
         if not kwds:
             kwds = dict()
         return self.apply_async(func, args, kwds).get()

     def map(self, func, iterable, chunksize=None):
-        """A parallel equivalent of the map() builtin function. It
-        blocks till the result is ready.
+        """A parallel equivalent of the map() builtin function.
+
+        It blocks till the result is ready.

         This method chops the iterable into a number of chunks which
         it submits to the process pool as separate tasks. The
         (approximate) size of these chunks can be specified by setting
-        chunksize to a positive integer."""
+        chunksize to a positive integer.
+        """
         return self.map_async(func, iterable, chunksize).get()

     def imap(self, func, iterable, chunksize=1):
-        """
-        An equivalent of itertools.imap().
+        """An equivalent of itertools.imap().

         The chunksize argument is the same as the one used by the
         map() method. For very long iterables using a large value for
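Pieced together from the docstrings above, the Pool API would be exercised roughly like this (a sketch only; it assumes this module's Pool class and its Queue/threading imports are in scope):

    def square(x):
        return x * x

    pool = Pool(4, name="demo")
    print(pool.map(square, range(8)))        # [0, 1, 4, ..., 49], in submission order
    result = pool.apply_async(square, (12,))
    print(result.get(timeout=5.0))           # 144, or TimeoutError after 5 seconds
    pool.terminate()                         # enqueue the QUIT sentinels
    pool.join()                              # wait for the worker threads to exit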
@@ -133,22 +143,24 @@ class Pool(object):
     def imap_unordered(self, func, iterable, chunksize=1):
         """The same as imap() except that the ordering of the results
-        from the returned iterator should be considered
-        arbitrary. (Only when there is only one worker process is the
-        order guaranteed to be "correct".)"""
+
+        from the returned iterator should be considered arbitrary.
+        (Only when there is only one worker process is the order
+        guaranteed to be "correct".)
+        """
         collector = UnorderedResultCollector()
         self._create_sequences(func, iterable, chunksize, collector)
         return iter(collector)

     def apply_async(self, func, args=(), kwds=None, callback=None):
-        """A variant of the apply() method which returns an
-        ApplyResult object.
+        """A variant of the apply() method which returns an ApplyResult object.
+
         If callback is specified then it should be a callable which
         accepts a single argument. When the result becomes ready,
         callback is applied to it (unless the call failed). callback
         should complete immediately since otherwise the thread which
-        handles the results will get blocked."""
+        handles the results will get blocked.
+        """
         if not kwds:
             kwds = dict()
         assert not self._closed  # No lock here. We assume it's atomic...
@@ -158,14 +170,14 @@ class Pool(object):
         return apply_result

     def map_async(self, func, iterable, chunksize=None, callback=None):
-        """A variant of the map() method which returns a ApplyResult
-        object.
+        """A variant of the map() method which returns a ApplyResult object.
+
         If callback is specified then it should be a callable which
         accepts a single argument. When the result becomes ready
         callback is applied to it (unless the call failed). callback
         should complete immediately since otherwise the thread which
-        handles the results will get blocked."""
+        handles the results will get blocked.
+        """
         apply_result = ApplyResult(callback=callback)
         collector = OrderedResultCollector(apply_result, as_iterator=False)
         self._create_sequences(func, iterable, chunksize, collector)
@@ -173,6 +185,7 @@ class Pool(object):
     def imap_async(self, func, iterable, chunksize=None, callback=None):
         """A variant of the imap() method which returns an ApplyResult
+
         object that provides an iterator (next method(timeout)
         available).
@@ -180,7 +193,8 @@ class Pool(object):
         accepts a single argument. When the resulting iterator becomes
         ready, callback is applied to it (unless the call
         failed). callback should complete immediately since otherwise
-        the thread which handles the results will get blocked."""
+        the thread which handles the results will get blocked.
+        """
         apply_result = ApplyResult(callback=callback)
         collector = OrderedResultCollector(apply_result, as_iterator=True)
         self._create_sequences(func, iterable, chunksize, collector)
@@ -189,30 +203,35 @@ class Pool(object):
     def imap_unordered_async(self, func, iterable, chunksize=None,
                              callback=None):
         """A variant of the imap_unordered() method which returns an
-        ApplyResult object that provides an iterator (next
-        method(timeout) available).
+
+        ApplyResult object that provides an iterator (next method(timeout)
+        available).

         If callback is specified then it should be a callable which
         accepts a single argument. When the resulting iterator becomes
         ready, callback is applied to it (unless the call
         failed). callback should complete immediately since otherwise
-        the thread which handles the results will get blocked."""
+        the thread which handles the results will get blocked.
+        """
         apply_result = ApplyResult(callback=callback)
         collector = UnorderedResultCollector(apply_result)
         self._create_sequences(func, iterable, chunksize, collector)
         return apply_result

     def close(self):
-        """Prevents any more tasks from being submitted to the
-        pool. Once all the tasks have been completed the worker
-        processes will exit."""
+        """Prevents any more tasks from being submitted to the pool.
+
+        Once all the tasks have been completed the worker
+        processes will exit.
+        """
         # No lock here. We assume it's sufficiently atomic...
         self._closed = True

     def terminate(self):
-        """Stops the worker processes immediately without completing
-        outstanding work. When the pool object is garbage collected
-        terminate() will be called immediately."""
+        """Stops the worker processes immediately without completing outstanding work.
+
+        When the pool object is garbage collected terminate() will be called immediately.
+        """
         self.close()

         # Clearing the job queue
@@ -228,18 +247,19 @@ class Pool(object):
             self._workq.put(SENTINEL)

     def join(self):
-        """Wait for the worker processes to exit. One must call
-        close() or terminate() before using join()."""
+        """Wait for the worker processes to exit.
+
+        One must call close() or terminate() before using join().
+        """
         for thr in self._workers:
             thr.join()

     def _create_sequences(self, func, iterable, chunksize, collector=None):
-        """
-        Create the WorkUnit objects to process and pushes them on the
-        work queue. Each work unit is meant to process a slice of
-        iterable of size chunksize. If collector is specified, then
-        the ApplyResult objects associated with the jobs will notify
-        collector when their result becomes ready.
+        """Create the WorkUnit objects to process and pushes them on the work queue.
+
+        Each work unit is meant to process a slice of iterable of size chunksize.
+        If collector is specified, then the ApplyResult objects associated with
+        the jobs will notify collector when their result becomes ready.

         \return the list of WorkUnit objects (basically: JobSequences)
         pushed onto the work queue
@@ -271,8 +291,10 @@ class Pool(object):
 class WorkUnit(object):
-    """ABC for a unit of work submitted to the worker threads. It's
-    basically just an object equipped with a process() method"""
+    """ABC for a unit of work submitted to the worker threads.
+
+    It's basically just an object equipped with a process() method
+    """

     def process(self):
         """Do the work. Shouldn't raise any exception"""
@@ -281,11 +303,12 @@ class WorkUnit(object):
 class Job(WorkUnit):
-    """A work unit that corresponds to the execution of a single function"""
+    """A work unit that corresponds to the execution of a single function.
+    """

     def __init__(self, func, args, kwds, apply_result):
-        """
-        \param func/args/kwds used to call the function
+        """\param func/args/kwds used to call the function
         \param apply_result ApplyResult object that holds the result
         of the function call
         """
@@ -296,14 +319,14 @@ class Job(WorkUnit):
         self._result = apply_result

     def process(self):
-        """
-        Call the function with the args/kwds and tell the ApplyResult
+        """Call the function with the args/kwds and tell the ApplyResult
         that its result is ready. Correctly handles the exceptions
-        happening during the execution of the function
+        happening during the execution of the function.
         """
         try:
             result = self._func(*self._args, **self._kwds)
-        except:
+        except Exception:
             self._result._set_exception()
         else:
             self._result._set_value(result)
@@ -312,15 +335,15 @@ class Job(WorkUnit):
 class JobSequence(WorkUnit):
     """A work unit that corresponds to the processing of a continuous
-    sequence of Job objects"""
+    sequence of Job objects
+    """

     def __init__(self, jobs):
         WorkUnit.__init__(self)
         self._jobs = jobs

     def process(self):
-        """
-        Call process() on all the Job objects that have been specified
+        """Call process() on all the Job objects that have been specified.
         """
         for job in self._jobs:
             job.process()
@@ -329,16 +352,18 @@ class JobSequence(WorkUnit):
 class ApplyResult(object):
     """An object associated with a Job object that holds its result:
+
     it's available during the whole life the Job and after, even when
     the Job didn't process yet. It's possible to use this object to
     wait for the result/exception of the job to be available.

     The result objects returns by the Pool::*_async() methods are of
-    this type"""
+    this type
+    """

     def __init__(self, collector=None, callback=None):
-        """
-        \param collector when not None, the notify_ready() method of
+        """\param collector when not None, the notify_ready() method of
+
         the collector will be called when the result from the Job is
         ready
         \param callback when not None, function to call when the
@@ -356,11 +381,11 @@ class ApplyResult(object):
         self._collector = collector

     def get(self, timeout=None):
-        """
-        Returns the result when it arrives. If timeout is not None and
-        the result does not arrive within timeout seconds then
-        TimeoutError is raised. If the remote call raised an exception
-        then that exception will be reraised by get().
+        """Returns the result when it arrives.
+
+        If timeout is not None and the result does not arrive within timeout
+        seconds then TimeoutError is raised. If the remote call raised an
+        exception then that exception will be re-raised by get().
         """
         if not self.wait(timeout):
             raise TimeoutError("Result not available within %fs" % timeout)
@@ -369,27 +394,31 @@ class ApplyResult(object):
             raise self._data[0], self._data[1], self._data[2]

     def wait(self, timeout=None):
-        """Waits until the result is available or until timeout
-        seconds pass."""
+        """Waits until the result is available or until timeout seconds pass.
+        """
         self._event.wait(timeout)
         return self._event.isSet()

     def ready(self):
-        """Returns whether the call has completed."""
+        """Returns whether the call has completed.
+        """
         return self._event.isSet()

     def successful(self):
-        """Returns whether the call completed without raising an
-        exception. Will raise AssertionError if the result is not
-        ready."""
+        """Returns whether the call completed without raising an exception.
+
+        Will raise AssertionError if the result is not ready.
+        """
         assert self.ready()
         return self._success

     def _set_value(self, value):
         """Called by a Job object to tell the result is ready, and
         provides the value of this result. The object will become
         ready and successful. The collector's notify_ready() method
-        will be called, and the callback method too"""
+        will be called, and the callback method too.
+        """
         assert not self.ready()
         self._data = value
         self._success = True
@@ -399,14 +428,16 @@ class ApplyResult(object):
         if self._callback is not None:
             try:
                 self._callback(value)
-            except:
+            except Exception:
                 traceback.print_exc()

     def _set_exception(self):
         """Called by a Job object to tell that an exception occured
         during the processing of the function. The object will become
         ready but not successful. The collector's notify_ready()
-        method will be called, but NOT the callback method"""
+        method will be called, but NOT the callback method
+        """
         assert not self.ready()
         self._data = sys.exc_info()
         self._success = False
@@ -417,22 +448,25 @@ class ApplyResult(object):
 class AbstractResultCollector(object):
-    """ABC to define the interface of a ResultCollector object. It is
-    basically an object which knows whuich results it's waiting for,
+    """ABC to define the interface of a ResultCollector object.
+
+    It is basically an object which knows whuich results it's waiting for,
     and which is able to get notify when they get available. It is
     also able to provide an iterator over the results when they are
-    available"""
+    available.
+    """

     def __init__(self, to_notify):
-        """
-        \param to_notify ApplyResult object to notify when all the
+        """\param to_notify ApplyResult object to notify when all the
         results we're waiting for become available. Can be None.
         """
         self._to_notify = to_notify

     def register_result(self, apply_result):
-        """Used to identify which results we're waiting for. Will
-        always be called BEFORE the Jobs get submitted to the work
+        """Used to identify which results we're waiting for.
+
+        Will always be called BEFORE the Jobs get submitted to the work
         queue, and BEFORE the __iter__ and _get_result() methods can
         be called

         \param apply_result ApplyResult object to add in our collection
@@ -441,6 +475,7 @@ class AbstractResultCollector(object):
     def notify_ready(self, apply_result):
         """Called by the ApplyResult object (already registered via
+
         register_result()) that it is now ready (ie. the Job's result
         is available or an exception has been raised).

         \param apply_result ApplyResult object telling us that the job
@@ -450,6 +485,7 @@ class AbstractResultCollector(object):
     def _get_result(self, idx, timeout=None):
         """Called by the CollectorIterator object to retrieve the
+
         result's values one after another (order defined by the
         implementation)

         \param idx The index of the result we want, wrt collector's order
@@ -460,19 +496,23 @@ class AbstractResultCollector(object):
         raise NotImplementedError("Children classes must implement it")
def __iter__(self): def __iter__(self):
"""Return a new CollectorIterator object for this collector""" """Return a new CollectorIterator object for this collector.
"""
return CollectorIterator(self) return CollectorIterator(self)
class CollectorIterator(object): class CollectorIterator(object):
"""An iterator that allows to iterate over the result values """An iterator that allows to iterate over the result values
available in the given collector object. Equipped with an extended available in the given collector object. Equipped with an extended
next() method accepting a timeout argument. Created by the next() method accepting a timeout argument. Created by the
AbstractResultCollector::__iter__() method""" AbstractResultCollector::__iter__() method
"""
def __init__(self, collector): def __init__(self, collector):
"""\param AbstractResultCollector instance""" """\param AbstractResultCollector instance.
"""
self._collector = collector self._collector = collector
self._idx = 0 self._idx = 0
@ -480,16 +520,18 @@ class CollectorIterator(object):
return self return self
def next(self, timeout=None): def next(self, timeout=None):
"""Return the next result value in the sequence. Raise """Return the next result value in the sequence.
StopIteration at the end. Can raise the exception raised by
the Job""" Raise StopIteration at the end. Can raise the exception raised by
the Job.
"""
try: try:
apply_result = self._collector._get_result(self._idx, timeout) apply_result = self._collector._get_result(self._idx, timeout)
except IndexError: except IndexError:
# Reset for next time # Reset for next time
self._idx = 0 self._idx = 0
raise StopIteration raise StopIteration
except: except Exception:
self._idx = 0 self._idx = 0
raise raise
self._idx += 1 self._idx += 1
@ -500,13 +542,15 @@ class CollectorIterator(object):
class UnorderedResultCollector(AbstractResultCollector): class UnorderedResultCollector(AbstractResultCollector):
"""An AbstractResultCollector implementation that collects the """An AbstractResultCollector implementation that collects the
values of the ApplyResult objects in the order they become ready. The values of the ApplyResult objects in the order they become ready. The
CollectorIterator object returned by __iter__() will iterate over CollectorIterator object returned by __iter__() will iterate over
them in the order they become ready""" them in the order they become ready.
"""
def __init__(self, to_notify=None): def __init__(self, to_notify=None):
""" """\param to_notify ApplyResult object to notify when all the
\param to_notify ApplyResult object to notify when all the
results we're waiting for become available. Can be None. results we're waiting for become available. Can be None.
""" """
AbstractResultCollector.__init__(self, to_notify) AbstractResultCollector.__init__(self, to_notify)
@ -515,8 +559,9 @@ class UnorderedResultCollector(AbstractResultCollector):
self._expected = 0 self._expected = 0
def register_result(self, apply_result): def register_result(self, apply_result):
"""Used to identify which results we're waiting for. Will """Used to identify which results we're waiting for.
always be called BEFORE the Jobs get submitted to the work
Will always be called BEFORE the Jobs get submitted to the work
queue, and BEFORE the __iter__ and _get_result() methods can queue, and BEFORE the __iter__ and _get_result() methods can
be called be called
\param apply_result ApplyResult object to add in our collection \param apply_result ApplyResult object to add in our collection
@ -525,6 +570,7 @@ class UnorderedResultCollector(AbstractResultCollector):
def _get_result(self, idx, timeout=None): def _get_result(self, idx, timeout=None):
"""Called by the CollectorIterator object to retrieve the """Called by the CollectorIterator object to retrieve the
result's values one after another, in the order the results have result's values one after another, in the order the results have
become available. become available.
\param idx The index of the result we want, wrt collector's order \param idx The index of the result we want, wrt collector's order
@ -553,6 +599,7 @@ class UnorderedResultCollector(AbstractResultCollector):
def notify_ready(self, apply_result): def notify_ready(self, apply_result):
"""Called by the ApplyResult object (already registered via """Called by the ApplyResult object (already registered via
register_result()) that it is now ready (ie. the Job's result register_result()) that it is now ready (ie. the Job's result
is available or an exception has been raised). is available or an exception has been raised).
\param apply_result ApplyResult object telling us that the job \param apply_result ApplyResult object telling us that the job
@ -575,13 +622,15 @@ class UnorderedResultCollector(AbstractResultCollector):
class OrderedResultCollector(AbstractResultCollector): class OrderedResultCollector(AbstractResultCollector):
"""An AbstractResultCollector implementation that collects the """An AbstractResultCollector implementation that collects the
values of the ApplyResult objects in the order they have been values of the ApplyResult objects in the order they have been
submitted. The CollectorIterator object returned by __iter__() submitted. The CollectorIterator object returned by __iter__()
will iterate over them in the order they have been submitted""" will iterate over them in the order they have been submitted.
"""
def __init__(self, to_notify=None, as_iterator=True): def __init__(self, to_notify=None, as_iterator=True):
""" """\param to_notify ApplyResult object to notify when all the
\param to_notify ApplyResult object to notify when all the
results we're waiting for become available. Can be None. results we're waiting for become available. Can be None.
\param as_iterator boolean telling whether the result value \param as_iterator boolean telling whether the result value
set on to_notify should be an iterator (available as soon as 1 set on to_notify should be an iterator (available as soon as 1
@ -595,8 +644,9 @@ class OrderedResultCollector(AbstractResultCollector):
self._as_iterator = as_iterator self._as_iterator = as_iterator
def register_result(self, apply_result): def register_result(self, apply_result):
"""Used to identify which results we're waiting for. Will """Used to identify which results we're waiting for.
always be called BEFORE the Jobs get submitted to the work
Will always be called BEFORE the Jobs get submitted to the work
queue, and BEFORE the __iter__ and _get_result() methods can queue, and BEFORE the __iter__ and _get_result() methods can
be called be called
\param apply_result ApplyResult object to add in our collection \param apply_result ApplyResult object to add in our collection
@ -606,6 +656,7 @@ class OrderedResultCollector(AbstractResultCollector):
def _get_result(self, idx, timeout=None): def _get_result(self, idx, timeout=None):
"""Called by the CollectorIterator object to retrieve the """Called by the CollectorIterator object to retrieve the
result's values one after another (order defined by the result's values one after another (order defined by the
implementation) implementation)
\param idx The index of the result we want, wrt collector's order \param idx The index of the result we want, wrt collector's order
@ -619,6 +670,7 @@ class OrderedResultCollector(AbstractResultCollector):
def notify_ready(self, apply_result): def notify_ready(self, apply_result):
"""Called by the ApplyResult object (already registered via """Called by the ApplyResult object (already registered via
register_result()) that it is now ready (ie. the Job's result register_result()) that it is now ready (ie. the Job's result
is available or an exception has been raised). is available or an exception has been raised).
\param apply_result ApplyResult object telling us that the job \param apply_result ApplyResult object telling us that the job
@ -641,25 +693,25 @@ class OrderedResultCollector(AbstractResultCollector):
elif not self._as_iterator and got_last: elif not self._as_iterator and got_last:
try: try:
lst = [r.get(0) for r in self._results] lst = [r.get(0) for r in self._results]
except: except Exception:
self._to_notify._set_exception() self._to_notify._set_exception()
else: else:
self._to_notify._set_value(lst) self._to_notify._set_value(lst)
def _test(): def _test():
"""Some tests""" """Some tests.
import thread """
import time import time
def f(x): def f(x):
return x * x return x * x
def work(seconds): def work(seconds):
print("[%d] Start to work for %fs..." % (thread.get_ident(), seconds)) print("[%d] Start to work for %fs..." % (threading.thread.get_ident(), seconds))
time.sleep(seconds) time.sleep(seconds)
print("[%d] Work done (%fs)." % (thread.get_ident(), seconds)) print("[%d] Work done (%fs)." % (threading.thread.get_ident(), seconds))
return "%d slept %fs" % (thread.get_ident(), seconds) return "%d slept %fs" % (threading.thread.get_ident(), seconds)
# Test copy/pasted from multiprocessing # Test copy/pasted from multiprocessing
pool = Pool(9) # start 4 worker threads pool = Pool(9) # start 4 worker threads
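The async-result API above mirrors multiprocessing.Pool, as the docstrings describe. A minimal usage sketch, illustrative only and not part of this commit; it assumes the module's Pool exposes apply_async/close/join like the stdlib pool:

    from monagent.collector.checks.libs.thread_pool import Pool

    def square(x):
        return x * x

    pool = Pool(4)                            # four worker threads
    result = pool.apply_async(square, (7,))   # returns an ApplyResult
    print(result.get(timeout=5))              # 49; TimeoutError if not ready in 5s
    print(result.successful())                # True once the job finished cleanly
    pool.close()
    pool.join()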

@@ -1,10 +1,10 @@
-from collections import namedtuple
-import time
-from Queue import Queue, Empty
+import collections
+import Queue
import threading
+import time
-from monagent.collector.checks import AgentCheck
-from monagent.collector.checks.libs.thread_pool import Pool
+import monagent.collector.checks
+import monagent.collector.checks.libs.thread_pool
TIMEOUT = 180
@@ -12,16 +12,16 @@ DEFAULT_SIZE_POOL = 6
MAX_LOOP_ITERATIONS = 1000
FAILURE = "FAILURE"
-up_down = namedtuple('up_down', ['UP', 'DOWN'])
+up_down = collections.namedtuple('up_down', ['UP', 'DOWN'])
Status = up_down('UP', 'DOWN')
EventType = up_down("servicecheck.state_change.up", "servicecheck.state_change.down")
-class ServicesCheck(AgentCheck):
+class ServicesCheck(monagent.collector.checks.AgentCheck):
SOURCE_TYPE_NAME = 'servicecheck'
-"""
-Services checks inherit from this class.
+"""Services checks inherit from this class.
This class should never be directly instantiated.
Work flow:
@@ -36,11 +36,10 @@ class ServicesCheck(AgentCheck):
Status.UP or Status.DOWN.
The second element is a short error message that will be displayed
when the service turns down.
"""
def __init__(self, name, init_config, agentConfig, instances):
-AgentCheck.__init__(self, name, init_config, agentConfig, instances)
+monagent.collector.checks.AgentCheck.__init__(self, name, init_config, agentConfig, instances)
# A dictionary to keep track of service statuses
self.statuses = {}
@@ -60,9 +59,9 @@ class ServicesCheck(AgentCheck):
default_size = min(self.instance_count(), DEFAULT_SIZE_POOL)
self.pool_size = int(self.init_config.get('threads_count', default_size))
-self.pool = Pool(self.pool_size)
+self.pool = monagent.collector.checks.libs.thread_pool.Pool(self.pool_size)
-self.resultsq = Queue()
+self.resultsq = Queue.Queue()
self.jobs_status = {}
self.pool_started = True
@@ -110,7 +109,7 @@ class ServicesCheck(AgentCheck):
# We put the results in the result queue
self.resultsq.put(result)
-except Exception as e:
+except Exception:
result = (FAILURE, FAILURE, FAILURE, FAILURE)
self.resultsq.put(result)
@@ -119,7 +118,7 @@ class ServicesCheck(AgentCheck):
try:
# We want to fetch the result in a non blocking way
status, msg, name, queue_instance = self.resultsq.get_nowait()
-except Empty:
+except Queue.Empty:
break
if status == FAILURE:
@@ -165,13 +164,12 @@ class ServicesCheck(AgentCheck):
del self.jobs_status[name]
def _check(self, instance):
-"""This function should be implemented by inherited classes"""
+"""This function should be implemented by inherited classes.
+"""
raise NotImplementedError
def _clean(self):
now = time.time()
-stuck_process = None
-stuck_time = time.time()
for name in self.jobs_status.keys():
start_time = self.jobs_status[name]
if now - start_time > TIMEOUT:
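For orientation, a hedged sketch of the workflow the ServicesCheck docstring describes: subclasses implement _check(instance) and return a (Status, message) tuple, which the pool machinery above pushes through resultsq. The PortOpen class and its config keys below are hypothetical:

    import socket

    from monagent.collector.checks.services_checks import ServicesCheck
    from monagent.collector.checks.services_checks import Status

    class PortOpen(ServicesCheck):
        def _check(self, instance):
            # Try a plain TCP connect; UP on success, DOWN with the error otherwise.
            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            sock.settimeout(instance.get('timeout', 3))
            try:
                sock.connect((instance['host'], int(instance['port'])))
                return Status.UP, 'connected'
            except socket.error as e:
                return Status.DOWN, str(e)
            finally:
                sock.close()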
@@ -1,8 +1,8 @@
-"""
-Unix system checks.
+"""Unix system checks.
"""
# stdlib
+import functools
import logging
import operator
import platform
@@ -12,22 +12,24 @@ import sys
import time
# project
-from monagent.collector.checks.check import Check
-from monagent.common.metrics import Measurement
-from monagent.common.util import Platform
-from functools import reduce
+import monagent.collector.checks.check
+import monagent.common.metrics
+import monagent.common.util
# locale-resilient float converter
to_float = lambda s: float(s.replace(",", "."))
-class Disk(Check):
-""" Collects metrics about the machine's disks. """
+class Disk(monagent.collector.checks.check.Check):
+"""Collects metrics about the machine's disks.
+"""
def check(self):
-"""Get disk space/inode stats"""
+"""Get disk space/inode stats.
+"""
# First get the configuration.
if self.agent_config is not None:
use_mount = self.agent_config.get("use_mount", False)
@@ -58,12 +60,12 @@ class Disk(Check):
# parse into a list of Measurements
stats.update(inodes)
timestamp = time.time()
-measurements = [
-    Measurement(
-        key.split(
-            '.', 1)[1], timestamp, value, {
-            'device': key.split(
-                '.', 1)[0]}) for key, value in stats.iteritems()]
+measurements = [monagent.common.metrics.Measurement(key.split('.', 1)[1],
+                                                    timestamp,
+                                                    value,
+                                                    {'device': key.split('.', 1)[0]})
+                for key, value in stats.iteritems()]
return measurements
except Exception:
@@ -72,9 +74,9 @@ class Disk(Check):
def parse_df_output(
self, df_output, platform_name, inodes=False, use_mount=False, blacklist_re=None):
-"""
-Parse the output of the df command. If use_volume is true the volume
-is used to anchor the metric, otherwise false the mount
-point is used. Returns a tuple of (disk, inode).
+"""Parse the output of the df command.
+
+If use_volume is true the volume is used to anchor the metric, otherwise the mount
+point is used. Returns a tuple of (disk, inode).
"""
usage_data = {}
@@ -88,14 +90,14 @@ class Disk(Check):
if use_mount:
parts[0] = parts[-1]
if inodes:
-if Platform.is_darwin(platform_name):
+if monagent.common.util.Platform.is_darwin(platform_name):
# Filesystem 512-blocks Used Available Capacity iused ifree %iused Mounted
# Inodes are in position 5, 6 and we need to compute the total
parts[1] = int(parts[5]) + int(parts[6])  # Total
parts[2] = int(parts[5])  # Used
parts[3] = int(parts[6])  # Available
-elif Platform.is_freebsd(platform_name):
+elif monagent.common.util.Platform.is_freebsd(platform_name):
# Filesystem 1K-blocks Used Avail Capacity iused ifree %iused Mounted
# Inodes are in position 5, 6 and we need to compute the total
parts[1] = int(parts[5]) + int(parts[6])  # Total
@@ -128,8 +130,7 @@ class Disk(Check):
return True
def _is_real_device(self, device):
-"""
-Return true if we should track the given device name and false otherwise.
+"""Return true if we should track the given device name and false otherwise.
"""
# First, skip empty lines.
if not device or len(device) <= 1:
@@ -164,10 +165,9 @@ class Disk(Check):
return devices
def _transform_df_output(self, df_output, blacklist_re):
-"""
-Given raw output for the df command, transform it into a normalized
-list of devices. A 'device' is a list with fields corresponding to the
-output of df on each platform.
+"""Given raw output for the df command, transform it into a normalized list of devices.
+
+A 'device' is a list with fields corresponding to the output of df on each platform.
"""
all_devices = [l.strip().split() for l in df_output.split("\n")]
@@ -190,10 +190,10 @@ class Disk(Check):
return devices
-class IO(Check):
+class IO(monagent.collector.checks.check.Check):
def __init__(self, logger):
-Check.__init__(self, logger)
+monagent.collector.checks.check.Check.__init__(self, logger)
self.header_re = re.compile(r'([%\\/\-_a-zA-Z0-9]+)[\s+]?')
self.item_re = re.compile(r'^([a-zA-Z0-9\/]+)')
self.value_re = re.compile(r'\d+\.\d+')
@@ -252,7 +252,8 @@ class IO(Check):
@staticmethod
def xlate(metric_name, os_name):
-"""Standardize on linux metric names"""
+"""Standardize on linux metric names.
+"""
if os_name == "sunos":
names = {"wait": "await",
"svc_t": "svctm",
@@ -282,7 +283,7 @@ class IO(Check):
"""
io = {}
try:
-if Platform.is_linux():
+if monagent.common.util.Platform.is_linux():
stdout = sp.Popen(['iostat', '-d', '1', '2', '-x', '-k'],
stdout=sp.PIPE,
close_fds=True).communicate()[0]
@@ -394,7 +395,7 @@ class IO(Check):
for dev_name, stats in filtered_io.iteritems():
filtered_stats = {stat: stats[stat]
for stat in stats.iterkeys() if stat not in self.stat_blacklist}
-m_list = [Measurement(key, timestamp, value, {'device': dev_name})
+m_list = [monagent.common.metrics.Measurement(key, timestamp, value, {'device': dev_name})
for key, value in filtered_stats.iteritems()]
measurements.extend(m_list)
@@ -405,10 +406,10 @@ class IO(Check):
return {}
-class Load(Check):
+class Load(monagent.collector.checks.check.Check):
def check(self):
-if Platform.is_linux():
+if monagent.common.util.Platform.is_linux():
try:
loadAvrgProc = open('/proc/loadavg', 'r')
uptime = loadAvrgProc.readlines()
@@ -437,10 +438,10 @@ class Load(Check):
}
-class Memory(Check):
+class Memory(monagent.collector.checks.check.Check):
def __init__(self, logger):
-Check.__init__(self, logger)
+monagent.collector.checks.check.Check.__init__(self, logger)
macV = None
if sys.platform == 'darwin':
macV = platform.mac_ver()
@@ -464,7 +465,7 @@ class Memory(Check):
pass
def check(self):
-if Platform.is_linux():
+if monagent.common.util.Platform.is_linux():
try:
meminfoProc = open('/proc/meminfo', 'r')
lines = meminfoProc.readlines()
@@ -736,10 +737,11 @@ class Memory(Check):
return {}
-class Cpu(Check):
+class Cpu(monagent.collector.checks.check.Check):
def check(self):
-"""Return an aggregate of CPU stats across all CPUs
+"""Return an aggregate of CPU stats across all CPUs.
When figures are not available, False is sent back.
"""
def format_results(us, sy, wa, idle, st):
@@ -754,7 +756,8 @@ class Cpu(Check):
return data
def get_value(legend, data, name, filter_value=None):
-"Using the legend and a metric name, get the value or None from the data line"
+"""Using the legend and a metric name, get the value or None from the data line.
+"""
if name in legend:
value = to_float(data[legend.index(name)])
if filter_value is not None:
@@ -767,7 +770,7 @@ class Cpu(Check):
self.logger.debug("Cannot extract cpu value %s from %s (%s)" % (name, data, legend))
return 0.0
-if Platform.is_linux():
+if monagent.common.util.Platform.is_linux():
mpstat = sp.Popen(['mpstat', '1', '3'], stdout=sp.PIPE, close_fds=True).communicate()[0]
# topdog@ip:~$ mpstat 1 3
# Linux 2.6.32-341-ec2 (ip) 01/19/2012 _x86_64_ (2 CPU)
@@ -917,7 +920,7 @@ class Cpu(Check):
size = [get_value(headers, l.split(), "sze") for l in d_lines]
count = sum(size)
rel_size = [s / count for s in size]
-dot = lambda v1, v2: reduce(operator.add, map(operator.mul, v1, v2))
+dot = lambda v1, v2: functools.reduce(operator.add, map(operator.mul, v1, v2))
return format_results(dot(user, rel_size),
dot(kern, rel_size),
dot(wait, rel_size),
@@ -932,9 +935,9 @@ class Cpu(Check):
def _get_subprocess_output(command):
-"""
-Run the given subprocess command and return it's output. Raise an Exception
-if an error occurs.
+"""Run the given subprocess command and return its output.
+
+Raise an Exception if an error occurs.
"""
proc = sp.Popen(command, stdout=sp.PIPE, close_fds=True)
return proc.stdout.read()
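The rel_size/dot computation in the Cpu hunk above is a weighted average of per-CPU stats. A standalone worked example with made-up numbers, using the same functools/operator idiom as the new code:

    import functools
    import operator

    size = [2.0, 2.0, 4.0]                     # per-CPU "sze" values (hypothetical)
    user = [10.0, 20.0, 30.0]                  # per-CPU user % (hypothetical)
    rel_size = [s / sum(size) for s in size]   # weights that sum to 1.0
    dot = lambda v1, v2: functools.reduce(operator.add, map(operator.mul, v1, v2))
    print(dot(user, rel_size))                 # 22.5, the weighted average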
@@ -1,4 +1,4 @@
-from monagent.collector.checks.check import Check
+import monagent.collector.checks.check
try:
import wmi
@@ -15,10 +15,10 @@ B2MB = float(1048576)
KB2MB = B2KB = float(1024)
-class Processes(Check):
+class Processes(monagent.collector.checks.check.Check):
def __init__(self, logger):
-Check.__init__(self, logger)
+monagent.collector.checks.check.Check.__init__(self, logger)
self.gauge('system.proc.queue_length')
self.gauge('system.proc.count')
@@ -31,7 +31,7 @@ class Processes(Check):
return
try:
-cpu = w.Win32_PerfFormattedData_PerfOS_Processor(name="_Total")[0]
+w.Win32_PerfFormattedData_PerfOS_Processor(name="_Total")[0]
except AttributeError:
self.logger.info('Missing Win32_PerfFormattedData_PerfOS_Processor WMI class.' +
' No process metrics will be returned.')
@@ -44,10 +44,10 @@ class Processes(Check):
return self.get_metrics()
-class Memory(Check):
+class Memory(monagent.collector.checks.check.Check):
def __init__(self, logger):
-Check.__init__(self, logger)
+monagent.collector.checks.check.Check.__init__(self, logger)
self.logger = logger
self.gauge('system.mem.free')
self.gauge('system.mem.used')
@@ -84,10 +84,10 @@ class Memory(Check):
return self.get_metrics()
-class Cpu(Check):
+class Cpu(monagent.collector.checks.check.Check):
def __init__(self, logger):
-Check.__init__(self, logger)
+monagent.collector.checks.check.Check.__init__(self, logger)
self.logger = logger
self.gauge('system.cpu.user')
self.gauge('system.cpu.idle')
@@ -122,9 +122,10 @@ class Cpu(Check):
@staticmethod
def _average_metric(wmi_class, wmi_prop):
-''' Sum all of the values of a metric from a WMI class object, excluding
-the value for "_Total"
-'''
+"""Sum all of the values of a metric from a WMI class object.
+
+Excludes the value for "_Total"
+"""
val = 0
counter = 0
for wmi_object in wmi_class:
@@ -142,10 +143,10 @@ class Cpu(Check):
return val
-class Network(Check):
+class Network(monagent.collector.checks.check.Check):
def __init__(self, logger):
-Check.__init__(self, logger)
+monagent.collector.checks.check.Check.__init__(self, logger)
self.logger = logger
self.gauge('system.net.bytes_rcvd')
self.gauge('system.net.bytes_sent')
@@ -169,10 +170,10 @@ class Network(Check):
return self.get_metrics()
-class Disk(Check):
+class Disk(monagent.collector.checks.check.Check):
def __init__(self, logger):
-Check.__init__(self, logger)
+monagent.collector.checks.check.Check.__init__(self, logger)
self.logger = logger
self.gauge('system.disk.free')
self.gauge('system.disk.total')
@@ -203,10 +204,10 @@ class Disk(Check):
return self.get_metrics()
-class IO(Check):
+class IO(monagent.collector.checks.check.Check):
def __init__(self, logger):
-Check.__init__(self, logger)
+monagent.collector.checks.check.Check.__init__(self, logger)
self.logger = logger
self.gauge('system.io.wkb_s')
self.gauge('system.io.w_s')
@@ -1,12 +1,11 @@
import base64
+import binascii
import os
+import stat
# os.SEEK_END is defined in python 2.5
SEEK_END = 2
-from stat import *
-import binascii
def median(vals):
vals = sorted(vals)
@@ -21,8 +20,9 @@ def median(vals):
def add_basic_auth(request, username, password):
-""" A helper to add basic authentication to a urllib2 request. We do this
-across a variety of checks so it's good to have this in one place.
+"""A helper to add basic authentication to a urllib2 request.
+
+We do this across a variety of checks so it's good to have this in one place.
"""
auth_str = base64.encodestring('%s:%s' % (username, password)).strip()
request.add_header('Authorization', 'Basic %s' % auth_str)
@@ -52,8 +52,8 @@ class TailFile(object):
already_open = True
stat = os.stat(self._path)
-inode = stat[ST_INO]
-size = stat[ST_SIZE]
+inode = stat[stat.ST_INO]
+size = stat[stat.ST_SIZE]
# Compute CRC of the beginning of the file
crc = None
@@ -98,8 +98,10 @@ class TailFile(object):
def tail(self, line_by_line=True, move_end=True):
"""Read line-by-line and run callback on each line.
line_by_line: yield each time a callback has returned True
-move_end: start from the last line of the log"""
+move_end: start from the last line of the log
+"""
try:
self._open_file(move_end=move_end)
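A short usage sketch for add_basic_auth; illustrative only, with a placeholder URL and credentials:

    import urllib2

    from monagent.collector.checks.utils import add_basic_auth

    req = urllib2.Request('http://localhost:8080/status')
    add_basic_auth(req, 'user', 'secret')   # adds an "Authorization: Basic ..." header
    resp = urllib2.urlopen(req)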
@@ -1,8 +1,8 @@
import urllib2
-from monagent.common.util import headers
from monagent.collector.checks import AgentCheck
from monagent.collector.checks.utils import add_basic_auth
+from monagent.common.util import headers
class Apache(AgentCheck):
@@ -1,7 +1,7 @@
-from collections import namedtuple
from fnmatch import fnmatch
import os
import time
+from collections import namedtuple
from monagent.collector.checks import AgentCheck
@@ -52,16 +52,16 @@ class Cacti(AgentCheck):
# The rrdtool module is required for the check to work
try:
import rrdtool
-except ImportError as e:
+except ImportError:
raise Exception(
-"Cannot import rrdtool module. Check the instructions to install this module at https://app.datadoghq.com/account/settings#integrations/cacti")
+"Cannot import rrdtool module. This module is required for the cacti plugin to work correctly")
# Try importing MySQL
try:
import MySQLdb
-except ImportError as e:
+except ImportError:
raise Exception(
-"Cannot import MySQLdb module. Check the instructions to install this module at https://app.datadoghq.com/account/settings#integrations/cacti")
+"Cannot import MySQLdb module. This module is required for the cacti plugin to work correctly")
connection = MySQLdb.connect(config.host, config.user, config.password, config.db)
@@ -123,7 +123,9 @@ class Cacti(AgentCheck):
return Config(host, user, password, db, rrd_path, whitelist, field_names)
def _read_rrd(self, rrd_path, hostname, device_name):
-''' Main metric fetching method '''
+"""Main metric fetching method.
+"""
import rrdtool
metric_count = 0
@@ -177,9 +179,10 @@ class Cacti(AgentCheck):
return metric_count
def _fetch_rrd_meta(self, connection, rrd_path_root, whitelist, field_names):
-''' Fetch metadata about each RRD in this Cacti DB, returning a list of
-tuples of (hostname, device_name, rrd_path)
-'''
+"""Fetch metadata about each RRD in this Cacti DB.
+
+Returns a list of tuples of (hostname, device_name, rrd_path)
+"""
def _in_whitelist(rrd):
path = rrd.replace('<path_rra>/', '')
for p in whitelist:
@@ -226,7 +229,9 @@ class Cacti(AgentCheck):
@staticmethod
def _format_metric_name(m_name, cfunc):
-''' Format a cacti metric name into a Datadog-friendly name '''
+"""Format a cacti metric name into a Datadog-friendly name.
+"""
try:
aggr = CFUNC_TO_AGGR[cfunc]
except KeyError:
@@ -242,16 +247,18 @@ class Cacti(AgentCheck):
@staticmethod
def _transform_metric(m_name, val):
-''' Add any special case transformations here '''
+"""Add any special case transformations here.
+"""
# Report memory in MB
if m_name[0:11] in ('system.mem.', 'system.disk'):
return val / 1024
return val
-'''
-For backwards compatability with pre-checks_d configuration.
+"""For backwards compatibility with pre-checks_d configuration.
Convert old-style config to new-style config.
-'''
+"""
@staticmethod
def parse_agent_config(agentConfig):
required = ['cacti_mysql_server', 'cacti_mysql_user', 'cacti_rrd_path']
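The _transform_metric special case above converts KB to MB for memory/disk metrics. A hedged illustration, calling the staticmethod through the class with the names as they appear in the diff:

    print(Cacti._transform_metric('system.mem.used', 2048))   # 2 (KB -> MB)
    print(Cacti._transform_metric('system.cpu.idle', 50))     # 50, unchanged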
@@ -1,13 +1,14 @@
-import urllib2
import json
+import urllib2
-from monagent.common.util import headers
from monagent.collector.checks import AgentCheck
+from monagent.common.util import headers
class CouchDb(AgentCheck):
"""Extracts stats from CouchDB via its REST API
http://wiki.apache.org/couchdb/Runtime_Statistics
"""
@@ -28,7 +29,9 @@ class CouchDb(AgentCheck):
self.gauge(metric_name, val, dimensions=metric_dimensions, device_name=db_name)
def _get_stats(self, url):
-"Hit a given URL and return the parsed json"
+"""Hit a given URL and return the parsed json.
+"""
self.log.debug('Fetching Couchdb stats at url: %s' % url)
req = urllib2.Request(url, None, headers(self.agent_config))
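The _get_stats pattern here (and in several other checks in this commit) is just urllib2 plus json. A minimal standalone sketch, with a placeholder URL:

    import json
    import urllib2

    def get_json(url):
        # Fetch the URL and decode the JSON body.
        request = urllib2.urlopen(urllib2.Request(url))
        return json.loads(request.read())

    stats = get_json('http://localhost:5984/_stats')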
@@ -3,9 +3,9 @@ import urllib2
import re
import sys
-from monagent.common.util import headers
from monagent.collector.checks import AgentCheck
from monagent.collector.checks.utils import add_basic_auth
+from monagent.common.util import headers
# Constants
@@ -16,6 +16,7 @@ DEFAULT_TIMEOUT = 10
class Couchbase(AgentCheck):
"""Extracts stats from Couchbase via its REST API
http://docs.couchbase.com/couchbase-manual-2.0/#using-the-rest-api
"""
@@ -49,7 +50,9 @@ class Couchbase(AgentCheck):
metric_name, val, dimensions=metric_dimensions, device_name=node_name)
def _get_stats(self, url, instance):
-"Hit a given URL and return the parsed json"
+"""Hit a given URL and return the parsed json.
+"""
self.log.debug('Fetching Couchbase stats at url: %s' % url)
req = urllib2.Request(url, None, headers(self.agent_config))
if 'user' in instance and 'password' in instance:
@@ -1,6 +1,9 @@
from fnmatch import fnmatch
-from os import stat, walk
-from os.path import abspath, exists, join
+from os import stat
+from os import walk
+from os.path import abspath
+from os.path import exists
+from os.path import join
import time
from monagent.collector.checks import AgentCheck
@@ -1,10 +1,10 @@
-import json
-import urllib2
-import urllib
import httplib
-import socket
+import json
import os
import re
+import socket
+import urllib2
+import urllib
from urlparse import urlsplit
from monagent.collector.checks import AgentCheck
@@ -70,7 +70,9 @@ DOCKER_TAGS = [
class UnixHTTPConnection(httplib.HTTPConnection, object):
"""Class used in conjunction with UnixSocketHandler to make urllib2
-compatible with Unix sockets."""
+compatible with Unix sockets.
+"""
def __init__(self, unix_socket):
self._unix_socket = unix_socket
@@ -87,8 +89,9 @@ class UnixHTTPConnection(httplib.HTTPConnection, object):
class UnixSocketHandler(urllib2.AbstractHTTPHandler):
-"""Class that makes Unix sockets work with urllib2 without any additional
-dependencies."""
+"""Class that makes Unix sockets work with urllib2 without any additional dependencies.
+"""
def unix_open(self, req):
full_path = "%s%s" % urlsplit(req.get_full_url())[1:3]
@@ -180,15 +183,21 @@ class Docker(AgentCheck):
return False
def _get_containers(self, instance):
-"""Gets the list of running containers in Docker."""
+"""Gets the list of running containers in Docker.
+"""
return self._get_json("%(url)s/containers/json" % instance, params={"size": 1})
def _get_container(self, instance, cid):
-"""Get container information from Docker, given a container Id."""
+"""Get container information from Docker, given a container Id.
+"""
return self._get_json("%s/containers/%s/json" % (instance["url"], cid))
def _get_json(self, uri, params=None):
-"""Utility method to get and parse JSON streams."""
+"""Utility method to get and parse JSON streams.
+"""
if params:
uri = "%s?%s" % (uri, urllib.urlencode(params))
self.log.debug("Connecting to: %s" % uri)
@@ -205,8 +214,10 @@ class Docker(AgentCheck):
@staticmethod
def _find_cgroup(hierarchy):
-"""Finds the mount point for a specified cgroup hierarchy. Works with
-old style and new style mounts."""
+"""Finds the mount point for a specified cgroup hierarchy.
+
+Works with old style and new style mounts.
+"""
try:
fp = open("/proc/mounts")
mounts = map(lambda x: x.split(), fp.read().splitlines())
@@ -221,7 +232,9 @@ class Docker(AgentCheck):
return mountpoint
def _parse_cgroup_file(self, file_):
-"""Parses a cgroup pseudo file for key/values."""
+"""Parses a cgroup pseudo file for key/values.
+"""
fp = None
try:
self.log.debug("Opening file: %s" % file_)
@@ -3,12 +3,12 @@ import socket
import subprocess
import sys
import time
-import urlparse
import urllib2
+import urlparse
-from monagent.common.util import headers
from monagent.collector.checks import AgentCheck
from monagent.collector.checks.utils import add_basic_auth
+from monagent.common.util import headers
class NodeNotFound(Exception):
@@ -154,8 +154,8 @@ class ElasticSearch(AgentCheck):
self._process_health_data(config_url, health_data, dimensions=dimensions)
def _get_es_version(self, config_url, auth=None):
-"""
-Get the running version of Elastic Search
+"""Get the running version of Elastic Search.
"""
try:
@@ -170,8 +170,8 @@ class ElasticSearch(AgentCheck):
return version
def _define_params(self, version):
-"""
-Define the set of URLs and METRICS to use depending on the running ES version
+"""Define the set of URLs and METRICS to use depending on the running ES version.
"""
if version >= [0, 90, 10]:
@@ -214,8 +214,9 @@ class ElasticSearch(AgentCheck):
self.METRICS.update(additional_metrics)
def _get_data(self, url, auth=None):
-""" Hit a given URL and return the parsed json
-`auth` is a tuple of (username, password) or None
+"""Hit a given URL and return the parsed json
+
+`auth` is a tuple of (username, password) or None
"""
req = urllib2.Request(url, None, headers(self.agent_config))
if auth:
@@ -264,8 +265,9 @@ class ElasticSearch(AgentCheck):
process_metric(metric, *desc)
def _get_primary_addr(self, url, node_name, auth):
-""" Returns a list of primary interface addresses as seen by ES.
-Used in ES < 0.19
+"""Returns a list of primary interface addresses as seen by ES.
+
+Used in ES < 0.19
"""
req = urllib2.Request(url, None, headers(self.agent_config))
# Load basic authentication configuration, if available.
@@ -286,9 +288,10 @@ class ElasticSearch(AgentCheck):
@staticmethod
def _host_matches_node(primary_addrs):
-""" For < 0.19, check if the current host matches the IP given in the
-cluster nodes check `/_cluster/nodes`. Uses `ip addr` on Linux and
-`ifconfig` on Mac
+"""For < 0.19, check if the current host matches the IP given in the
+
+cluster nodes check `/_cluster/nodes`. Uses `ip addr` on Linux and
+`ifconfig` on Mac
"""
if sys.platform == 'darwin':
ifaces = subprocess.Popen(['ifconfig'], stdout=subprocess.PIPE)
@@ -312,6 +315,7 @@ class ElasticSearch(AgentCheck):
def _process_metric(self, data, metric, path, xform=None, dimensions=None):
"""data: dictionary containing all the stats
metric: datadog metric
path: corresponding path in data, flattened, e.g. thread_pool.bulk.queue
xform: a lambda to apply to the numerical value
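A worked sketch of the flattened-path lookup that _process_metric's docstring describes, with a made-up stats dict:

    data = {'thread_pool': {'bulk': {'queue': 4}}}
    value = data
    for key in 'thread_pool.bulk.queue'.split('.'):
        value = value[key]   # walk one level per dotted component
    print(value)             # 4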
@@ -1,5 +1,4 @@
-"""
-Collects metrics from the gunicorn web server.
+"""Collects metrics from the gunicorn web server.
http://gunicorn.org/
"""
@@ -42,7 +41,9 @@ class GUnicornCheck(AgentCheck):
return {"psutil": version}
def check(self, instance):
-""" Collect metrics for the given gunicorn instance. """
+"""Collect metrics for the given gunicorn instance.
+"""
if not psutil:
raise GUnicornCheckError("gunicorn check requires the psutil python package")
@@ -107,7 +108,9 @@ class GUnicornCheck(AgentCheck):
@staticmethod
def _get_master_proc_by_name(name):
-""" Return a psutil process for the master gunicorn process with the given name. """
+"""Return a psutil process for the master gunicorn process with the given name.
+"""
master_name = GUnicornCheck._get_master_proc_name(name)
master_procs = [
p for p in psutil.process_iter() if p.cmdline and p.cmdline[0] == master_name]
@@ -121,7 +124,9 @@ class GUnicornCheck(AgentCheck):
@staticmethod
def _get_master_proc_name(name):
-""" Return the name of the master gunicorn process for the given proc name. """
+"""Return the name of the master gunicorn process for the given proc name.
+"""
# Here's an example of a process list for a gunicorn box with name web1
# root 22976 0.1 0.1 60364 13424 ? Ss 19:30 0:00 gunicorn: master [web1]
# web 22984 20.7 2.3 521924 176136 ? Sl 19:30 1:58 gunicorn: worker [web1]
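Matching the process-list example above, the master process name for a given app name can be derived with a simple format string; a hedged sketch of that mapping (the helper name is illustrative):

    def master_proc_name(name):
        return 'gunicorn: master [%s]' % name

    print(master_proc_name('web1'))   # gunicorn: master [web1]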
@@ -1,6 +1,6 @@
-import urllib2
-import time
from collections import defaultdict
+import time
+import urllib2
from monagent.collector.checks import AgentCheck
from monagent.common.util import headers
@@ -65,7 +65,9 @@ class HAProxy(AgentCheck):
url=url, collect_status_metrics=collect_status_metrics)
def _fetch_data(self, url, username, password):
-''' Hit a given URL and return the parsed json '''
+"""Hit a given URL and return the parsed json.
+"""
# Try to fetch data from the stats URL
passman = urllib2.HTTPPasswordMgrWithDefaultRealm()
@@ -85,8 +87,10 @@ class HAProxy(AgentCheck):
def _process_data(self, data, collect_aggregates_only, process_events,
url=None, collect_status_metrics=False):
-''' Main data-processing loop. For each piece of useful data, we'll
-either save a metric, save an event or both. '''
+"""Main data-processing loop. For each piece of useful data, we'll
+
+either save a metric, save an event or both.
+"""
# Split the first line into an index of fields
# The line looks like:
@@ -164,8 +168,8 @@ class HAProxy(AgentCheck):
def _process_metrics(self, data_list, service, url):
for data in data_list:
-"""
-Each element of data_list is a dictionary related to one host
+"""Each element of data_list is a dictionary related to one host
(one line) extracted from the csv. All of these elements should
have the same value for 'pxname' key
It should look like:
@@ -193,8 +197,9 @@ class HAProxy(AgentCheck):
self.gauge(name, value, dimensions=dimensions)
def _process_events(self, data_list, url):
-''' Main event processing loop. Events will be created for a service
-status change '''
+"""Main event processing loop. Events will be created for a service status change.
+"""
for data in data_list:
hostname = data['svname']
service_name = data['pxname']
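A hedged sketch of the CSV step _process_data describes: the first line is a "# pxname,svname,..." header that becomes an index of fields for the rows that follow (the data here is a truncated placeholder):

    data = ['# pxname,svname,status', 'www,FRONTEND,OPEN']
    fields = [f.strip() for f in data[0][2:].split(',') if f.strip()]
    row = dict(zip(fields, data[1].split(',')))
    print(row['status'])   # OPEN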
@@ -4,6 +4,7 @@ from monagent.collector.checks import AgentCheck
class HDFSCheck(AgentCheck):
"""Report on free space and space used in HDFS.
"""
def check(self, instance):
@@ -1,22 +1,29 @@
#!/bin/env python
-"""Monitoring Agent remote host aliveness checker"""
+"""Monitoring Agent remote host aliveness checker.
+"""
import socket
import subprocess
import sys
-from monagent.collector.checks.services_checks import ServicesCheck, Status
+from monagent.collector.checks.services_checks import ServicesCheck
+from monagent.collector.checks.services_checks import Status
class HostAlive(ServicesCheck):
-"""Inherit ServicesCheck class to test if a host is alive or not"""
+"""Inherit ServicesCheck class to test if a host is alive or not.
+"""
def __init__(self, name, init_config, agent_config, instances=None):
ServicesCheck.__init__(self, name, init_config, agent_config, instances)
def _test_ssh(self, host, port, timeout=None):
-""" Connect to the SSH port (typically 22) and look for a banner """
+"""Connect to the SSH port (typically 22) and look for a banner.
+"""
if port is None:
port = 22
try:
@@ -46,7 +53,9 @@ class HostAlive(ServicesCheck):
@staticmethod
def _test_ping(host, timeout=None):
-""" Attempt to ping the host """
+"""Attempt to ping the host.
+"""
ping_prefix = "ping -c 1 -q "
if timeout is not None:
ping_prefix += "-W " + str(timeout) + " "
@@ -70,11 +79,15 @@ class HostAlive(ServicesCheck):
return True
def _create_status_event(self, status, msg, instance):
-"""Does nothing: status events are not yet supported by Mon API"""
+"""Does nothing: status events are not yet supported by Mon API.
+"""
return
def _check(self, instance):
-"""Run the desired host-alive check against this host"""
+"""Run the desired host-alive check against this host.
+"""
dimensions = {'target_host': instance['host_name'], 'observer_host': socket.getfqdn()}
# Add per-instance dimensions, if any
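The ping invocation assembled above is "ping -c 1 -q", plus "-W <timeout>" when a timeout is configured. A standalone sketch of the same command line, with a placeholder host:

    import subprocess

    def build_ping(host, timeout=None):
        # One quiet ping; add a wait limit only when a timeout is given.
        cmd = ['ping', '-c', '1', '-q']
        if timeout is not None:
            cmd += ['-W', str(timeout)]
        return cmd + [host]

    subprocess.call(build_ping('127.0.0.1', timeout=2))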
@@ -1,15 +1,20 @@
#!/bin/env python
-"""Monitoring Agent plugin for HTTP/API checks"""
+"""Monitoring Agent plugin for HTTP/API checks.
+"""
-import socket
-import time
import json
import re
+import socket
+import time
-from httplib2 import Http, HttpLib2Error, httplib
+from httplib2 import Http
+from httplib2 import httplib
+from httplib2 import HttpLib2Error
-from monagent.collector.checks.services_checks import ServicesCheck, Status
from monagent.collector.checks.check import AgentCheck
+from monagent.collector.checks.services_checks import ServicesCheck
+from monagent.collector.checks.services_checks import Status
class HTTPCheck(ServicesCheck):
@@ -38,7 +43,9 @@ class HTTPCheck(ServicesCheck):
return url, username, password, timeout, include_content, headers, response_time, dimensions, ssl, pattern, use_keystone, token
def _create_status_event(self, status, msg, instance):
-"""Does nothing: status events are not yet supported by Mon API"""
+"""Does nothing: status events are not yet supported by Mon API.
+"""
return
def _check(self, instance):
@@ -1,10 +1,8 @@
-from collections import defaultdict
-from glob import glob
import os
import time
+from collections import defaultdict
+from glob import glob
try:
from xml.etree.ElementTree import ElementTree
except ImportError:
@@ -13,14 +11,14 @@ except ImportError:
except ImportError:
pass
-from monagent.common.util import get_hostname
from monagent.collector.checks import AgentCheck
+from monagent.common.util import get_hostname
class Skip(Exception):
-"""
-Raised by :class:`Jenkins` when it comes across
+"""Raised by :class:`Jenkins` when it comes across
a build or job that should be excluded from being checked.
"""
View File

@ -1,13 +1,12 @@
from collections import defaultdict
import sys import sys
import random
if sys.version_info < (2, 6): if sys.version_info < (2, 6):
# Normally we'd write our checks to be compatible with >= python 2.4 but # Normally we'd write our checks to be compatible with >= python 2.4 but
# the dependencies of this check are not compatible with 2.4 and would # the dependencies of this check are not compatible with 2.4 and would
# be too much work to rewrite, so raise an exception here. # be too much work to rewrite, so raise an exception here.
raise Exception('kafka_consumer check requires at least Python 2.6') raise Exception('kafka_consumer check requires at least Python 2.6')
from collections import defaultdict
from monagent.collector.checks import AgentCheck
try: try:
from kafka.client import KafkaClient from kafka.client import KafkaClient
from kafka.common import OffsetRequest from kafka.common import OffsetRequest
@ -18,7 +17,8 @@ try:
from kazoo.exceptions import NoNodeError from kazoo.exceptions import NoNodeError
except ImportError: except ImportError:
raise Exception('Missing python dependency: kazoo (https://github.com/python-zk/kazoo)') raise Exception('Missing python dependency: kazoo (https://github.com/python-zk/kazoo)')
import random
from monagent.collector.checks import AgentCheck
class KafkaCheck(AgentCheck): class KafkaCheck(AgentCheck):

View File

@ -1,6 +1,6 @@
from collections import defaultdict
import re import re
import urllib2 import urllib2
from collections import defaultdict
from monagent.collector.checks import AgentCheck from monagent.collector.checks import AgentCheck
@ -12,6 +12,7 @@ whitespace = re.compile(r'\s')
class KyotoTycoonCheck(AgentCheck): class KyotoTycoonCheck(AgentCheck):
"""Report statistics about the Kyoto Tycoon DBM-style """Report statistics about the Kyoto Tycoon DBM-style
database server (http://fallabs.com/kyototycoon/) database server (http://fallabs.com/kyototycoon/)
""" """

View File

@ -1,8 +1,8 @@
import urllib2 import urllib2
from monagent.common.util import headers
from monagent.collector.checks import AgentCheck from monagent.collector.checks import AgentCheck
from monagent.collector.checks.utils import add_basic_auth from monagent.collector.checks.utils import add_basic_auth
from monagent.common.util import headers
class Lighttpd(AgentCheck): class Lighttpd(AgentCheck):

View File

@ -1,4 +1,4 @@
from monagent.collector.checks import * from monagent.collector.checks import AgentCheck
# Reference: http://code.sixapart.com/svn/memcached/trunk/server/doc/protocol.txt # Reference: http://code.sixapart.com/svn/memcached/trunk/server/doc/protocol.txt
# Name Type Meaning # Name Type Meaning

View File

@ -1,6 +1,6 @@
import re import re
import types
import time import time
import types
from monagent.collector.checks import AgentCheck from monagent.collector.checks import AgentCheck
from monagent.common.util import get_hostname from monagent.common.util import get_hostname
@ -109,7 +109,9 @@ class MongoDb(AgentCheck):
def create_event(self, state, server, agentConfig): def create_event(self, state, server, agentConfig):
"""Create an event with a message describing the replication """Create an event with a message describing the replication
state of a mongo node"""
state of a mongo node
"""
def get_state_description(state): def get_state_description(state):
if state == 0: if state == 0:
@ -148,8 +150,8 @@ class MongoDb(AgentCheck):
}) })
def check(self, instance): def check(self, instance):
""" """Returns a dictionary that looks a lot like what's sent back by db.serverStatus().
Returns a dictionary that looks a lot like what's sent back by db.serverStatus()
""" """
if 'server' not in instance: if 'server' not in instance:
self.log.warn("Missing 'server' in mongo config") self.log.warn("Missing 'server' in mongo config")

View File

@ -1,7 +1,7 @@
import subprocess
import os import os
import sys
import re import re
import subprocess
import sys
import traceback import traceback
from monagent.collector.checks import AgentCheck from monagent.collector.checks import AgentCheck
@ -235,8 +235,8 @@ class MySql(AgentCheck):
return the_type(dict[key]) return the_type(dict[key])
def _collect_dict(self, metric_type, field_metric_map, query, db, dimensions): def _collect_dict(self, metric_type, field_metric_map, query, db, dimensions):
""" """Query status and get a dictionary back.
Query status and get a dictionary back.
Extract each field out of the dictionary Extract each field out of the dictionary
and stuff it in the corresponding metric. and stuff it in the corresponding metric.

View File

@ -1,5 +1,7 @@
#!/bin/env python #!/bin/env python
"""Monitoring Agent wrapper for Nagios checks""" """Monitoring Agent wrapper for Nagios checks.
"""
import hashlib import hashlib
import json import json
@ -14,16 +16,20 @@ from monagent.collector.checks.services_checks import ServicesCheck, Status
class WrapNagios(ServicesCheck): class WrapNagios(ServicesCheck):
"""Inherit ServicesCheck class to process Nagios checks""" """Inherit ServicesCheck class to process Nagios checks.
"""
def __init__(self, name, init_config, agent_config, instances=None): def __init__(self, name, init_config, agent_config, instances=None):
ServicesCheck.__init__(self, name, init_config, agent_config, instances) ServicesCheck.__init__(self, name, init_config, agent_config, instances)
@staticmethod @staticmethod
def _do_skip_check(instance, last_run_data): def _do_skip_check(instance, last_run_data):
""" Determine whether or not to skip a check depending on """Determine whether or not to skip a check depending on
the check's check_interval, if specified, and the last
time the check was run """ the check's check_interval, if specified, and the last
time the check was run
"""
if instance['service_name'] in last_run_data and 'check_interval' in instance: if instance['service_name'] in last_run_data and 'check_interval' in instance:
if time.time() < last_run_data[instance['service_name']] + instance['check_interval']: if time.time() < last_run_data[instance['service_name']] + instance['check_interval']:
return True return True
@ -31,11 +37,15 @@ class WrapNagios(ServicesCheck):
return False return False
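The interval-skip test above reduces to: re-run the check only once `check_interval` seconds have passed since the recorded last run. A small illustration with hypothetical data:

```
import time

# Hypothetical state: service_name -> epoch seconds of the last run.
last_run_data = {'load': 1408660000.0}
instance = {'service_name': 'load', 'check_interval': 60}

skip = (instance['service_name'] in last_run_data and
        'check_interval' in instance and
        time.time() < last_run_data[instance['service_name']] + instance['check_interval'])
```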
def _create_status_event(self, status, msg, instance): def _create_status_event(self, status, msg, instance):
"""Does nothing: status events are not yet supported by Mon API""" """Does nothing: status events are not yet supported by Mon API.
"""
return return
def _check(self, instance): def _check(self, instance):
"""Run the command specified by check_command and capture the result""" """Run the command specified by check_command and capture the result.
"""
dimensions = {'observer_host': socket.getfqdn()} dimensions = {'observer_host': socket.getfqdn()}
# Add per-instance dimensions, if any # Add per-instance dimensions, if any

View File

@ -1,10 +1,10 @@
""" """Collects network metrics.
Collects network metrics.
""" """
# stdlib # stdlib
import subprocess
import re import re
import subprocess
# project # project
from monagent.collector.checks import AgentCheck from monagent.collector.checks import AgentCheck
@ -254,8 +254,8 @@ class Network(AgentCheck):
self._submit_devicemetrics(interface, metrics) self._submit_devicemetrics(interface, metrics)
def _parse_solaris_netstat(self, netstat_output): def _parse_solaris_netstat(self, netstat_output):
""" """Return a mapping of network metrics by interface. For example:
Return a mapping of network metrics by interface. For example:
{ interface: { interface:
{'bytes_out': 0, {'bytes_out': 0,
'bytes_in': 0, 'bytes_in': 0,

View File

@ -1,14 +1,15 @@
import re import re
import urllib2 import urllib2
from monagent.common.util import headers
from monagent.collector.checks import AgentCheck from monagent.collector.checks import AgentCheck
from monagent.collector.checks.utils import add_basic_auth from monagent.collector.checks.utils import add_basic_auth
from monagent.common.util import headers
class Nginx(AgentCheck): class Nginx(AgentCheck):
"""Tracks basic nginx metrics via the status module """Tracks basic nginx metrics via the status module
* number of connections * number of connections
* number of requests per second * number of requests per second
@ -20,7 +21,6 @@ class Nginx(AgentCheck):
server accepts handled requests server accepts handled requests
1156958 1156958 4491319 1156958 1156958 4491319
Reading: 0 Writing: 2 Waiting: 6 Reading: 0 Writing: 2 Waiting: 6
""" """
def check(self, instance): def check(self, instance):

View File

@ -8,7 +8,8 @@ class ShouldRestartException(Exception):
class PostgreSql(AgentCheck): class PostgreSql(AgentCheck):
"""Collects per-database, and optionally per-relation metrics """Collects per-database, and optionally per-relation metrics.
""" """
RATE = AgentCheck.rate RATE = AgentCheck.rate
@ -120,6 +121,7 @@ SELECT relname,
def _collect_stats(self, key, db, dimensions, relations): def _collect_stats(self, key, db, dimensions, relations):
"""Query pg_stat_* for various metrics """Query pg_stat_* for various metrics
If relations is not an empty list, gather per-relation metrics If relations is not an empty list, gather per-relation metrics
on top of that. on top of that.
""" """
@ -190,7 +192,9 @@ SELECT relname,
[v[0][1](self, v[0][0], v[1], dimensions=dimensions) for v in values] [v[0][1](self, v[0][0], v[1], dimensions=dimensions) for v in values]
def get_connection(self, key, host, port, user, password, dbname, use_cached=True): def get_connection(self, key, host, port, user, password, dbname, use_cached=True):
"Get and memoize connections to instances" """Get and memorize connections to instances.
"""
if key in self.dbs and use_cached: if key in self.dbs and use_cached:
return self.dbs[key] return self.dbs[key]

View File

@ -1,4 +1,6 @@
"""Gather metrics on specific processes""" """Gather metrics on specific processes.
"""
from monagent.collector.checks import AgentCheck from monagent.collector.checks import AgentCheck
from monagent.common.util import Platform from monagent.common.util import Platform
@ -28,8 +30,8 @@ class ProcessCheck(AgentCheck):
return False return False
def find_pids(self, search_string, psutil, exact_match=True): def find_pids(self, search_string, psutil, exact_match=True):
""" """Create a set of pids of selected processes.
Create a set of pids of selected processes.
Search for search_string Search for search_string
""" """
found_process_list = [] found_process_list = []
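For reference, a psutil-based pid search along the lines of `find_pids` could be sketched as follows (assuming a psutil version where name() and cmdline() are methods; this is not the agent's exact implementation):

```
import psutil


def find_pids(search_string, exact_match=True):
    """Collect pids whose name (or command line) matches search_string (sketch)."""
    pids = []
    for proc in psutil.process_iter():
        try:
            if exact_match:
                matched = proc.name() == search_string
            else:
                matched = search_string in ' '.join(proc.cmdline())
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            continue  # process exited or is off-limits; skip it
        if matched:
            pids.append(proc.pid)
    return pids
```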

View File

@ -1,7 +1,7 @@
import json import json
import time
import urllib2 import urllib2
import urlparse import urlparse
import time
from monagent.collector.checks import AgentCheck from monagent.collector.checks import AgentCheck
@ -42,6 +42,7 @@ METRIC_SUFFIX = {QUEUE_TYPE: "queue", NODE_TYPE: "node"}
class RabbitMQ(AgentCheck): class RabbitMQ(AgentCheck):
"""This check is for gathering statistics from the RabbitMQ """This check is for gathering statistics from the RabbitMQ
Management Plugin (http://www.rabbitmq.com/management.html) Management Plugin (http://www.rabbitmq.com/management.html)
""" """
@ -104,8 +105,8 @@ class RabbitMQ(AgentCheck):
return data return data
def get_stats(self, instance, base_url, object_type, max_detailed, specified_list): def get_stats(self, instance, base_url, object_type, max_detailed, specified_list):
""" """instance: the check instance
instance: the check instance
base_url: the url of the rabbitmq management api (e.g. http://localhost:15672/api) base_url: the url of the rabbitmq management api (e.g. http://localhost:15672/api)
object_type: either QUEUE_TYPE or NODE_TYPE object_type: either QUEUE_TYPE or NODE_TYPE
max_detailed: the limit of objects to collect for this type max_detailed: the limit of objects to collect for this type
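Putting those parameters together, a hypothetical call for queue statistics against the example management URL from the docstring might look like this (the `check` object, the cap, and the queue names are illustrative):

```
check.get_stats(instance,                      # the check instance dict
                'http://localhost:15672/api',  # base_url of the management API
                QUEUE_TYPE,                    # object_type: queues rather than nodes
                max_detailed=100,              # limit of objects to collect in detail
                specified_list=['queue1', 'queue10'])
```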
@ -116,7 +117,8 @@ class RabbitMQ(AgentCheck):
# Make a copy of this list as we will remove items from it at each iteration # Make a copy of this list as we will remove items from it at each iteration
specified_items = list(specified_list) specified_items = list(specified_list)
""" data is a list of nodes or queues: """data is a list of nodes or queues:
data = [ data = [
{'status': 'running', 'node': 'rabbit@host', 'name': 'queue1', 'consumers': 0, 'vhost': '/', 'backing_queue_status': {'q1': 0, 'q3': 0, 'q2': 0, 'q4': 0, 'avg_ack_egress_rate': 0.0, 'ram_msg_count': 0, 'ram_ack_count': 0, 'len': 0, 'persistent_count': 0, 'target_ram_count': 'infinity', 'next_seq_id': 0, 'delta': ['delta', 'undefined', 0, 'undefined'], 'pending_acks': 0, 'avg_ack_ingress_rate': 0.0, 'avg_egress_rate': 0.0, 'avg_ingress_rate': 0.0}, 'durable': True, 'idle_since': '2013-10-03 13:38:18', 'exclusive_consumer_tag': '', 'arguments': {}, 'memory': 10956, 'policy': '', 'auto_delete': False}, {'status': 'running', 'node': 'rabbit@host', 'name': 'queue1', 'consumers': 0, 'vhost': '/', 'backing_queue_status': {'q1': 0, 'q3': 0, 'q2': 0, 'q4': 0, 'avg_ack_egress_rate': 0.0, 'ram_msg_count': 0, 'ram_ack_count': 0, 'len': 0, 'persistent_count': 0, 'target_ram_count': 'infinity', 'next_seq_id': 0, 'delta': ['delta', 'undefined', 0, 'undefined'], 'pending_acks': 0, 'avg_ack_ingress_rate': 0.0, 'avg_egress_rate': 0.0, 'avg_ingress_rate': 0.0}, 'durable': True, 'idle_since': '2013-10-03 13:38:18', 'exclusive_consumer_tag': '', 'arguments': {}, 'memory': 10956, 'policy': '', 'auto_delete': False},
{'status': 'running', 'node': 'rabbit@host', 'name': 'queue10', 'consumers': 0, 'vhost': '/', 'backing_queue_status': {'q1': 0, 'q3': 0, 'q2': 0, 'q4': 0, 'avg_ack_egress_rate': 0.0, 'ram_msg_count': 0, 'ram_ack_count': 0, 'len': 0, 'persistent_count': 0, 'target_ram_count': 'infinity', 'next_seq_id': 0, 'delta': ['delta', 'undefined', 0, 'undefined'], 'pending_acks': 0, 'avg_ack_ingress_rate': 0.0, 'avg_egress_rate': 0.0, 'avg_ingress_rate': 0.0}, 'durable': True, 'idle_since': '2013-10-03 13:38:18', 'exclusive_consumer_tag': '', 'arguments': {}, 'memory': 10956, 'policy': '', 'auto_delete': False}, {'status': 'running', 'node': 'rabbit@host', 'name': 'queue10', 'consumers': 0, 'vhost': '/', 'backing_queue_status': {'q1': 0, 'q3': 0, 'q2': 0, 'q4': 0, 'avg_ack_egress_rate': 0.0, 'ram_msg_count': 0, 'ram_ack_count': 0, 'len': 0, 'persistent_count': 0, 'target_ram_count': 'infinity', 'next_seq_id': 0, 'delta': ['delta', 'undefined', 0, 'undefined'], 'pending_acks': 0, 'avg_ack_ingress_rate': 0.0, 'avg_egress_rate': 0.0, 'avg_ingress_rate': 0.0}, 'durable': True, 'idle_since': '2013-10-03 13:38:18', 'exclusive_consumer_tag': '', 'arguments': {}, 'memory': 10956, 'policy': '', 'auto_delete': False},

View File

@ -1,6 +1,6 @@
''' """Redis checks.
Redis checks
''' """
import re import re
import time import time
@ -84,7 +84,9 @@ class Redis(AgentCheck):
return {"redis": version} return {"redis": version}
def _parse_dict_string(self, string, key, default): def _parse_dict_string(self, string, key, default):
"""Take from a more recent redis.py, parse_info""" """Take from a more recent redis.py, parse_info.
"""
try: try:
for item in string.split(','): for item in string.split(','):
k, v = item.rsplit('=', 1) k, v = item.rsplit('=', 1)
@ -94,7 +96,7 @@ class Redis(AgentCheck):
except ValueError: except ValueError:
return v return v
return default return default
except Exception as e: except Exception:
self.log.exception("Cannot parse dictionary string: %s" % string) self.log.exception("Cannot parse dictionary string: %s" % string)
return default return default
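The parse above walks a comma-separated k=v string, as redis INFO emits for keyspace entries. A concrete, hypothetical input showing the split it performs:

```
# e.g. the value of a 'db0' keyspace entry from redis INFO:
string = 'keys=123,expires=0,avg_ttl=0'
for item in string.split(','):
    k, v = item.rsplit('=', 1)
    # yields ('keys', '123'), ('expires', '0'), ('avg_ttl', '0');
    # looking up key 'keys' would return 123 once converted to a number.
```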
@ -144,7 +146,7 @@ class Redis(AgentCheck):
start = time.time() start = time.time()
try: try:
info = conn.info() info = conn.info()
except ValueError as e: except ValueError:
# This is likely a known issue with redis library 2.0.0 # This is likely a known issue with redis library 2.0.0
# See https://github.com/DataDog/dd-agent/issues/374 for details # See https://github.com/DataDog/dd-agent/issues/374 for details
import redis import redis

View File

@ -1,9 +1,9 @@
from hashlib import md5 from hashlib import md5
from httplib2 import Http
from httplib2 import HttpLib2Error
import json import json
import time
import socket import socket
import time
from httplib2 import Http, HttpLib2Error
from monagent.collector.checks import AgentCheck from monagent.collector.checks import AgentCheck
@ -60,15 +60,15 @@ class Riak(AgentCheck):
h = Http(timeout=timeout) h = Http(timeout=timeout)
resp, content = h.request(url, "GET") resp, content = h.request(url, "GET")
except socket.timeout as e: except socket.timeout:
self.timeout_event(url, timeout, aggregation_key) self.timeout_event(url, timeout, aggregation_key)
return return
except socket.error as e: except socket.error:
self.timeout_event(url, timeout, aggregation_key) self.timeout_event(url, timeout, aggregation_key)
return return
except HttpLib2Error as e: except HttpLib2Error:
self.timeout_event(url, timeout, aggregation_key) self.timeout_event(url, timeout, aggregation_key)
return return

View File

@ -1,6 +1,6 @@
''' """Check the performance counters from SQL Server.
Check the performance counters from SQL Server
''' """
import traceback import traceback
from monagent.collector.checks import AgentCheck from monagent.collector.checks import AgentCheck
@ -39,14 +39,16 @@ class SQLServer(AgentCheck):
@staticmethod @staticmethod
def _conn_key(host, username, password, database): def _conn_key(host, username, password, database):
''' Return a key to use for the connection cache """Return a key to use for the connection cache.
'''
"""
return '%s:%s:%s:%s' % (host, username, password, database) return '%s:%s:%s:%s' % (host, username, password, database)
@staticmethod @staticmethod
def _conn_string(host, username, password, database): def _conn_string(host, username, password, database):
''' Return a connection string to use with adodbapi """Return a connection string to use with adodbapi.
'''
"""
conn_str = 'Provider=SQLOLEDB;Data Source=%s;Initial Catalog=%s;' % (host, database) conn_str = 'Provider=SQLOLEDB;Data Source=%s;Initial Catalog=%s;' % (host, database)
if username: if username:
conn_str += 'User ID=%s;' % (username) conn_str += 'User ID=%s;' % (username)
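With illustrative values, the assembled ADO connection string from the two statements above comes out as:

```
host, database, username = 'dbhost', 'master', 'monitor'  # hypothetical values
conn_str = 'Provider=SQLOLEDB;Data Source=%s;Initial Catalog=%s;' % (host, database)
conn_str += 'User ID=%s;' % (username)
# conn_str is now:
# 'Provider=SQLOLEDB;Data Source=dbhost;Initial Catalog=master;User ID=monitor;'
```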
@ -74,7 +76,7 @@ class SQLServer(AgentCheck):
conn_str = self._conn_string(host, username, password, database) conn_str = self._conn_string(host, username, password, database)
conn = adodbapi.connect(conn_str) conn = adodbapi.connect(conn_str)
self.connections[conn_key] = conn self.connections[conn_key] = conn
except Exception as e: except Exception:
cx = "%s - %s" % (host, database) cx = "%s - %s" % (host, database)
raise Exception("Unable to connect to SQL Server for instance %s.\n %s" raise Exception("Unable to connect to SQL Server for instance %s.\n %s"
% (cx, traceback.format_exc())) % (cx, traceback.format_exc()))
@ -84,8 +86,9 @@ class SQLServer(AgentCheck):
self._fetch_metrics(cursor, dimensions) self._fetch_metrics(cursor, dimensions)
def _fetch_metrics(self, cursor, custom_dimensions): def _fetch_metrics(self, cursor, custom_dimensions):
''' Fetch the metrics from the sys.dm_os_performance_counters table """Fetch the metrics from the sys.dm_os_performance_counters table.
'''
"""
for metric in self.METRICS: for metric in self.METRICS:
# Normalize all rows to the same size for easy of use # Normalize all rows to the same size for easy of use
if len(metric) == 3: if len(metric) == 3:
@ -100,7 +103,7 @@ class SQLServer(AgentCheck):
if instance_n == ALL_INSTANCES: if instance_n == ALL_INSTANCES:
try: try:
self._fetch_all_instances(metric, cursor, custom_dimensions) self._fetch_all_instances(metric, cursor, custom_dimensions)
except Exception as e: except Exception:
self.log.exception('Unable to fetch metric: %s' % mname) self.log.exception('Unable to fetch metric: %s' % mname)
self.warning('Unable to fetch metric: %s' % mname) self.warning('Unable to fetch metric: %s' % mname)
else: else:
@ -112,7 +115,7 @@ class SQLServer(AgentCheck):
and instance_name = ? and instance_name = ?
""", (counter, instance_n)) """, (counter, instance_n))
(value,) = cursor.fetchone() (value,) = cursor.fetchone()
except Exception as e: except Exception:
self.log.exception('Unable to fetch metric: %s' % mname) self.log.exception('Unable to fetch metric: %s' % mname)
self.warning('Unable to fetch metric: %s' % mname) self.warning('Unable to fetch metric: %s' % mname)
continue continue

View File

@ -1,7 +1,9 @@
import socket import socket
import time import time
from monagent.collector.checks.services_checks import ServicesCheck, Status, EventType from monagent.collector.checks.services_checks import EventType
from monagent.collector.checks.services_checks import ServicesCheck
from monagent.collector.checks.services_checks import Status
class BadConfException(Exception): class BadConfException(Exception):

View File

@ -1,6 +1,6 @@
import xml.parsers.expat # python 2.4 compatible
import re import re
import subprocess import subprocess
import xml.parsers.expat # python 2.4 compatible
from monagent.collector.checks import AgentCheck from monagent.collector.checks import AgentCheck

View File

@ -1,8 +1,8 @@
''' """Monitor the Windows Event Log.
Monitor the Windows Event Log
''' """
from datetime import datetime
import calendar import calendar
from datetime import datetime
try: try:
import wmi import wmi
except Exception: except Exception:
@ -75,9 +75,10 @@ class Win32EventLog(AgentCheck):
@staticmethod @staticmethod
def _instance_key(instance): def _instance_key(instance):
''' Generate a unique key per instance for use with keeping track of """Generate a unique key per instance for use with keeping track of
state for each instance.
''' state for each instance.
"""
return '%s' % (instance) return '%s' % (instance)
@ -95,7 +96,9 @@ class EventLogQuery(object):
self.start_ts = start_ts self.start_ts = start_ts
def to_wql(self): def to_wql(self):
''' Return this query as a WQL string. ''' """Return this query as a WQL string.
"""
wql = """ wql = """
SELECT Message, SourceName, TimeGenerated, Type, User, InsertionStrings SELECT Message, SourceName, TimeGenerated, Type, User, InsertionStrings
FROM Win32_NTLogEvent FROM Win32_NTLogEvent
@ -125,9 +128,10 @@ class EventLogQuery(object):
@staticmethod @staticmethod
def _add_message_filter(msg_filter, q): def _add_message_filter(msg_filter, q):
''' Filter on the message text using a LIKE query. If the filter starts """Filter on the message text using a LIKE query. If the filter starts
with '-' then we'll assume that it's a NOT LIKE filter.
''' with '-' then we'll assume that it's a NOT LIKE filter.
"""
if msg_filter.startswith('-'): if msg_filter.startswith('-'):
msg_filter = msg_filter[1:] msg_filter = msg_filter[1:]
q += '\nAND NOT Message LIKE "%s"' % msg_filter q += '\nAND NOT Message LIKE "%s"' % msg_filter
@ -137,18 +141,19 @@ class EventLogQuery(object):
@staticmethod @staticmethod
def _dt_to_wmi(dt): def _dt_to_wmi(dt):
''' A wrapper around wmi.from_time to get a WMI-formatted time from a """A wrapper around wmi.from_time to get a WMI-formatted time from a time struct.
time struct.
''' """
return wmi.from_time(year=dt.year, month=dt.month, day=dt.day, return wmi.from_time(year=dt.year, month=dt.month, day=dt.day,
hours=dt.hour, minutes=dt.minute, seconds=dt.second, microseconds=0, hours=dt.hour, minutes=dt.minute, seconds=dt.second, microseconds=0,
timezone=0) timezone=0)
@staticmethod @staticmethod
def _convert_event_types(types): def _convert_event_types(types):
''' Detect if we are running on <= Server 2003. If so, we should convert """Detect if we are running on <= Server 2003. If so, we should convert
the EventType values to integers the EventType values to integers
''' """
return types return types
@ -177,15 +182,18 @@ class LogEvent(object):
} }
def is_after(self, ts): def is_after(self, ts):
''' Compare this event's timestamp to a given timestamp. ''' """Compare this event's timestamp to a given timestamp.
"""
if self.timestamp >= int(calendar.timegm(ts.timetuple())): if self.timestamp >= int(calendar.timegm(ts.timetuple())):
return True return True
return False return False
@staticmethod @staticmethod
def _wmi_to_ts(wmi_ts): def _wmi_to_ts(wmi_ts):
''' Convert a wmi formatted timestamp into an epoch using wmi.to_time(). """Convert a wmi formatted timestamp into an epoch using wmi.to_time().
'''
"""
year, month, day, hour, minute, second, microsecond, tz = \ year, month, day, hour, minute, second, microsecond, tz = \
wmi.to_time(wmi_ts) wmi.to_time(wmi_ts)
dt = datetime(year=year, month=month, day=day, hour=hour, minute=minute, dt = datetime(year=year, month=month, day=day, hour=hour, minute=minute,
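The two converters above are inverses built on the `wmi` module's time helpers; roughly (Windows-only, requires the `wmi` package, so treat this as a sketch):

```
import calendar
from datetime import datetime

import wmi

dt = datetime(2014, 8, 21, 17, 33, 29)
wmi_ts = wmi.from_time(year=dt.year, month=dt.month, day=dt.day,
                       hours=dt.hour, minutes=dt.minute, seconds=dt.second,
                       microseconds=0, timezone=0)
# ...and back again to an epoch timestamp:
year, month, day, hour, minute, second, microsecond, tz = wmi.to_time(wmi_ts)
epoch = calendar.timegm(datetime(year, month, day, hour, minute, second).timetuple())
```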

View File

@ -1,10 +1,9 @@
''' """Windows Only.
Windows Only.
Generic WMI check. This check allows you to specify particular metrics that you Generic WMI check. This check allows you to specify particular metrics that you
want from WMI in your configuration. Check wmi.yaml.example in your conf.d want from WMI in your configuration. Check wmi.yaml.example in your conf.d
directory for more details on configuration. directory for more details on configuration.
''' """
try: try:
import wmi import wmi
except Exception: except Exception:

View File

@ -1,5 +1,4 @@
''' """Parses the response from zookeeper's `stat` admin command, which looks like:
Parses the response from zookeeper's `stat` admin command, which looks like:
``` ```
Zookeeper version: 3.2.2--1, built on 03/16/2010 07:31 GMT Zookeeper version: 3.2.2--1, built on 03/16/2010 07:31 GMT
@ -21,13 +20,12 @@ Node count: 487
``` ```
Tested with Zookeeper versions 3.0.0 to 3.4.5 Tested with Zookeeper versions 3.0.0 to 3.4.5
"""
'''
import re import re
import socket import socket
import struct
from StringIO import StringIO from StringIO import StringIO
import struct
from monagent.collector.checks import AgentCheck from monagent.collector.checks import AgentCheck
@ -87,9 +85,10 @@ class Zookeeper(AgentCheck):
@classmethod @classmethod
def parse_stat(cls, buf): def parse_stat(cls, buf):
''' `buf` is a readable file-like object """`buf` is a readable file-like object
returns a tuple: ([(metric_name, value)], dimensions)
''' returns a tuple: ([(metric_name, value)], dimensions)
"""
metrics = [] metrics = []
buf.seek(0) buf.seek(0)
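From the caller's side, parse_stat accepts any readable buffer of `stat` output and returns the ([(metric_name, value)], dimensions) tuple described above. A hypothetical use (the sample lines echo the docstring; the exact fields the parser requires may differ):

```
from StringIO import StringIO

buf = StringIO('Zookeeper version: 3.2.2--1, built on 03/16/2010 07:31 GMT\n'
               'Latency min/avg/max: 0/0/117\n'
               'Node count: 487\n')
metrics, dimensions = Zookeeper.parse_stat(buf)
```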

View File

@ -1,19 +1,25 @@
#!/usr/bin/env python #!/usr/bin/env python
# set up logging before importing any other components
from monagent.common.config import get_version, initialize_logging
initialize_logging('collector')
import os
os.umask(0o22)
# Core modules # Core modules
import glob
import logging import logging
import os.path import os
import signal import signal
import sys import sys
import time import time
import glob
# Custom modules
import checks.collector
import jmxfetch
import monagent.common.check_status
import monagent.common.config
import monagent.common.daemon
import monagent.common.emitter
import monagent.common.util
# set up logging before importing any other components
monagent.common.config.initialize_logging('collector')
os.umask(0o22)
# Check we're not using an old version of Python. We need 2.4 or above because # Check we're not using an old version of Python. We need 2.4 or above because
# some modules (like subprocess) were only introduced in 2.4. # some modules (like subprocess) were only introduced in 2.4.
@ -21,16 +27,6 @@ if int(sys.version_info[1]) <= 3:
sys.stderr.write("Monasca Agent requires python 2.4 or later.\n") sys.stderr.write("Monasca Agent requires python 2.4 or later.\n")
sys.exit(2) sys.exit(2)
# Custom modules
from checks.collector import Collector
from monagent.common.check_status import CollectorStatus, ForwarderStatus
from monagent.common.config import get_config, get_parsed_args, load_check_directory, get_confd_path, check_yaml, get_logging_config
from monagent.common.daemon import Daemon, AgentSupervisor
from monagent.common.emitter import http_emitter
from monagent.common.util import Watchdog, PidFile, get_os
from jmxfetch import JMXFetch, JMX_LIST_COMMANDS
# Constants # Constants
PID_NAME = "monasca-agent" PID_NAME = "monasca-agent"
WATCHDOG_MULTIPLIER = 10 WATCHDOG_MULTIPLIER = 10
@ -43,14 +39,14 @@ log = logging.getLogger('collector')
# todo the collector has daemon code but is always run in foreground mode # todo the collector has daemon code but is always run in foreground mode
# from the supervisor, is there a reason for the daemon code then? # from the supervisor, is there a reason for the daemon code then?
class CollectorDaemon(Daemon): class CollectorDaemon(monagent.common.daemon.Daemon):
"""The agent class is a daemon that runs the collector in a background process.
"""
The agent class is a daemon that runs the collector in a background process.
""" """
def __init__(self, pidfile, autorestart, start_event=True): def __init__(self, pidfile, autorestart, start_event=True):
Daemon.__init__(self, pidfile, autorestart=autorestart) monagent.common.daemon.Daemon.__init__(self, pidfile, autorestart=autorestart)
self.run_forever = True self.run_forever = True
self.collector = None self.collector = None
self.start_event = start_event self.start_event = start_event
@ -59,8 +55,8 @@ class CollectorDaemon(Daemon):
log.debug("Caught sigterm. Stopping run loop.") log.debug("Caught sigterm. Stopping run loop.")
self.run_forever = False self.run_forever = False
if JMXFetch.is_running(): if jmxfetch.JMXFetch.is_running():
JMXFetch.stop() jmxfetch.JMXFetch.stop()
if self.collector: if self.collector:
self.collector.stop() self.collector.stop()
@ -72,10 +68,12 @@ class CollectorDaemon(Daemon):
def info(self, verbose=None): def info(self, verbose=None):
logging.getLogger().setLevel(logging.ERROR) logging.getLogger().setLevel(logging.ERROR)
return CollectorStatus.print_latest_status(verbose=verbose) return monagent.common.check_status.CollectorStatus.print_latest_status(verbose=verbose)
def run(self, config=None): def run(self, config=None):
"""Main loop of the collector""" """Main loop of the collector.
"""
# Gracefully exit on sigterm. # Gracefully exit on sigterm.
signal.signal(signal.SIGTERM, self._handle_sigterm) signal.signal(signal.SIGTERM, self._handle_sigterm)
@ -87,15 +85,15 @@ class CollectorDaemon(Daemon):
signal.signal(signal.SIGINT, self._handle_sigterm) signal.signal(signal.SIGINT, self._handle_sigterm)
# Save the agent start-up stats. # Save the agent start-up stats.
CollectorStatus().persist() monagent.common.check_status.CollectorStatus().persist()
# Initialize the collector. # Initialize the collector.
if config is None: if config is None:
config = get_config(parse_args=True) config = monagent.common.config.get_config(parse_args=True)
# Load the checks_d checks # Load the checks_d checks
checksd = load_check_directory(config) checksd = monagent.common.config.load_check_directory(config)
self.collector = Collector(config, http_emitter, checksd) self.collector = checks.collector.Collector(config, monagent.common.emitter.http_emitter, checksd)
# Configure the watchdog. # Configure the watchdog.
check_frequency = int(config['check_freq']) check_frequency = int(config['check_freq'])
@ -127,9 +125,9 @@ class CollectorDaemon(Daemon):
if config.get('profile', False) and config.get('profile').lower() == 'yes' and profiled: if config.get('profile', False) and config.get('profile').lower() == 'yes' and profiled:
try: try:
profiler.disable() profiler.disable()
import cStringIO
import pstats import pstats
from cStringIO import StringIO s = cStringIO.StringIO()
s = StringIO()
ps = pstats.Stats(profiler, stream=s).sort_stats("cumulative") ps = pstats.Stats(profiler, stream=s).sort_stats("cumulative")
ps.print_stats() ps.print_stats()
log.debug(s.getvalue()) log.debug(s.getvalue())
@ -149,7 +147,7 @@ class CollectorDaemon(Daemon):
# Now clean-up. # Now clean-up.
try: try:
CollectorStatus.remove_latest_status() monagent.common.check_status.CollectorStatus.remove_latest_status()
except Exception: except Exception:
pass pass
@ -162,8 +160,9 @@ class CollectorDaemon(Daemon):
def _get_watchdog(check_freq, agentConfig): def _get_watchdog(check_freq, agentConfig):
watchdog = None watchdog = None
if agentConfig.get("watchdog", True): if agentConfig.get("watchdog", True):
watchdog = Watchdog(check_freq * WATCHDOG_MULTIPLIER, watchdog = monagent.common.util.Watchdog(check_freq * WATCHDOG_MULTIPLIER,
max_mem_mb=agentConfig.get('limit_memory_consumption', None)) max_mem_mb=agentConfig.get('limit_memory_consumption',
None))
watchdog.reset() watchdog.reset()
return watchdog return watchdog
@ -176,12 +175,12 @@ class CollectorDaemon(Daemon):
log.info("Running an auto-restart.") log.info("Running an auto-restart.")
if self.collector: if self.collector:
self.collector.stop() self.collector.stop()
sys.exit(AgentSupervisor.RESTART_EXIT_STATUS) sys.exit(monagent.common.daemon.AgentSupervisor.RESTART_EXIT_STATUS)
def main(): def main():
options, args = get_parsed_args() options, args = monagent.common.config.get_parsed_args()
agentConfig = get_config(options=options) agentConfig = monagent.common.config.get_config(options=options)
# todo autorestart isn't used remove # todo autorestart isn't used remove
autorestart = agentConfig.get('autorestart', False) autorestart = agentConfig.get('autorestart', False)
@ -207,7 +206,7 @@ def main():
sys.stderr.write("Unknown command: %s\n" % command) sys.stderr.write("Unknown command: %s\n" % command)
return 3 return 3
pid_file = PidFile('monasca-agent') pid_file = monagent.common.util.PidFile('monasca-agent')
if options.clean: if options.clean:
pid_file.clean() pid_file.clean()
@ -215,7 +214,7 @@ def main():
agent = CollectorDaemon(pid_file.get_path(), autorestart) agent = CollectorDaemon(pid_file.get_path(), autorestart)
if command in START_COMMANDS: if command in START_COMMANDS:
log.info('Agent version %s' % get_version()) log.info('Agent version %s' % monagent.common.config.get_version())
if 'start' == command: if 'start' == command:
log.info('Start daemon') log.info('Start daemon')
@ -247,34 +246,29 @@ def main():
def parent_func(): def parent_func():
agent.start_event = False agent.start_event = False
AgentSupervisor.start(parent_func, child_func) monagent.common.daemon.AgentSupervisor.start(parent_func, child_func)
else: else:
# Run in the standard foreground. # Run in the standard foreground.
agent.run(config=agentConfig) agent.run(config=agentConfig)
elif 'check' == command: elif 'check' == command:
check_name = args[1] check_name = args[1]
try: checks = monagent.common.config.load_check_directory(agentConfig)
# Try the old-style check first for check in checks['initialized_checks']:
print(getattr(collector.checks.collector, check_name)(log).check(agentConfig)) if check.name == check_name:
except Exception: check.run()
# If not an old-style check, try checks_d print("Metrics: ")
checks = load_check_directory(agentConfig) check.get_metrics(prettyprint=True)
for check in checks['initialized_checks']: if len(args) == 3 and args[2] == 'check_rate':
if check.name == check_name: print("Running 2nd iteration to capture rate metrics")
time.sleep(1)
check.run() check.run()
print("Metrics: ") print("Metrics: ")
check.get_metrics(prettyprint=True) check.get_metrics(prettyprint=True)
if len(args) == 3 and args[2] == 'check_rate':
print("Running 2nd iteration to capture rate metrics")
time.sleep(1)
check.run()
print("Metrics: ")
check.get_metrics(prettyprint=True)
elif 'check_all' == command: elif 'check_all' == command:
print("Loading check directory...") print("Loading check directory...")
checks = load_check_directory(agentConfig) checks = monagent.common.config.load_check_directory(agentConfig)
print("...directory loaded.\n") print("...directory loaded.\n")
for check in checks['initialized_checks']: for check in checks['initialized_checks']:
print("#" * 80) print("#" * 80)
@ -285,12 +279,12 @@ def main():
print("#" * 80 + "\n\n") print("#" * 80 + "\n\n")
elif 'configcheck' == command or 'configtest' == command: elif 'configcheck' == command or 'configtest' == command:
osname = get_os() osname = monagent.common.util.get_os()
all_valid = True all_valid = True
for conf_path in glob.glob(os.path.join(get_confd_path(osname), "*.yaml")): for conf_path in glob.glob(os.path.join(monagent.common.config.get_confd_path(osname), "*.yaml")):
basename = os.path.basename(conf_path) basename = os.path.basename(conf_path)
try: try:
check_yaml(conf_path) monagent.common.config.check_yaml(conf_path)
except Exception as e: except Exception as e:
all_valid = False all_valid = False
print("%s contains errors:\n %s" % (basename, e)) print("%s contains errors:\n %s" % (basename, e))
@ -307,14 +301,14 @@ def main():
elif 'jmx' == command: elif 'jmx' == command:
if len(args) < 2 or args[1] not in JMX_LIST_COMMANDS.keys(): if len(args) < 2 or args[1] not in jmxfetch.JMX_LIST_COMMANDS.keys():
print("#" * 80) print("#" * 80)
print("JMX tool to be used to help configuring your JMX checks.") print("JMX tool to be used to help configuring your JMX checks.")
print("See http://docs.datadoghq.com/integrations/java/ for more information") print("See http://docs.datadoghq.com/integrations/java/ for more information")
print("#" * 80) print("#" * 80)
print("\n") print("\n")
print("You have to specify one of the following command:") print("You have to specify one of the following command:")
for command, desc in JMX_LIST_COMMANDS.iteritems(): for command, desc in jmxfetch.JMX_LIST_COMMANDS.iteritems():
print(" - %s [OPTIONAL: LIST OF CHECKS]: %s" % (command, desc)) print(" - %s [OPTIONAL: LIST OF CHECKS]: %s" % (command, desc))
print("Example: sudo /etc/init.d/monasca-agent jmx list_matching_attributes tomcat jmx solr") print("Example: sudo /etc/init.d/monasca-agent jmx list_matching_attributes tomcat jmx solr")
print("\n") print("\n")
@ -322,11 +316,11 @@ def main():
else: else:
jmx_command = args[1] jmx_command = args[1]
checks_list = args[2:] checks_list = args[2:]
confd_directory = get_confd_path(get_os()) confd_directory = monagent.common.config.get_confd_path(monagent.common.util.get_os())
should_run = JMXFetch.init( should_run = jmxfetch.JMXFetch.init(
confd_directory, confd_directory,
agentConfig, agentConfig,
get_logging_config(), monagent.common.config.get_logging_config(),
15, 15,
jmx_command, jmx_command,
checks_list, checks_list,

View File

@ -1,7 +1,7 @@
from datetime import datetime from datetime import datetime
import re import re
from collector.dogstream import common from monagent.collector.dogstream import common
LOG4J_PRIORITY = [ LOG4J_PRIORITY = [

View File

@ -1,14 +1,11 @@
""" """Custom parser for supervisord log suitable for use by Datadog 'dogstreams'
Custom parser for supervisord log suitable for use by Datadog 'dogstreams'
Add to datadog.conf as follows: Add to datadog.conf as follows:
dogstreams: [path_to_supervisord.log]:datadog.streams.supervisord:parse_supervisord
dogstreams: [path_to_supervisord.log]:datadog.streams.supervisord:parse_supervisord
""" """
from datetime import datetime from datetime import datetime
import time
import re import re
import time
EVENT_TYPE = "supervisor" EVENT_TYPE = "supervisor"
@ -37,8 +34,8 @@ program_matcher = re.compile("^\w+:? '?(?P<program>\w+)'?")
def parse_supervisord(log, line): def parse_supervisord(log, line):
""" """Parse the supervisord.log line into a dogstream event.
Parse the supervisord.log line into a dogstream event
""" """
if len(line) == 0: if len(line) == 0:
log.info("Skipping empty line of supervisord.log") log.info("Skipping empty line of supervisord.log")
@ -71,9 +68,9 @@ def parse_supervisord(log, line):
return None return None
if __name__ == "__main__": if __name__ == "__main__":
import sys
import pprint
import logging import logging
import pprint
import sys
logging.basicConfig() logging.basicConfig()
log = logging.getLogger() log = logging.getLogger()
lines = open(sys.argv[1]).readlines() lines = open(sys.argv[1]).readlines()

View File

@ -1,19 +1,14 @@
# std
import yaml
try:
from yaml import CLoader as Loader
except ImportError:
from yaml import Loader
import os
import logging
import glob import glob
import logging
import os
import signal import signal
import subprocess import subprocess
import tempfile import tempfile
import time import time
from monagent.common.util import PidFile, get_os import yaml
import monagent.common.util
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
@ -55,7 +50,7 @@ class InvalidJMXConfiguration(Exception):
class JMXFetch(object): class JMXFetch(object):
pid_file = PidFile("jmxfetch") pid_file = monagent.common.util.PidFile("jmxfetch")
pid_file_path = pid_file.get_path() pid_file_path = pid_file.get_path()
@classmethod @classmethod
@ -95,27 +90,26 @@ class JMXFetch(object):
@classmethod @classmethod
def should_run(cls, confd_path, checks_list): def should_run(cls, confd_path, checks_list):
""" '''Return a tuple (jmx_checks, invalid_checks, java_bin_path, java_options).
Return a tuple (jmx_checks, invalid_checks, java_bin_path, java_options)
jmx_checks: list of yaml files that are jmx checks jmx_checks: list of yaml files that are jmx checks
(they have the is_jmx flag enabled or they are in JMX_CHECKS) (they have the is_jmx flag enabled or they are in JMX_CHECKS)
and that have at least one instance configured and that have at least one instance configured
invalid_checks: dictionary whose keys are check names that are JMX checks but invalid_checks: dictionary whose keys are check names that are JMX checks but
have a bad configuration. Values of the dictionary are exceptions generated have a bad configuration. Values of the dictionary are exceptions generated
when checking the configuration when checking the configuration
java_bin_path: is the path to the java executable. It was java_bin_path: is the path to the java executable. It was
previously set in the "instance" part of the yaml file of the previously set in the "instance" part of the yaml file of the
jmx check. So we need to parse yaml files to get it. jmx check. So we need to parse yaml files to get it.
We assume that this value is always the same for every jmx check We assume that this value is always the same for every jmx check
so we can return the first value returned so we can return the first value returned
java_options: is a string of options that will be passed to java_bin_path java_options: is a string of options that will be passed to java_bin_path
We assume that this value is always the same for every jmx check We assume that this value is always the same for every jmx check
so we can return the first value returned so we can return the first value returned
""" '''
jmx_checks = [] jmx_checks = []
java_bin_path = None java_bin_path = None
@ -129,6 +123,10 @@ class JMXFetch(object):
if os.path.exists(conf): if os.path.exists(conf):
f = open(conf) f = open(conf)
try: try:
if hasattr(yaml, 'CLoader'):
Loader = yaml.CLoader
else:
Loader = yaml.Loader
check_config = yaml.load(f.read(), Loader=Loader) check_config = yaml.load(f.read(), Loader=Loader)
assert check_config is not None assert check_config is not None
f.close() f.close()
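The loader selection now happens at read time via hasattr; the removed module-level variant of the same pattern (preferring the C loader when libyaml is available) looks like this, with an illustrative file name:

```
import yaml

try:
    from yaml import CLoader as Loader  # fast C implementation when libyaml is present
except ImportError:
    from yaml import Loader             # pure-Python fallback

with open('example.yaml') as f:         # 'example.yaml' is illustrative
    check_config = yaml.load(f.read(), Loader=Loader)
```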
@ -236,7 +234,7 @@ class JMXFetch(object):
except Exception: except Exception:
return False return False
if get_os() != 'windows': if monagent.common.util.get_os() != 'windows':
try: try:
os.kill(pid, 0) os.kill(pid, 0)
# os.kill(pid, 0) will throw an exception if pid is not running # os.kill(pid, 0) will throw an exception if pid is not running
@ -292,7 +290,7 @@ class JMXFetch(object):
@classmethod @classmethod
def get_path_to_jmxfetch(cls): def get_path_to_jmxfetch(cls):
if get_os() != 'windows': if monagent.common.util.get_os() != 'windows':
return os.path.realpath( return os.path.realpath(
os.path.join(os.path.abspath(__file__), "..", "../collector/checks", "libs", os.path.join(os.path.abspath(__file__), "..", "../collector/checks", "libs",
JMX_FETCH_JAR_NAME)) JMX_FETCH_JAR_NAME))

View File

@ -1,16 +1,18 @@
""" Tools for loading Python modules from arbitrary locations. """Tools for loading Python modules from arbitrary locations.
""" """
import os
import imp import imp
import os
import sys import sys
# todo seems to be only used by dogstream at this point, possibly remove? # todo seems to be only used by dogstream at this point, possibly remove?
def imp_type_for_filename(filename): def imp_type_for_filename(filename):
"""Given the name of a Python module, return a type description suitable to """Given the name of a Python module, return a type description suitable to be passed to imp.load_module().
be passed to imp.load_module()"""
"""
for type_data in imp.get_suffixes(): for type_data in imp.get_suffixes():
extension = type_data[0] extension = type_data[0]
if filename.endswith(extension): if filename.endswith(extension):
@ -19,7 +21,9 @@ def imp_type_for_filename(filename):
def load_qualified_module(full_module_name, path=None): def load_qualified_module(full_module_name, path=None):
"""Load a module which may be within a package""" """Load a module which may be within a package.
"""
remaining_pieces = full_module_name.split('.') remaining_pieces = full_module_name.split('.')
done_pieces = [] done_pieces = []
file_obj = None file_obj = None
@ -40,7 +44,8 @@ def module_name_for_filename(filename):
"""Given the name of a Python file, find an appropropriate module name. """Given the name of a Python file, find an appropropriate module name.
This involves determining whether the file is within a package, and This involves determining whether the file is within a package, and
determining the name of same.""" determining the name of same.
"""
all_segments = filename.split(os.sep) all_segments = filename.split(os.sep)
path_elements = all_segments[:-1] path_elements = all_segments[:-1]
module_elements = [all_segments[-1].rsplit('.', 1)[0]] module_elements = [all_segments[-1].rsplit('.', 1)[0]]
@ -52,10 +57,10 @@ def module_name_for_filename(filename):
def get_module(name): def get_module(name):
"""Given either an absolute path to a Python file or a module name, load """Given either an absolute path to a Python file or a module name, load and return a Python module.
and return a Python module.
If the module is already loaded, takes no action.""" If the module is already loaded, takes no action.
"""
if name.startswith('/'): if name.startswith('/'):
basename, modulename = module_name_for_filename(name) basename, modulename = module_name_for_filename(name)
path = [basename] path = [basename]
@ -68,8 +73,9 @@ def get_module(name):
def load(config_string, default_name=None): def load(config_string, default_name=None):
"""Given a module name and an object expected to be contained within, """Given a module name and an object expected to be contained within, return said object.
return said object"""
"""
(module_name, object_name) = (config_string.rsplit(':', 1) + [default_name])[:2] (module_name, object_name) = (config_string.rsplit(':', 1) + [default_name])[:2]
module = get_module(module_name) module = get_module(module_name)
if object_name: if object_name:
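The module:object split on the rsplit line above behaves as follows (values borrowed from the supervisord dogstream example earlier in this commit):

```
default_name = 'parse_supervisord'
config_string = 'datadog.streams.supervisord:parse_supervisord'
(module_name, object_name) = (config_string.rsplit(':', 1) + [default_name])[:2]
# module_name == 'datadog.streams.supervisord', object_name == 'parse_supervisord'

# With no ':' present, the default object name is used:
(module_name, object_name) = ('mymodule'.rsplit(':', 1) + [default_name])[:2]
# module_name == 'mymodule', object_name == 'parse_supervisord'
```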

View File

@ -709,6 +709,6 @@ def get_jmx_status():
return check_statuses return check_statuses
except Exception as e: except Exception:
log.exception("Couldn't load latest jmx status") log.exception("Couldn't load latest jmx status")
return [] return []

View File

@ -75,8 +75,6 @@ def _windows_commondata_path():
import ctypes import ctypes
from ctypes import wintypes, windll from ctypes import wintypes, windll
CSIDL_COMMON_APPDATA = 35
_SHGetFolderPath = windll.shell32.SHGetFolderPathW _SHGetFolderPath = windll.shell32.SHGetFolderPathW
_SHGetFolderPath.argtypes = [wintypes.HWND, _SHGetFolderPath.argtypes = [wintypes.HWND,
ctypes.c_int, ctypes.c_int,
@ -84,7 +82,6 @@ def _windows_commondata_path():
wintypes.DWORD, wintypes.LPCWSTR] wintypes.DWORD, wintypes.LPCWSTR]
path_buf = wintypes.create_unicode_buffer(wintypes.MAX_PATH) path_buf = wintypes.create_unicode_buffer(wintypes.MAX_PATH)
result = _SHGetFolderPath(0, CSIDL_COMMON_APPDATA, 0, 0, path_buf)
return path_buf.value return path_buf.value
@ -488,7 +485,6 @@ def get_win32service_file(osname, filename):
def check_yaml(conf_path): def check_yaml(conf_path):
f = open(conf_path) f = open(conf_path)
check_name = os.path.basename(conf_path).split('.')[0]
try: try:
check_config = yaml.load(f.read(), Loader=Loader) check_config = yaml.load(f.read(), Loader=Loader)
assert 'init_config' in check_config, "No 'init_config' section found" assert 'init_config' in check_config, "No 'init_config' section found"
@ -588,7 +584,6 @@ def load_check_directory(agent_config):
# Check if the config exists OR we match the old-style config # Check if the config exists OR we match the old-style config
conf_path = os.path.join(confd_path, '%s.yaml' % check_name) conf_path = os.path.join(confd_path, '%s.yaml' % check_name)
if os.path.exists(conf_path): if os.path.exists(conf_path):
f = open(conf_path)
try: try:
check_config = check_yaml(conf_path) check_config = check_yaml(conf_path)
except Exception as e: except Exception as e:

View File

@ -35,7 +35,6 @@ class AgentSupervisor(object):
`child_func` is a function that should be run by the forked child `child_func` is a function that should be run by the forked child
that will auto-restart with the RESTART_EXIT_STATUS. that will auto-restart with the RESTART_EXIT_STATUS.
''' '''
exit_code = cls.RESTART_EXIT_STATUS
# Allow the child process to die on SIGTERM # Allow the child process to die on SIGTERM
signal.signal(signal.SIGTERM, cls._handle_sigterm) signal.signal(signal.SIGTERM, cls._handle_sigterm)
@ -47,7 +46,6 @@ class AgentSupervisor(object):
# The parent waits on the child. # The parent waits on the child.
cls.child_pid = pid cls.child_pid = pid
_, status = os.waitpid(pid, 0) _, status = os.waitpid(pid, 0)
exit_code = status >> 8
if parent_func is not None: if parent_func is not None:
parent_func() parent_func()
else: else:

View File

@ -172,7 +172,7 @@ def get_hostname(config=None):
if hostname is None: if hostname is None:
try: try:
socket_hostname = socket.gethostname() socket_hostname = socket.gethostname()
except socket.error as e: except socket.error:
socket_hostname = None socket_hostname = None
if socket_hostname and is_valid_hostname(socket_hostname): if socket_hostname and is_valid_hostname(socket_hostname):
hostname = socket_hostname hostname = socket_hostname

View File

@ -114,5 +114,5 @@ class Reporter(threading.Thread):
event_count=event_count, event_count=event_count,
).persist() ).persist()
except Exception as e: except Exception:
log.exception("Error flushing metrics") log.exception("Error flushing metrics")

View File

@ -38,7 +38,7 @@ class Server(object):
try: try:
self.forward_udp_sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) self.forward_udp_sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
self.forward_udp_sock.connect((forward_to_host, forward_to_port)) self.forward_udp_sock.connect((forward_to_host, forward_to_port))
except Exception as e: except Exception:
log.exception("Error while setting up connection to external statsd server") log.exception("Error while setting up connection to external statsd server")
@staticmethod @staticmethod
@ -83,7 +83,7 @@ class Server(object):
elif m[0] == u'#': elif m[0] == u'#':
event['dimensions'] = sorted(m[1:].split(u',')) event['dimensions'] = sorted(m[1:].split(u','))
return event return event
except IndexError as ValueError: except IndexError:
raise Exception(u'Unparseable event packet: %s' % packet) raise Exception(u'Unparseable event packet: %s' % packet)
@staticmethod @staticmethod
@ -194,7 +194,7 @@ class Server(object):
raise raise
except (KeyboardInterrupt, SystemExit): except (KeyboardInterrupt, SystemExit):
break break
except Exception as e: except Exception:
log.exception('Error receiving datagram') log.exception('Error receiving datagram')
def stop(self): def stop(self):

View File

@ -21,6 +21,8 @@ from ddagent import Application
from win32.common import handle_exe_click from win32.common import handle_exe_click
from collector.jmxfetch import JMXFetch from collector.jmxfetch import JMXFetch
from monagent.common.config import get_config, load_check_directory, set_win32_cert_path
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
RESTART_INTERVAL = 24 * 60 * 60 # Defaults to 1 day RESTART_INTERVAL = 24 * 60 * 60 # Defaults to 1 day
@ -118,8 +120,7 @@ class DDAgent(multiprocessing.Process):
def run(self): def run(self):
log.debug("Windows Service - Starting collector") log.debug("Windows Service - Starting collector")
emitters = self.get_emitters() emitters = self.get_emitters()
systemStats = get_system_stats() self.collector = Collector(self.config, emitters)
self.collector = Collector(self.config, emitters, systemStats)
# Load the checks_d checks # Load the checks_d checks
checksd = load_check_directory(self.config) checksd = load_check_directory(self.config)

View File

@ -1,4 +1,4 @@
# Copyright © 2009-2010 CEA # Copyright © 2009-2010 CEA
# Pierre Raybaut # Pierre Raybaut
# Licensed under the terms of the CECILL License # Licensed under the terms of the CECILL License
# Modified for Datadog # Modified for Datadog
@ -19,6 +19,8 @@ import win32service
# GUI Imports # GUI Imports
from guidata.qt.QtCore import SIGNAL, Qt, QSize, QPoint, QTimer from guidata.qt.QtCore import SIGNAL, Qt, QSize, QPoint, QTimer
from guidata.qt.QtGui import QInputDialog, QWidget, QFont, QLabel, QGroupBox, QHBoxLayout, QSystemTrayIcon
from guidata.qt.QtGui import QVBoxLayout, QPushButton, QSplitter, QListWidget, QMenu, QMessageBox
from guidata.configtools import get_icon, get_family, MONOSPACE from guidata.configtools import get_icon, get_family, MONOSPACE
from guidata.qthelpers import get_std_icon from guidata.qthelpers import get_std_icon

View File

@ -6,6 +6,7 @@ import collections
class Plugins(collections.defaultdict): class Plugins(collections.defaultdict):
"""A container for the plugin configurations used by the monasca-agent. """A container for the plugin configurations used by the monasca-agent.
This is essentially a defaultdict(dict) but put into a class primarily to make the interface clear, also This is essentially a defaultdict(dict) but put into a class primarily to make the interface clear, also
to add a couple of helper methods. to add a couple of helper methods.
Each plugin config is stored with the key being its config name (excluding .yaml). Each plugin config is stored with the key being its config name (excluding .yaml).
@ -21,7 +22,8 @@ class Plugins(collections.defaultdict):
raise NotImplementedError raise NotImplementedError
def merge(self, other): def merge(self, other):
"""Do a deep merge with precedence going to other (as is the case with update) """Do a deep merge with precedence going to other (as is the case with update).
""" """
# Implemented as a function so it can be used for arbitrary dictionaries not just self, this is needed # Implemented as a function so it can be used for arbitrary dictionaries not just self, this is needed
# for the recursive nature of the merge. # for the recursive nature of the merge.
@ -29,7 +31,8 @@ class Plugins(collections.defaultdict):
def deep_merge(adict, other): def deep_merge(adict, other):
"""A recursive merge of two dictionaries including combining of any lists within the data structure """A recursive merge of two dictionaries including combining of any lists within the data structure.
""" """
for key, value in other.iteritems(): for key, value in other.iteritems():
if key in adict: if key in adict:
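One plausible reading of the recursive merge described above, as a standalone sketch (`other` wins on scalar conflicts, nested dicts recurse, lists are combined; not necessarily the exact implementation):

```
def deep_merge(adict, other):
    """Recursively merge `other` into `adict` in place (sketch)."""
    for key, value in other.iteritems():
        if key in adict and isinstance(adict[key], dict) and isinstance(value, dict):
            deep_merge(adict[key], value)    # recurse into nested dicts
        elif key in adict and isinstance(adict[key], list) and isinstance(value, list):
            adict[key] = adict[key] + value  # combine lists within the structure
        else:
            adict[key] = value               # precedence goes to `other`
```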

View File

@ -1,3 +1,6 @@
from plugin import Plugin from plugin import Plugin
from utils import find_process_cmdline, find_process_name, watch_process, service_api_check from utils import find_process_cmdline
from utils import find_process_name
from utils import watch_process
from utils import service_api_check
from service_plugin import ServicePlugin from service_plugin import ServicePlugin

View File

@ -1,11 +1,13 @@
"""Classes for detection of running resources to be monitored. """Classes for detection of running resources to be monitored.
Detection classes should be platform independent Detection classes should be platform independent
""" """
class Plugin(object): class Plugin(object):
"""Abstract class implemented by the monasca-agent plugin detection classes """Abstract class implemented by the monasca-agent plugin detection classes.
""" """
# todo these should include dependency detection # todo these should include dependency detection
@ -18,22 +20,28 @@ class Plugin(object):
self._detect() self._detect()
def _detect(self): def _detect(self):
"""Run detection, set self.available True if the service is detected.""" """Run detection, set self.available True if the service is detected.
"""
raise NotImplementedError raise NotImplementedError
def build_config(self): def build_config(self):
"""Build the config as a Plugins object and return. """Build the config as a Plugins object and return.
""" """
raise NotImplementedError raise NotImplementedError
def dependencies_installed(self): def dependencies_installed(self):
"""return True if dependencies are installed """Return True if dependencies are installed.
""" """
raise NotImplementedError raise NotImplementedError
@property @property
def name(self): def name(self):
"""Return _name if set otherwise the class name""" """Return _name if set otherwise the class name.
"""
if '_name' in self.__dict__: if '_name' in self.__dict__:
return self._name return self._name
else: else:
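
For reference, a minimal concrete plugin against the interface above could look like this sketch; the 'exampled' process name is hypothetical, and the shape follows the Kafka and Zookeeper hunks later in this commit:

import monsetup.agent_config
import monsetup.detection

class Example(monsetup.detection.Plugin):
    def _detect(self):
        """Run detection, set self.available True if the service is detected."""
        if monsetup.detection.find_process_cmdline('exampled') is not None:
            self.available = True

    def build_config(self):
        """Build the config as a Plugins object and return."""
        config = monsetup.agent_config.Plugins()
        config.merge(monsetup.detection.watch_process(['exampled']))
        return config

    def dependencies_installed(self):
        return True  # nothing beyond the agent itself is needed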


@ -1,7 +1,7 @@
from monsetup.detection import ServicePlugin import monsetup.detection
class Ceilometer(ServicePlugin): class Ceilometer(monsetup.detection.ServicePlugin):
"""Detect Ceilometer daemons and setup configuration to monitor them.""" """Detect Ceilometer daemons and setup configuration to monitor them."""


@ -1,7 +1,7 @@
from monsetup.detection import ServicePlugin import monsetup.detection
class Cinder(ServicePlugin): class Cinder(monsetup.detection.ServicePlugin):
"""Detect Cinder daemons and setup configuration to monitor them.""" """Detect Cinder daemons and setup configuration to monitor them."""


@ -1,7 +1,7 @@
from monsetup.detection import ServicePlugin import monsetup.detection
class Glance(ServicePlugin): class Glance(monsetup.detection.ServicePlugin):
"""Detect Glance daemons and setup configuration to monitor them.""" """Detect Glance daemons and setup configuration to monitor them."""


@ -1,30 +1,34 @@
import collections import collections
import logging import logging
from monsetup.detection import Plugin, find_process_cmdline, watch_process import monsetup.agent_config
from monsetup import agent_config import monsetup.detection
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
class Kafka(Plugin): class Kafka(monsetup.detection.Plugin):
"""Detect Kafka daemons and sets up configuration to monitor them. """Detect Kafka daemons and sets up configuration to monitor them.
This plugin configures the kafka_consumer plugin and does not configure any jmx based checks against kafka. This plugin configures the kafka_consumer plugin and does not configure any jmx based checks against kafka.
Note this plugin will pull the same information from kafka on each node in the cluster it runs on. Note this plugin will pull the same information from kafka on each node in the cluster it runs on.
""" """
def _detect(self): def _detect(self):
"""Run detection, set self.available True if the service is detected.""" """Run detection, set self.available True if the service is detected.
if find_process_cmdline('kafka') is not None:
"""
if monsetup.detection.find_process_cmdline('kafka') is not None:
self.available = True self.available = True
def build_config(self): def build_config(self):
"""Build the config as a Plugins object and return. """Build the config as a Plugins object and return.
""" """
config = agent_config.Plugins() config = monsetup.agent_config.Plugins()
# First watch the process # First watch the process
config.merge(watch_process(['kafka'])) config.merge(monsetup.detection.watch_process(['kafka']))
log.info("\tWatching the kafka process.") log.info("\tWatching the kafka process.")
if self.dependencies_installed(): if self.dependencies_installed():


@ -1,9 +1,11 @@
from monsetup.detection import ServicePlugin import monsetup.detection
class Keystone(ServicePlugin): class Keystone(monsetup.detection.ServicePlugin):
"""Detect Keystone daemons and setup configuration to monitor them.""" """Detect Keystone daemons and setup configuration to monitor them.
"""
def __init__(self, template_dir, overwrite=True): def __init__(self, template_dir, overwrite=True):
service_params = { service_params = {


@ -1,28 +1,33 @@
"""Classes for monitoring the monitoring server stack. """Classes for monitoring the monitoring server stack.
Covering mon-persister, mon-api and mon-thresh. Covering mon-persister, mon-api and mon-thresh.
Kafka, mysql, vertica and influxdb are covered by other detection plugins. Mon-notification uses statsd. Kafka, mysql, vertica and influxdb are covered by other detection plugins. Mon-notification uses statsd.
""" """
import logging import logging
from monsetup.detection import Plugin, find_process_cmdline, watch_process import monsetup.agent_config
from monsetup import agent_config import monsetup.detection
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
class MonPersister(Plugin): class MonPersister(monsetup.detection.Plugin):
"""Detect mon_persister and setup monitoring. """Detect mon_persister and setup monitoring.
""" """
def _detect(self): def _detect(self):
"""Run detection, set self.available True if the service is detected.""" """Run detection, set self.available True if the service is detected.
if find_process_cmdline('mon-persister') is not None:
"""
if monsetup.detection.find_process_cmdline('mon-persister') is not None:
self.available = True self.available = True
def build_config(self): def build_config(self):
"""Build the config as a Plugins object and return. """Build the config as a Plugins object and return.
""" """
log.info("\tEnabling the mon persister healthcheck") log.info("\tEnabling the mon persister healthcheck")
return dropwizard_health_check('mon-persister', 'http://localhost:8091/healthcheck') return dropwizard_health_check('mon-persister', 'http://localhost:8091/healthcheck')
@ -35,14 +40,15 @@ class MonPersister(Plugin):
return True return True
class MonAPI(Plugin): class MonAPI(monsetup.detection.Plugin):
"""Detect mon_api and setup monitoring. """Detect mon_api and setup monitoring.
""" """
def _detect(self): def _detect(self):
"""Run detection, set self.available True if the service is detected.""" """Run detection, set self.available True if the service is detected."""
if find_process_cmdline('mon-api') is not None: if monsetup.detection.find_process_cmdline('mon-api') is not None:
self.available = True self.available = True
def build_config(self): def build_config(self):
@ -59,28 +65,35 @@ class MonAPI(Plugin):
return True return True
class MonThresh(Plugin): class MonThresh(monsetup.detection.Plugin):
"""Detect the running mon-thresh and monitor""" """Detect the running mon-thresh and monitor.
"""
def _detect(self): def _detect(self):
"""Run detection, set self.available True if the service is detected.""" """Run detection, set self.available True if the service is detected.
if find_process_cmdline('mon-thresh') is not None:
"""
if monsetup.detection.find_process_cmdline('mon-thresh') is not None:
self.available = True self.available = True
def build_config(self): def build_config(self):
"""Build the config as a Plugins object and return. """Build the config as a Plugins object and return.
""" """
log.info("\tWatching the mon-thresh process.") log.info("\tWatching the mon-thresh process.")
return watch_process(['mon-thresh']) return monsetup.detection.watch_process(['mon-thresh'])
def dependencies_installed(self): def dependencies_installed(self):
return True return True
def dropwizard_health_check(name, url): def dropwizard_health_check(name, url):
"""Setup a dropwizard heathcheck to be watched by the http_check plugin.""" """Setup a dropwizard heathcheck to be watched by the http_check plugin.
config = agent_config.Plugins()
"""
config = monsetup.agent_config.Plugins()
config['http_check'] = {'init_config': None, config['http_check'] = {'init_config': None,
'instances': [{'name': name, 'instances': [{'name': name,
'url': url, 'url': url,
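
The hunk cuts off inside the instances dictionary; a sketch of the full entry dropwizard_health_check() plausibly returns — the keys after 'url' are assumptions based on common http_check options, not the repo's exact values:

import monsetup.agent_config

def dropwizard_health_check(name, url):
    """Setup a dropwizard healthcheck to be watched by the http_check plugin."""
    config = monsetup.agent_config.Plugins()
    config['http_check'] = {'init_config': None,
                            'instances': [{'name': name,
                                           'url': url,
                                           # assumed options, truncated above
                                           'timeout': 1,
                                           'include_content': False}]}
    return config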


@ -1,16 +1,17 @@
import logging import logging
from monsetup.detection import Plugin, find_process_name, watch_process import monsetup.agent_config
from monsetup import agent_config import monsetup.detection
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
mysql_conf = '/root/.my.cnf' mysql_conf = '/root/.my.cnf'
class MySQL(Plugin): class MySQL(monsetup.detection.Plugin):
"""Detect MySQL daemons and setup configuration to monitor them. """Detect MySQL daemons and setup configuration to monitor them.
This plugin needs user/pass info for mysql setup; this is This plugin needs user/pass info for mysql setup; this is
best placed in /root/.my.cnf in a format such as best placed in /root/.my.cnf in a format such as
[client] [client]
@ -19,16 +20,19 @@ class MySQL(Plugin):
""" """
def _detect(self): def _detect(self):
"""Run detection, set self.available True if the service is detected""" """Run detection, set self.available True if the service is detected.
if find_process_name('mysqld') is not None:
"""
if monsetup.detection.find_process_name('mysqld') is not None:
self.available = True self.available = True
def build_config(self): def build_config(self):
"""Build the config as a Plugins object and return. """Build the config as a Plugins object and return.
""" """
config = agent_config.Plugins() config = monsetup.agent_config.Plugins()
# First watch the process # First watch the process
config.merge(watch_process(['mysqld'])) config.merge(monsetup.detection.watch_process(['mysqld']))
log.info("\tWatching the mysqld process.") log.info("\tWatching the mysqld process.")
# Attempt login, requires either an empty root password from localhost # Attempt login, requires either an empty root password from localhost
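
The docstring above expects credentials in /root/.my.cnf; a sketch of reading that [client] section with the Python 2 stdlib (the plugin's actual parsing may differ):

import ConfigParser  # configparser on Python 3

parser = ConfigParser.RawConfigParser(allow_no_value=True)
parser.read('/root/.my.cnf')
user = parser.get('client', 'user')
password = parser.get('client', 'password')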


@ -1,4 +1,5 @@
import os import os
import yaml import yaml
from monsetup.detection import Plugin from monsetup.detection import Plugin
@ -8,14 +9,18 @@ from monsetup import agent_config
class Network(Plugin): class Network(Plugin):
"""No configuration here, working networking is assumed so this is either on or off. """No configuration here, working networking is assumed so this is either on or off.
""" """
def _detect(self): def _detect(self):
"""Run detection, set self.available True if the service is detected.""" """Run detection, set self.available True if the service is detected.
"""
self.available = True self.available = True
def build_config(self): def build_config(self):
"""Build the config as a Plugins object and return. """Build the config as a Plugins object and return.
""" """
# A bit silly to parse the yaml only for it to be converted back but this # A bit silly to parse the yaml only for it to be converted back but this
# plugin is the exception not the rule # plugin is the exception not the rule


@ -1,9 +1,11 @@
from monsetup.detection import ServicePlugin import monsetup.detection
class Neutron(ServicePlugin): class Neutron(monsetup.detection.ServicePlugin):
"""Detect Neutron daemons and setup configuration to monitor them.""" """Detect Neutron daemons and setup configuration to monitor them.
"""
def __init__(self, template_dir, overwrite=True): def __init__(self, template_dir, overwrite=True):
service_params = { service_params = {


@ -1,9 +1,11 @@
from monsetup.detection import ServicePlugin import monsetup.detection
class Nova(ServicePlugin): class Nova(monsetup.detection.ServicePlugin):
"""Detect Nova daemons and setup configuration to monitor them.""" """Detect Nova daemons and setup configuration to monitor them.
"""
def __init__(self, template_dir, overwrite=True): def __init__(self, template_dir, overwrite=True):
service_params = { service_params = {


@ -1,30 +1,35 @@
import os import os
import yaml import yaml
from monsetup.detection import Plugin, find_process_name import monsetup.agent_config
from monsetup import agent_config import monsetup.detection
class Postfix(Plugin): class Postfix(monsetup.detection.Plugin):
"""If postfix is running install the default config.
"""If postfix is running install the default config
""" """
# todo this is disabled as postfix requires passwordless sudo for the # todo this is disabled as postfix requires passwordless sudo for the
# monasca-agent user, a bad practice # monasca-agent user, a bad practice
def _detect(self): def _detect(self):
"""Run detection, set self.available True if the service is detected.""" """Run detection, set self.available True if the service is detected.
if find_process_name('postfix') is not None:
"""
if monsetup.detection.find_process_name('postfix') is not None:
self.available = True self.available = True
def build_config(self): def build_config(self):
"""Build the config as a Plugins object and return. """Build the config as a Plugins object and return.
""" """
# A bit silly to parse the yaml only for it to be converted back but this # A bit silly to parse the yaml only for it to be converted back but this
# plugin is the exception not the rule # plugin is the exception not the rule
with open(os.path.join(self.template_dir, 'conf.d/postfix.yaml.example'), 'r') as postfix_template: with open(os.path.join(self.template_dir, 'conf.d/postfix.yaml.example'), 'r') as postfix_template:
default_net_config = yaml.load(postfix_template.read()) default_net_config = yaml.load(postfix_template.read())
config = agent_config.Plugins() config = monsetup.agent_config.Plugins()
config['postfix'] = default_net_config config['postfix'] = default_net_config
return config return config
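
Network, Postfix and (below) Zookeeper all share this "parse the yaml only for it to be converted back" pattern; generalized as a sketch, with an invented helper name:

import os

import yaml

import monsetup.agent_config

def config_from_template(template_dir, name):
    """Load conf.d/<name>.yaml.example and wrap it in a Plugins object."""
    path = os.path.join(template_dir, 'conf.d/%s.yaml.example' % name)
    with open(path, 'r') as template:
        default_config = yaml.safe_load(template.read())  # source uses yaml.load
    config = monsetup.agent_config.Plugins()
    config[name] = default_config
    return config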


@ -1,9 +1,11 @@
from monsetup.detection import ServicePlugin import monsetup.detection
class Swift(ServicePlugin): class Swift(monsetup.detection.ServicePlugin):
"""Detect Swift daemons and setup configuration to monitor them.""" """Detect Swift daemons and setup configuration to monitor them.
"""
def __init__(self, template_dir, overwrite=True): def __init__(self, template_dir, overwrite=True):
service_params = { service_params = {


@ -1,30 +1,35 @@
import logging import logging
import os import os
import yaml import yaml
from monsetup.detection import Plugin, find_process_cmdline, watch_process import monsetup.agent_config
from monsetup import agent_config import monsetup.detection
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
class Zookeeper(Plugin): class Zookeeper(monsetup.detection.Plugin):
"""Detect Zookeeper daemons and setup configuration to monitor them. """Detect Zookeeper daemons and setup configuration to monitor them.
""" """
def _detect(self): def _detect(self):
"""Run detection, set self.available True if the service is detected.""" """Run detection, set self.available True if the service is detected.
if find_process_cmdline('zookeeper') is not None:
"""
if monsetup.detection.find_process_cmdline('zookeeper') is not None:
self.available = True self.available = True
def build_config(self): def build_config(self):
"""Build the config as a Plugins object and return. """Build the config as a Plugins object and return.
""" """
config = agent_config.Plugins() config = monsetup.agent_config.Plugins()
# First watch the process # First watch the process
log.info("\tWatching the zookeeper process.") log.info("\tWatching the zookeeper process.")
config.merge(watch_process(['zookeeper'])) config.merge(monsetup.detection.watch_process(['zookeeper']))
log.info("\tEnabling the zookeeper plugin") log.info("\tEnabling the zookeeper plugin")
with open(os.path.join(self.template_dir, 'conf.d/zk.yaml.example'), 'r') as zk_template: with open(os.path.join(self.template_dir, 'conf.d/zk.yaml.example'), 'r') as zk_template:


@ -1,8 +1,11 @@
import logging import logging
from plugin import Plugin from plugin import Plugin
from monsetup import agent_config from monsetup import agent_config
from monsetup.detection import find_process_cmdline, watch_process, service_api_check from monsetup.detection import find_process_cmdline
from monsetup.detection import service_api_check
from monsetup.detection import watch_process
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
@ -11,6 +14,7 @@ log = logging.getLogger(__name__)
class ServicePlugin(Plugin): class ServicePlugin(Plugin):
"""Base class implemented by the monasca-agent plugin detection classes """Base class implemented by the monasca-agent plugin detection classes
for OpenStack Services for OpenStack Services
""" """
@ -23,7 +27,9 @@ class ServicePlugin(Plugin):
super(ServicePlugin, self).__init__(kwargs['template_dir'], kwargs['overwrite']) super(ServicePlugin, self).__init__(kwargs['template_dir'], kwargs['overwrite'])
def _detect(self): def _detect(self):
"""Run detection""" """Run detection.
"""
self.found_processes = [] self.found_processes = []
for process in self.process_names: for process in self.process_names:
@ -34,6 +40,7 @@ class ServicePlugin(Plugin):
def build_config(self): def build_config(self):
"""Build the config as a Plugins object and return. """Build the config as a Plugins object and return.
""" """
config = agent_config.Plugins() config = agent_config.Plugins()
for process in self.found_processes: for process in self.found_processes:
@ -50,6 +57,7 @@ class ServicePlugin(Plugin):
return config return config
def dependencies_installed(self): def dependencies_installed(self):
"""return True if dependencies are installed """Return True if dependencies are installed.
""" """
return True return True
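
Every service plugin above (Ceilometer, Cinder, Glance, Keystone, Neutron, Nova, Swift) reduces to a service_params dict handed to this base class, but each hunk truncates that dict; the keys below are therefore assumptions modeled on the base class's use of template_dir and overwrite:

import monsetup.detection

class Example(monsetup.detection.ServicePlugin):
    """Detect Example daemons and setup configuration to monitor them."""

    def __init__(self, template_dir, overwrite=True):
        service_params = {
            'template_dir': template_dir,
            'overwrite': overwrite,
            # assumed fields; the real plugins' dicts are cut off above
            'service_name': 'example',
            'process_names': ['example-api'],
        }
        super(Example, self).__init__(**service_params)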


@ -6,7 +6,8 @@ from monsetup import agent_config
def find_process_cmdline(search_string): def find_process_cmdline(search_string):
"""Simple function to search running process for one with cmdline containing """Simple function to search running process for one with cmdline containing.
""" """
for process in psutil.process_iter(): for process in psutil.process_iter():
for arg in process.cmdline(): for arg in process.cmdline():
@ -18,6 +19,7 @@ def find_process_cmdline(search_string):
def find_process_name(pname): def find_process_name(pname):
"""Simple function to search running process for one with pname. """Simple function to search running process for one with pname.
""" """
for process in psutil.process_iter(): for process in psutil.process_iter():
if pname == process.name(): if pname == process.name():
@ -28,6 +30,7 @@ def find_process_name(pname):
def watch_process(search_strings, service=None, component=None): def watch_process(search_strings, service=None, component=None):
"""Takes a list of process search strings and returns a Plugins object with the config set. """Takes a list of process search strings and returns a Plugins object with the config set.
This was built as a helper since many plugins set up process watching This was built as a helper since many plugins set up process watching
""" """
config = agent_config.Plugins() config = agent_config.Plugins()
@ -44,7 +47,9 @@ def watch_process(search_strings, service=None, component=None):
def service_api_check(name, url, pattern, service=None, component=None): def service_api_check(name, url, pattern, service=None, component=None):
"""Setup a service api to be watched by the http_check plugin.""" """Setup a service api to be watched by the http_check plugin.
"""
config = agent_config.Plugins() config = agent_config.Plugins()
parameters = {'name': name, parameters = {'name': name,
'url': url, 'url': url,
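
The utils hunks are truncated; a self-contained sketch of find_process_cmdline as its docstring describes, using the psutil calls visible above — the NoSuchProcess guard is an addition for robustness, not necessarily in the repo:

import psutil

def find_process_cmdline(search_string):
    """Return the first running process whose cmdline contains search_string."""
    for process in psutil.process_iter():
        try:
            for arg in process.cmdline():
                if arg.find(search_string) != -1:
                    return process
        except psutil.NoSuchProcess:
            continue  # process exited while iterating
    return None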


@ -9,13 +9,22 @@ import pwd
import socket import socket
import subprocess import subprocess
import sys import sys
import yaml
import platform import platform
import yaml
import agent_config import agent_config
from detection.plugins import kafka, mon, mysql, network, zookeeper from detection.plugins import kafka
from detection.plugins import nova, glance, cinder, neutron, swift from detection.plugins import mon
from detection.plugins import keystone, ceilometer from detection.plugins import mysql
from detection.plugins import network
from detection.plugins import zookeeper
from detection.plugins import nova
from detection.plugins import glance
from detection.plugins import cinder
from detection.plugins import neutron
from detection.plugins import swift
from detection.plugins import keystone
from detection.plugins import ceilometer
from service import sysv from service import sysv
# List of all detection plugins to run # List of all detection plugins to run


@ -1,11 +1,14 @@
"""Classes implementing different methods for running monasca-agent on startup as well as starting the process immediately """Classes implementing different methods for running monasca-agent on startup as well as starting the process immediately.
""" """
import psutil import psutil
class Service(object): class Service(object):
"""Abstract base class implementing the interface for various service types.""" """Abstract base class implementing the interface for various service types.
"""
def __init__(self, config_dir, log_dir, name='monasca-agent'): def __init__(self, config_dir, log_dir, name='monasca-agent'):
self.config_dir = config_dir self.config_dir = config_dir
@ -14,29 +17,34 @@ class Service(object):
def enable(self): def enable(self):
"""Sets monasca-agent to start on boot. """Sets monasca-agent to start on boot.
Generally this requires running as super user Generally this requires running as super user
""" """
raise NotImplementedError raise NotImplementedError
def start(self, restart=True): def start(self, restart=True):
"""Starts monasca-agent """Starts monasca-agent.
If the agent is running and restart is True, restart If the agent is running and restart is True, restart
""" """
raise NotImplementedError raise NotImplementedError
def stop(self): def stop(self):
"""Stops monasca-agent """Stops monasca-agent.
""" """
raise NotImplementedError raise NotImplementedError
def is_enabled(self): def is_enabled(self):
"""Returns True if monasca-agent is setup to start on boot, false otherwise """Returns True if monasca-agent is setup to start on boot, false otherwise.
""" """
raise NotImplementedError raise NotImplementedError
@staticmethod @staticmethod
def is_running(): def is_running():
"""Returns True if monasca-agent is running, false otherwise """Returns True if monasca-agent is running, false otherwise.
""" """
# Looking for the supervisor process not the individual components # Looking for the supervisor process not the individual components
for process in psutil.process_iter(): for process in psutil.process_iter():
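
is_running() is cut off above; a sketch of the supervisor scan its comment describes — matching on 'supervisor' in the process name is a guess at the actual test:

import psutil

def is_running():
    """Return True if the agent's supervisor process is running."""
    # Look for the supervisor process, not the individual components
    for process in psutil.process_iter():
        if 'supervisor' in process.name():
            return True
    return False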


@ -1,20 +1,23 @@
"""System V style service. """System V style service.
""" """
from glob import glob import glob
import logging import logging
import os import os
import pwd import pwd
import subprocess import subprocess
from . import Service import service
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
class SysV(Service): class SysV(service.Service):
def __init__(self, init_template, config_dir, log_dir, name='monasca-agent', username='monasca-agent'): def __init__(self, init_template, config_dir, log_dir, name='monasca-agent', username='monasca-agent'):
"""Setup this service with the given init template""" """Setup this service with the given init template.
"""
super(SysV, self).__init__(config_dir, log_dir, name) super(SysV, self).__init__(config_dir, log_dir, name)
self.init_script = '/etc/init.d/%s' % self.name self.init_script = '/etc/init.d/%s' % self.name
self.init_template = init_template self.init_template = init_template
@ -22,6 +25,7 @@ class SysV(Service):
def enable(self): def enable(self):
"""Sets monasca-agent to start on boot. """Sets monasca-agent to start on boot.
Generally this requires running as super user Generally this requires running as super user
""" """
# Create monasca-agent user/group if needed # Create monasca-agent user/group if needed
@ -53,7 +57,8 @@ class SysV(Service):
log.info('Enabled {0} service via SysV init script'.format(self.name)) log.info('Enabled {0} service via SysV init script'.format(self.name))
def start(self, restart=True): def start(self, restart=True):
"""Starts monasca-agent """Starts monasca-agent.
If the agent is running and restart is True, restart If the agent is running and restart is True, restart
""" """
if not self.is_enabled(): if not self.is_enabled():
@ -65,7 +70,8 @@ class SysV(Service):
return True return True
def stop(self): def stop(self):
"""Stops monasca-agent """Stops monasca-agent.
""" """
if not self.is_enabled(): if not self.is_enabled():
log.error('The service is not enabled') log.error('The service is not enabled')
@ -76,12 +82,13 @@ class SysV(Service):
return True return True
def is_enabled(self): def is_enabled(self):
"""Returns True if monasca-agent is setup to start on boot, false otherwise """Returns True if monasca-agent is setup to start on boot, false otherwise.
""" """
if not os.path.exists(self.init_script): if not os.path.exists(self.init_script):
return False return False
if len(glob('/etc/rc?.d/S??monasca-agent')) > 0: if len(glob.glob('/etc/rc?.d/S??monasca-agent')) > 0:
return True return True
else: else:
return False return False
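
The glob change above is the H302 (import only modules) rule that drives most of this commit, in miniature — import the module and qualify the call instead of importing the name:

import glob

# Before: from glob import glob; glob('/etc/rc?.d/S??monasca-agent')
# After:
if len(glob.glob('/etc/rc?.d/S??monasca-agent')) > 0:
    print('monasca-agent is enabled at boot')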


@ -1,6 +1,22 @@
from setuptools import setup #!/usr/bin/env python
setup( # Copyright (c) 2014 Hewlett-Packard Development Company, L.P.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import setuptools
setuptools.setup(
setup_requires=['pbr'], setup_requires=['pbr'],
pbr=True pbr=True)
)


@ -75,9 +75,4 @@ def get_check(name, config_str):
raise Exception( raise Exception(
"Unable to import check %s. Missing a class that inherits AgentCheck" % name) "Unable to import check %s. Missing a class that inherits AgentCheck" % name)
agent_config = {
'version': '0.1',
'api_key': 'tota'
}
return check_class.from_yaml(yaml_text=config_str, check_name=name) return check_class.from_yaml(yaml_text=config_str, check_name=name)


@ -73,7 +73,6 @@ class TestCacti(unittest.TestCase):
# Check once more to make sure last_ts ignores None vals when calculating # Check once more to make sure last_ts ignores None vals when calculating
# where to start from # where to start from
check.check(instances[0]) check.check(instances[0])
results3 = check.get_metrics()
last_ts2 = check.last_ts[rrd_dir + '/localhost_hdd_free_10.rrd.AVERAGE'] last_ts2 = check.last_ts[rrd_dir + '/localhost_hdd_free_10.rrd.AVERAGE']
self.assertEqual(last_ts1, last_ts2) self.assertEqual(last_ts1, last_ts2)


@ -42,7 +42,6 @@ class JMXTestCase(unittest.TestCase):
def setUp(self): def setUp(self):
aggregator = MetricsAggregator("test_host") aggregator = MetricsAggregator("test_host")
self.server = Server(aggregator, "localhost", STATSD_PORT) self.server = Server(aggregator, "localhost", STATSD_PORT)
pid_file = PidFile('dogstatsd')
self.reporter = DummyReporter(aggregator) self.reporter = DummyReporter(aggregator)
self.t1 = threading.Thread(target=self.server.start) self.t1 = threading.Thread(target=self.server.start)


@ -212,6 +212,7 @@ class TestDogstream(TailTestCase):
'dogstreams': '%s:tests.test_datadog:parse_ancient_function_plugin' % 'dogstreams': '%s:tests.test_datadog:parse_ancient_function_plugin' %
self.log_file.name}) self.log_file.name})
actual_output = plugdog.check(self.config, move_end=False) actual_output = plugdog.check(self.config, move_end=False)
self.assertEqual(expected_output, actual_output)
def test_dogstream_function_plugin(self): def test_dogstream_function_plugin(self):
"""Ensure that non-class-based stateful plugins work""" """Ensure that non-class-based stateful plugins work"""


@ -26,6 +26,7 @@ class TestElastic(unittest.TestCase):
loop += 1 loop += 1
if loop >= MAX_WAIT: if loop >= MAX_WAIT:
break break
return request
def setUp(self): def setUp(self):
self.process = None self.process = None


@ -38,6 +38,7 @@ class HaproxyTestCase(unittest.TestCase):
loop += 1 loop += 1
if loop >= MAX_WAIT: if loop >= MAX_WAIT:
break break
return request
def setUp(self): def setUp(self):
self.process = None self.process = None


@ -42,7 +42,6 @@ class JMXTestCase(unittest.TestCase):
def setUp(self): def setUp(self):
aggregator = MetricsAggregator("test_host") aggregator = MetricsAggregator("test_host")
self.server = Server(aggregator, "localhost", STATSD_PORT) self.server = Server(aggregator, "localhost", STATSD_PORT)
pid_file = PidFile('dogstatsd')
self.reporter = DummyReporter(aggregator) self.reporter = DummyReporter(aggregator)
self.t1 = threading.Thread(target=self.server.start) self.t1 = threading.Thread(target=self.server.start)


@ -52,7 +52,7 @@ class TestMemCache(unittest.TestCase):
# Check that we got 21 metrics for a specific host # Check that we got 21 metrics for a specific host
self.assertEqual( self.assertEqual(
len([t for t in r if t[3].get('dimensions') == {"instance": mythirdtag}]), 21, r) len([t for t in r if t[3].get('dimensions') == {"instance": "mythirdtag"}]), 21, r)
def testDimensions(self): def testDimensions(self):
raise SkipTest('Requires mcache') raise SkipTest('Requires mcache')


@ -79,7 +79,6 @@ class TestMongo(unittest.TestCase):
c1.admin.command("replSetInitiate") c1.admin.command("replSetInitiate")
# Sleep for 15s until replication is stable # Sleep for 15s until replication is stable
time.sleep(30) time.sleep(30)
x = c1.admin.command("replSetGetStatus")
assert pymongo.Connection('localhost:%s' % PORT2) assert pymongo.Connection('localhost:%s' % PORT2)
except Exception: except Exception:
logging.getLogger().exception("Cannot instantiate mongod properly") logging.getLogger().exception("Cannot instantiate mongod properly")


@ -42,7 +42,6 @@ class JMXTestCase(unittest.TestCase):
def setUp(self): def setUp(self):
aggregator = MetricsAggregator("test_host") aggregator = MetricsAggregator("test_host")
self.server = Server(aggregator, "localhost", STATSD_PORT) self.server = Server(aggregator, "localhost", STATSD_PORT)
pid_file = PidFile('dogstatsd')
self.reporter = DummyReporter(aggregator) self.reporter = DummyReporter(aggregator)
self.t1 = threading.Thread(target=self.server.start) self.t1 = threading.Thread(target=self.server.start)


@ -1,11 +1,17 @@
import unittest
from functools import reduce from functools import reduce
import logging
import platform
import re
import unittest
logging.basicConfig(level=logging.DEBUG) logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__file__) logger = logging.getLogger(__file__)
from monagent.collector.checks.system.unix import *
from common import get_check from common import get_check
from monagent.collector.checks.system.unix import Cpu
from monagent.collector.checks.system.unix import Disk
from monagent.collector.checks.system.unix import IO
from monagent.collector.checks.system.unix import Memory
class TestSystem(unittest.TestCase): class TestSystem(unittest.TestCase):
@ -115,7 +121,7 @@ none 985964 1 985963 1% /lib/init/rw
def test_collecting_disk_metrics(self): def test_collecting_disk_metrics(self):
"""Testing disk stats gathering""" """Testing disk stats gathering"""
if Platform.is_unix(): if platform.system() == 'Linux':
disk = Disk(logger, {}) disk = Disk(logger, {})
res = disk.check() res = disk.check()
# Assert we have disk & inode stats # Assert we have disk & inode stats
@ -126,7 +132,7 @@ none 985964 1 985963 1% /lib/init/rw
def testMemory(self): def testMemory(self):
global logger global logger
res = Memory(logger).check() res = Memory(logger).check()
if Platform.is_linux(): if platform.system() == 'Linux':
for k in ( for k in (
"swapTotal", "swapFree", "swapPctFree", "swapUsed", "physTotal", "physFree", "swapTotal", "swapFree", "swapPctFree", "swapUsed", "physTotal", "physFree",
"physUsed", "physBuffers", "physCached", "physUsable", "physPctUsable", "physUsed", "physBuffers", "physCached", "physUsable", "physPctUsable",
@ -134,7 +140,7 @@ none 985964 1 985963 1% /lib/init/rw
assert k in res, res assert k in res, res
assert res["swapTotal"] == res["swapFree"] + res["swapUsed"] assert res["swapTotal"] == res["swapFree"] + res["swapUsed"]
assert res["physTotal"] == res["physFree"] + res["physUsed"] assert res["physTotal"] == res["physFree"] + res["physUsed"]
elif sys.platform == 'darwin': elif platform.system() == 'Darwin':
for k in ("swapFree", "swapUsed", "physFree", "physUsed"): for k in ("swapFree", "swapUsed", "physFree", "physUsed"):
assert k in res, res assert k in res, res
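
The tests now key off the stdlib instead of the agent's Platform helpers; platform.system() returns 'Linux', 'Darwin' or 'Windows', so equivalents of the old helpers are one-liners:

import platform

def is_linux():
    return platform.system() == 'Linux'

def is_darwin():
    return platform.system() == 'Darwin'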


@ -42,7 +42,6 @@ class JMXTestCase(unittest.TestCase):
def setUp(self): def setUp(self):
aggregator = MetricsAggregator("test_host") aggregator = MetricsAggregator("test_host")
self.server = Server(aggregator, "localhost", STATSD_PORT) self.server = Server(aggregator, "localhost", STATSD_PORT)
pid_file = PidFile('dogstatsd')
self.reporter = DummyReporter(aggregator) self.reporter = DummyReporter(aggregator)
self.t1 = threading.Thread(target=self.server.start) self.t1 = threading.Thread(target=self.server.start)


@ -102,6 +102,7 @@ class PseudoAgent(object):
x = 0 x = 0
while True: while True:
x = random() x = random()
return x
@staticmethod @staticmethod
def hanging_net(): def hanging_net():


@ -1,5 +1,6 @@
import unittest import unittest
import logging import logging
import win32evtlog
from nose.plugins.attrib import attr from nose.plugins.attrib import attr
from nose.plugins.skip import SkipTest from nose.plugins.skip import SkipTest
@ -43,7 +44,6 @@ class WinEventLogTest(unittest.TestCase):
def setUp(self): def setUp(self):
raise SkipTest("Requires win32evtlog module") raise SkipTest("Requires win32evtlog module")
import win32evtlog
self.LOG_EVENTS = [ self.LOG_EVENTS = [
('Test 1', win32evtlog.EVENTLOG_WARNING_TYPE), ('Test 1', win32evtlog.EVENTLOG_WARNING_TYPE),
('Test 2', win32evtlog.EVENTLOG_ERROR_TYPE), ('Test 2', win32evtlog.EVENTLOG_ERROR_TYPE),

tox.ini

@ -20,22 +20,12 @@ commands = flake8
commands = {posargs} commands = {posargs}
[flake8] [flake8]
max-line-length = 120
# TODO: ignored checks should be enabled in the future # TODO: ignored checks should be enabled in the future
# H201 no 'except:' at least use 'except Exception:' # E501 Line length > 80 characters
# H202 assertRaises Exception too broad
# H237 module is removed in Python
# H301 one import per line
# H305 imports not grouped correctly
# H306 imports not in alphabetical order
# H307 like imports should be grouped together
# H401 docstring should not start with a space
# H402 one line docstring needs punctuation.
# H403 multi line docstrings should end on a new line
# H404 multi line docstring should start without a leading new line
# H405 multi line docstring summary not separated with an empty line
# F401 module imported but unused # F401 module imported but unused
# F821 undefined name # H302 import only modules
# F841 local variable is assigned to but never used # H904 Wrap long lines in parentheses instead of a backslash (DEPRECATED)
ignore = E501,H201,H202,H237,H301,H305,H306,H307,H401,H402,H403,H404,H405,H904,F401,F403,F821,F841 ignore = E501, F401, H302, H904,
show-source = True show-source = True
exclude=.venv,.git,.tox,dist,*egg,build exclude=.venv,.git,.tox,dist,*egg,build