Collect OpenStack metrics from one controller only

This change modifies the collectd plugins to collect OpenStack metrics
only when the controller node owns the management VIP address. This
avoids duplicating the work on all controllers and reduces the load on the
OpenStack services.

Change-Id: I068935fc9dede38f8cfbd6c7499e9b4ea956822e
This commit is contained in:
Simon Pasquier 2015-08-11 15:00:09 +02:00
parent 507c3c229d
commit e288807320
10 changed files with 150 additions and 74 deletions

View File

@ -86,16 +86,20 @@ if $lma_collector['influxdb_mode'] != 'disabled' {
}
class { 'lma_collector::collectd::controller':
service_user => 'nova',
service_password => $nova['user_password'],
service_tenant => 'services',
keystone_url => "http://${management_vip}:5000/v2.0",
haproxy_socket => $haproxy_socket,
ceph_enabled => $ceph_enabled,
memcached_host => hiera('internal_address'),
pacemaker_resources => [
'vip__public', 'vip__management', 'vip__public_vrouter',
'vip__management_vrouter'],
service_user => 'nova',
service_password => $nova['user_password'],
service_tenant => 'services',
keystone_url => "http://${management_vip}:5000/v2.0",
haproxy_socket => $haproxy_socket,
ceph_enabled => $ceph_enabled,
memcached_host => hiera('internal_address'),
pacemaker_resources => [
'vip__public',
'vip__management',
'vip__public_vrouter',
'vip__management_vrouter',
],
pacemaker_master_resource => 'vip__management',
}
class { 'lma_collector::collectd::mysql':

View File

@ -20,7 +20,7 @@ import openstack
from urlparse import urlparse
PLUGIN_NAME = 'check_openstack_api'
INTERVAL = 60
INTERVAL = openstack.INTERVAL
class APICheckPlugin(openstack.CollectdPlugin):
@ -89,6 +89,7 @@ class APICheckPlugin(openstack.CollectdPlugin):
'region': service['region']
}
@openstack.read_callback_wrapper
def read_callback(self):
for item in self.check_api():
if item['status'] == self.UNKNOWN:
@ -115,8 +116,13 @@ def config_callback(conf):
plugin.config_callback(conf)
def notification_callback(notification):
plugin.notification_callback(notification)
def read_callback():
plugin.read_callback()
collectd.register_config(config_callback)
collectd.register_notification(notification_callback)
collectd.register_read(read_callback, INTERVAL)

View File

@ -18,7 +18,7 @@ import collectd
import openstack
PLUGIN_NAME = 'hypervisor_stats'
INTERVAL = 60
INTERVAL = openstack.INTERVAL
class HypervisorStatsPlugin(openstack.CollectdPlugin):
@ -54,6 +54,7 @@ class HypervisorStatsPlugin(openstack.CollectdPlugin):
)
v.dispatch()
@openstack.read_callback_wrapper
def read_callback(self):
r = self.get('nova', 'os-hypervisors/statistics')
if not r:
@ -66,7 +67,7 @@ class HypervisorStatsPlugin(openstack.CollectdPlugin):
if 'cpu_ratio' in self.extra_config:
vcpus = int(self.extra_config['cpu_ratio'] * stats.get('vcpus', 0))
self.dispatch_value('total_free_vcpus',
vcpus - stats.get('vcpus_used', 0))
vcpus - stats.get('vcpus_used', 0))
plugin = HypervisorStatsPlugin(collectd)
@ -76,8 +77,13 @@ def config_callback(conf):
plugin.config_callback(conf)
def notification_callback(notification):
plugin.notification_callback(notification)
def read_callback():
plugin.read_callback()
collectd.register_config(config_callback)
collectd.register_notification(notification_callback)
collectd.register_read(read_callback, INTERVAL)

View File

@ -15,9 +15,15 @@
import datetime
import dateutil.parser
import dateutil.tz
from functools import wraps
import requests
import simplejson as json
# By default, query OpenStack API endpoints every 50 seconds. We choose a value
# less than the default group by interval (which is 60 seconds) to avoid gaps
# in the Grafana graphs.
INTERVAL = 50
class OSClient(object):
""" Base class for querying the OpenStack API endpoints.
@ -138,6 +144,17 @@ class OSClient(object):
return r
def read_callback_wrapper(f):
    """Call the decorated method only while the plugin is collecting data.

    The wrapper inspects ``self.do_collect_data`` on every invocation. When
    it is truthy, the decorated method is called with the original arguments
    and its result is returned. Otherwise the call is skipped and None is
    returned.
    """
    @wraps(f)
    def wrapper(self, *args, **kwargs):
        if not self.do_collect_data:
            # The plugin is currently inactive: skip the decorated call.
            return None
        # Propagate the result instead of silently dropping it.
        return f(self, *args, **kwargs)
    return wrapper
class CollectdPlugin(object):
def __init__(self, logger):
@ -146,6 +163,9 @@ class CollectdPlugin(object):
self.timeout = 5
self.max_retries = 3
self.extra_config = {}
# attributes controlling whether the plugin is in collect mode or not
self.do_collect_data = True
self.depends_on_resource = None
def _build_url(self, service, resource):
s = (self.get_service(service) or {})
@ -201,12 +221,43 @@ class CollectdPlugin(object):
tenant_name = node.values[0]
elif node.key == 'KeystoneUrl':
keystone_url = node.values[0]
elif node.key == 'DependsOnResource':
self.depends_on_resource = node.values[0]
self.os_client = OSClient(username, password, tenant_name,
keystone_url, self.timeout, self.logger,
self.max_retries)
def notification_callback(self, notification):
    """Update ``do_collect_data`` from a notification about a resource.

    Nothing happens unless ``depends_on_resource`` has been configured.
    The notification message is expected to be a JSON object carrying a
    'resource' and a 'value' key. When 'resource' equals
    ``depends_on_resource``, ``do_collect_data`` becomes True if 'value' is
    greater than zero and False otherwise.

    Messages that are not JSON objects are ignored silently; JSON objects
    lacking one of the expected keys are reported with a warning.
    """
    if not self.depends_on_resource:
        return

    try:
        data = json.loads(notification.message)
    except (ValueError, TypeError):
        # ValueError: the message isn't valid JSON.
        # TypeError: the message isn't a string at all (e.g. None).
        return

    if not isinstance(data, dict):
        # Valid JSON but not an object (a bare number, string, list, ...):
        # the key lookups below would be meaningless or raise TypeError.
        return

    if 'value' not in data:
        self.logger.warning(
            "%s: missing 'value' in notification" %
            self.__class__.__name__)
    elif 'resource' not in data:
        self.logger.warning(
            "%s: missing 'resource' in notification" %
            self.__class__.__name__)
    elif data['resource'] == self.depends_on_resource:
        do_collect_data = data['value'] > 0
        if self.do_collect_data != do_collect_data:
            # log only the transitions
            self.logger.notice("%s: do_collect_data=%s" %
                               (self.__class__.__name__, do_collect_data))
        self.do_collect_data = do_collect_data
def read_callback(self):
    """ Read metrics and dispatch values

    This method should be overridden by the derived classes.

    Raises NotImplementedError when a derived class does not override it.
    """
    # Raising a plain string is itself a TypeError on Python >= 2.6, so use
    # a real exception type.
    raise NotImplementedError(
        "read_callback() method needs to be overridden!")
def get_objects(self, project, object_name, api_version='',
params='all_tenants=1'):

View File

@ -18,7 +18,7 @@ import collectd
import openstack
PLUGIN_NAME = 'cinder'
INTERVAL = 60
INTERVAL = openstack.INTERVAL
class CinderStatsPlugin(openstack.CollectdPlugin):
@ -31,6 +31,7 @@ class CinderStatsPlugin(openstack.CollectdPlugin):
def config_callback(self, config):
super(CinderStatsPlugin, self).config_callback(config)
@openstack.read_callback_wrapper
def read_callback(self):
volumes_details = self.get_objects_details('cinder', 'volumes')
@ -83,9 +84,13 @@ def config_callback(conf):
plugin.config_callback(conf)
def notification_callback(notification):
plugin.notification_callback(notification)
def read_callback():
plugin.read_callback()
collectd.register_config(config_callback)
collectd.register_notification(notification_callback)
collectd.register_read(read_callback, INTERVAL)

View File

@ -18,7 +18,7 @@ import collectd
import openstack
PLUGIN_NAME = 'glance'
INTERVAL = 60
INTERVAL = openstack.INTERVAL
class GlanceStatsPlugin(openstack.CollectdPlugin):
@ -31,6 +31,7 @@ class GlanceStatsPlugin(openstack.CollectdPlugin):
def config_callback(self, config):
super(GlanceStatsPlugin, self).config_callback(config)
@openstack.read_callback_wrapper
def read_callback(self):
def is_snap(d):
@ -87,10 +88,13 @@ def config_callback(conf):
plugin.config_callback(conf)
def notification_callback(notification):
plugin.notification_callback(notification)
def read_callback():
plugin.read_callback()
collectd.register_config(config_callback)
collectd.register_notification(notification_callback)
collectd.register_read(read_callback, INTERVAL)

View File

@ -18,7 +18,7 @@ import collectd
import openstack
PLUGIN_NAME = 'keystone'
INTERVAL = 60
INTERVAL = openstack.INTERVAL
class KeystoneStatsPlugin(openstack.CollectdPlugin):
@ -31,6 +31,7 @@ class KeystoneStatsPlugin(openstack.CollectdPlugin):
def config_callback(self, config):
super(KeystoneStatsPlugin, self).config_callback(config)
@openstack.read_callback_wrapper
def read_callback(self):
def groupby(d):
@ -85,8 +86,13 @@ def config_callback(conf):
plugin.config_callback(conf)
def notification_callback(notification):
plugin.notification_callback(notification)
def read_callback():
plugin.read_callback()
collectd.register_config(config_callback)
collectd.register_notification(notification_callback)
collectd.register_read(read_callback, INTERVAL)

View File

@ -18,7 +18,7 @@ import collectd
import openstack
PLUGIN_NAME = 'neutron'
INTERVAL = 60
INTERVAL = openstack.INTERVAL
class NeutronStatsPlugin(openstack.CollectdPlugin):
@ -34,6 +34,7 @@ class NeutronStatsPlugin(openstack.CollectdPlugin):
def config_callback(self, config):
super(NeutronStatsPlugin, self).config_callback(config)
@openstack.read_callback_wrapper
def read_callback(self):
def groupby_network(x):
return "networks.%s" % x.get('status', 'unknown').lower()
@ -114,8 +115,13 @@ def config_callback(conf):
plugin.config_callback(conf)
def notification_callback(notification):
plugin.notification_callback(notification)
def read_callback():
plugin.read_callback()
collectd.register_config(config_callback)
collectd.register_notification(notification_callback)
collectd.register_read(read_callback, INTERVAL)

View File

@ -18,7 +18,7 @@ import collectd
import openstack
PLUGIN_NAME = 'nova'
INTERVAL = 60
INTERVAL = openstack.INTERVAL
class NovaStatsPlugin(openstack.CollectdPlugin):
@ -30,6 +30,7 @@ class NovaStatsPlugin(openstack.CollectdPlugin):
def config_callback(self, config):
super(NovaStatsPlugin, self).config_callback(config)
@openstack.read_callback_wrapper
def read_callback(self):
servers_details = self.get_objects_details('nova', 'servers')
@ -60,8 +61,13 @@ def config_callback(conf):
plugin.config_callback(conf)
def notification_callback(notification):
plugin.notification_callback(notification)
def read_callback():
plugin.read_callback()
collectd.register_config(config_callback)
collectd.register_notification(notification_callback)
collectd.register_read(read_callback, INTERVAL)

View File

@ -24,73 +24,54 @@ class lma_collector::collectd::controller (
$memcached_host = $lma_collector::params::memcached_host,
$apache_host = $lma_collector::params::apache_status_host,
$pacemaker_resources = undef,
$pacemaker_master_resource = undef,
) inherits lma_collector::params {
include collectd::params
include lma_collector::collectd::service
# We can't use the collectd::plugin::python type here because it doesn't
$openstack_configuration = {
'Username' => $service_user,
'Password' => $service_password,
'Tenant' => $service_tenant,
'KeystoneUrl' => $keystone_url,
'Timeout' => $lma_collector::params::openstack_client_timeout,
}
if $pacemaker_master_resource {
$openstack_configuration['DependsOnResource'] = $pacemaker_master_resource
}
# We can't use the collectd::plugin::python resource here because it doesn't
# support the configuration of multiple Python plugins yet.
# See https://github.com/pdxcat/puppet-module-collectd/issues/227
$modules = {
'rabbitmq_info' => {
},
'check_openstack_api' => {
'Username' => $service_user,
'Password' => $service_password,
'Tenant' => $service_tenant,
'KeystoneUrl' => $keystone_url,
'Timeout' => $lma_collector::params::openstack_client_timeout,
},
'hypervisor_stats' => {
'Username' => $service_user,
'Password' => $service_password,
'Tenant' => $service_tenant,
'KeystoneUrl' => $keystone_url,
'Timeout' => $lma_collector::params::openstack_client_timeout,
'CpuAllocationRatio' => $nova_cpu_allocation_ratio,
},
'openstack_nova' => {
'Username' => $service_user,
'Password' => $service_password,
'Tenant' => $service_tenant,
'KeystoneUrl' => $keystone_url,
'Timeout' => $lma_collector::params::openstack_client_timeout,
},
'openstack_cinder' => {
'Username' => $service_user,
'Password' => $service_password,
'Tenant' => $service_tenant,
'KeystoneUrl' => $keystone_url,
'Timeout' => $lma_collector::params::openstack_client_timeout,
},
'openstack_glance' => {
'Username' => $service_user,
'Password' => $service_password,
'Tenant' => $service_tenant,
'KeystoneUrl' => $keystone_url,
'Timeout' => $lma_collector::params::openstack_client_timeout,
},
'openstack_keystone' => {
'Username' => $service_user,
'Password' => $service_password,
'Tenant' => $service_tenant,
'KeystoneUrl' => $keystone_url,
'Timeout' => $lma_collector::params::openstack_client_timeout,
},
'openstack_neutron' => {
'Username' => $service_user,
'Password' => $service_password,
'Tenant' => $service_tenant,
'KeystoneUrl' => $keystone_url,
'Timeout' => $lma_collector::params::openstack_client_timeout,
},
'check_openstack_api' => $openstack_configuration,
'hypervisor_stats' => merge(
$openstack_configuration,
{'CpuAllocationRatio' => $nova_cpu_allocation_ratio,}
),
'openstack_nova' => $openstack_configuration,
'openstack_cinder' => $openstack_configuration,
'openstack_glance' => $openstack_configuration,
'openstack_keystone' => $openstack_configuration,
'openstack_neutron' => $openstack_configuration,
}
if $pacemaker_resources {
validate_array($pacemaker_resources)
$modules['pacemaker_resource'] = {
'Resource' => $pacemaker_resources,
}
if $pacemaker_master_resource {
if ! member($pacemaker_resources, $pacemaker_master_resource) {
fail("${pacemaker_master_resource} isn't a member of ${pacemaker_resources}")
}
}
# Configure the filter that will notify other collectd plugins about the
# state of the Pacemaker resources
collectd::plugin { 'target_notification':
@ -107,7 +88,8 @@ class lma_collector::collectd::controller (
'match' => {
'type' => 'regex',
'matches' => {
'Plugin' => '^pacemaker_resource$',
'Plugin' => '^pacemaker_resource$',
'TypeInstance' => "^${pacemaker_master_resource}$",
},
},
'targets' => [