Merge "Make alarm severity configurable"

This commit is contained in:
Jenkins 2017-07-17 01:55:18 +00:00 committed by Gerrit Code Review
commit 19fe922da4
12 changed files with 332 additions and 33 deletions

View File

@ -40,17 +40,22 @@ class Notifier(object):
# prepare all data related to the sample
metername = notification.meter_name(vl)
message = notification.message(vl)
severity = notification.severity(vl)
severity = notification.collectd_severity(vl)
resource_id = notification.resource_id(vl)
timestamp = datetime.datetime.utcfromtimestamp(vl.time).isoformat()
if notification.alarm_severity(metername) in \
('critical', 'moderate', 'low'):
alarm_severity = notification.alarm_severity(metername)
else:
alarm_severity = 'moderate'
LOGGER.debug(
'Writing: plugin="%s", message="%s", severity="%s", time="%s',
vl.plugin, message, severity, timestamp)
self._send_data(metername, severity, resource_id)
self._send_data(metername, severity, resource_id, alarm_severity)
def _send_data(self, metername, severity, resource_id):
def _send_data(self, metername, severity, resource_id, alarm_severity):
"""Send data to Aodh."""
LOGGER.debug('Sending alarm for %s', metername)
self._sender.send(metername, severity, resource_id)
self._sender.send(metername, severity, resource_id, alarm_severity)

View File

@ -110,8 +110,17 @@ class Sender(object):
return self._auth_token
def send(self, metername, severity, resource_id):
"""Send the payload to Aodh."""
def send(self, metername, severity, resource_id, alarm_severity):
"""Send the payload to Aodh.
severity: is retrieved from the collectd notification itself, it
defines how severely a threshold is broken. Changes everytime
a notification is generated for a specific meter.
alarm_severity: is a variable used to define the severity of the aodh
alarm that will be created. Defined when the alarm is
created and doesn't change, it defines how severe the
situation is if that alarm is triggered.
"""
# get the auth_token
auth_token = self._authenticate()
LOGGER.info('Auth_token: %s',
@ -131,8 +140,8 @@ class Sender(object):
alarm_name = self._get_alarm_name(metername, resource_id)
# Update or create this alarm
result = self._update_or_create_alarm(alarm_name, auth_token,
severity, metername)
result = self._update_or_create_alarm(alarm_name, auth_token, severity,
metername, alarm_severity)
if result is None:
return
@ -155,7 +164,8 @@ class Sender(object):
if auth_token is not None:
result = self._update_or_create_alarm(alarm_name, auth_token,
severity, metername)
severity, metername,
alarm_severity)
if result.status_code == HTTP_NOT_FOUND:
LOGGER.debug("Received 404 error when submitting %s notification, \
@ -164,7 +174,8 @@ class Sender(object):
# check for and/or get alarm_id
result = self._update_or_create_alarm(alarm_name, auth_token,
severity, metername)
severity, metername,
alarm_severity)
if result.status_code == HTTP_CREATED:
LOGGER.debug('Result: %s', HTTP_CREATED)
@ -181,7 +192,7 @@ class Sender(object):
return endpoint
def _update_or_create_alarm(self, alarm_name, auth_token,
severity, metername):
severity, metername, alarm_severity):
# check for an alarm and update
try:
alarm_id = self._get_alarm_id(alarm_name)
@ -194,17 +205,19 @@ class Sender(object):
endpoint = self._get_endpoint("aodh")
LOGGER.warn('No known ID for %s', alarm_name)
result, self._alarm_ids[alarm_name] = \
self._create_alarm(endpoint, severity, metername, alarm_name)
self._create_alarm(endpoint, severity,
metername, alarm_name, alarm_severity)
return result
def _create_alarm(self, endpoint, severity, metername, alarm_name):
def _create_alarm(self, endpoint, severity, metername,
alarm_name, alarm_severity):
"""Create a new alarm with a new alarm_id."""
url = "{}/v2/alarms/".format(endpoint)
rule = {'event_type': metername, }
payload = json.dumps({'state': self._get_alarm_state(severity),
'name': alarm_name,
'severity': severity,
'severity': alarm_severity,
'type': "event",
'event_rule': rule,
})

View File

@ -72,10 +72,15 @@ class Meter(object):
"""Get the notification message."""
return vl.message
def severity(self, vl):
def collectd_severity(self, vl):
    """Translate the severity of a collectd notification for Aodh.

    The collectd FAILURE, WARNING and OKAY levels become 'critical',
    'moderate' and 'low' respectively; any other value yields None.
    """
    aodh_by_collectd = {
        self._collectd.NOTIF_FAILURE: 'critical',
        self._collectd.NOTIF_WARNING: 'moderate',
        self._collectd.NOTIF_OKAY: 'low',
    }
    return aodh_by_collectd.get(vl.severity)
def alarm_severity(self, meter_name):
    """Return the alarm severity the plugin Config holds for a meter."""
    # pylint: disable=no-self-use
    config = Config.instance()
    return config.alarm_severity(meter_name)

View File

@ -76,6 +76,9 @@ class Config(object):
self._user_units = {}
self._units = UNITS.copy()
# dictionary for user defined severities
self._alarm_severities = {}
def read(self, cfg):
"""Read the collectd configuration
@ -106,6 +109,17 @@ class Config(object):
return unit
return self._units.get(plugin, "None")
def alarm_severity(self, meter):
    """Return the alarm severity configured for a meter.

    Falls back to 'moderate' when no user-defined severity is stored
    for the given meter.
    """
    try:
        configured = self._alarm_severities[meter]
    except KeyError as missing:
        # nothing user-defined for this meter: note it and use the default
        LOGGER.info(missing)
        LOGGER.info('There is no user-defined severity for this alarm')
        configured = 'moderate'
    return configured
def _read_node(self, node):
"""Read a configuration node
@ -119,6 +133,11 @@ class Config(object):
self._read_units(node.children)
return
# if the node is 'ALARM_SEVERITIES' call the alarm severities function
if key == 'ALARM_SEVERITIES':
self._read_alarm_severities(node.children)
return
# if we have a root node read only all its children
# as we don't expect any value here
if node.children:
@ -172,3 +191,20 @@ class Config(object):
LOGGER.error(
'Invalid unit configuration: %s', node.key.upper())
self._units.update(self._user_units)
def _read_alarm_severities(self, nodes):
"""Read in any user-defined severity settings for alarms.

Every node whose key is 'ALARM_SEVERITY' (compared case-insensitively)
must carry exactly two values; they are stored as one key/value entry in
self._alarm_severities without further validation. A node with a
different key, or with a different number of values, is reported through
LOGGER.error and skipped.
"""
for node in nodes:
if node.key.upper() == 'ALARM_SEVERITY':
# a usable entry is exactly one (key, value) pair
if len(node.values) == 2:
key, val = node.values
self._alarm_severities[key] = val
else:
# NOTE(review): the backslash continuation sits inside the string
# literal, so the next line's leading whitespace becomes part of the
# logged message -- confirm against the tests before reformatting.
LOGGER.error(
'Invalid alarm severity configuration:\
severity %s' % ' '.join(
['"%s"' % i for i in node.values]))
else:
# any other key is not expected among these nodes
LOGGER.error(
'Invalid alarm severity configuration: %s',
node.key.upper())

View File

@ -17,15 +17,18 @@
"""Plugin tests."""
import abc
import logging
import mock
import requests
import six
import unittest
from collectd_ceilometer.aodh import plugin
from collectd_ceilometer.aodh import sender
from collectd_ceilometer.common.keystone_light import KeystoneException
from collectd_ceilometer.common.meters import base
from collectd_ceilometer.common import settings
Logger = logging.getLoggerClass()
@ -79,9 +82,55 @@ def mock_config(**kwargs):
**kwargs)
def config_module(
        values, severities=None,
        module_name="collectd_ceilometer.ceilometer.plugin"):
    """Create a mocked collectd 'MODULE' config node.

    One child value node is built per item of ``values``; when
    ``severities`` is given and non-empty, a severities node built from
    it is appended as the last child.
    """
    children = []
    for key, value in six.iteritems(values):
        children.append(config_value(key, value))
    if severities:
        severities_node = config_severities(severities)
        children.append(severities_node)
    return config_node('MODULE', children=children, value=module_name)
def config_severities(severities):
    """Create a mocked 'ALARM_SEVERITIES' node from a severities mapping.

    Each item of ``severities`` becomes one 'ALARM_SEVERITY' child node
    whose two values are the item's key and value.
    """
    severity_nodes = []
    for meter, severity in six.iteritems(severities):
        severity_nodes.append(config_value('ALARM_SEVERITY', meter, severity))
    return config_node('ALARM_SEVERITIES', severity_nodes)
def config_node(key, children, value=None):
    """Create a mocked collectd config node with children and one value.

    The children are exposed as a tuple and the value as a 1-tuple.
    """
    node_attrs = {
        'children': tuple(children),
        'key': key,
        'values': (value,),
    }
    return mock.create_autospec(
        spec=MockedConfig, spec_set=True, instance=True, **node_attrs)
def config_value(key, *values):
    """Create a mocked, childless collectd config node holding ``values``."""
    leaf_attrs = {
        'children': tuple(),
        'key': key,
        'values': values,
    }
    return mock.create_autospec(
        spec=MockedConfig, spec_set=True, instance=True, **leaf_attrs)
class MockedConfig(object):
    """Attribute template used as the spec for mocked config nodes."""

    @abc.abstractproperty
    def children(self):
        """Child nodes of the config node."""

    @abc.abstractproperty
    def key(self):
        """Key of the config node."""

    @abc.abstractproperty
    def values(self):
        """Values carried by the config node."""
def mock_value(
host='localhost', plugin='cpu', plugin_instance='0',
@ -111,6 +160,18 @@ class MockedValue(object):
class TestPlugin(unittest.TestCase):
"""Test the collectd plugin."""
@property
def default_values(self):
    """Baseline plugin configuration values shared by the tests."""
    return {
        'BATCH_SIZE': 1,
        'OS_AUTH_URL': 'https://test-auth.url.tld/test',
        'CEILOMETER_URL_TYPE': 'internalURL',
        'CEILOMETER_TIMEOUT': 1000,
        'OS_USERNAME': 'tester',
        'OS_PASSWORD': 'testpasswd',
        'OS_TENANT_NAME': 'service',
    }
@mock.patch.object(plugin, 'Plugin', autospec=True)
@mock.patch.object(plugin, 'Config', autospec=True)
@mock.patch.object(plugin, 'CollectdLogHandler', autospec=True)
@ -159,16 +220,17 @@ class TestPlugin(unittest.TestCase):
_get_alarm_name.return_value = 'my-alarm'
meter_name = meter.meter_name.return_value
severity = meter.severity.return_value
severity = meter.collectd_severity.return_value
resource_id = meter.resource_id.return_value
alarm_severity = meter.alarm_severity.return_value
# send the values
instance.send(meter_name, severity, resource_id)
instance.send(meter_name, severity, resource_id, alarm_severity)
# check that the function is called
_update_or_create_alarm.assert_called_once_with(
instance, 'my-alarm', auth_client.auth_token,
severity, meter_name)
severity, meter_name, alarm_severity)
# reset function
_update_or_create_alarm.reset_mock()
@ -176,12 +238,12 @@ class TestPlugin(unittest.TestCase):
# run test again for failed attempt
_update_or_create_alarm.return_value = None
instance.send(meter_name, severity, resource_id)
instance.send(meter_name, severity, resource_id, alarm_severity)
# and values that have been sent
_update_or_create_alarm.assert_called_once_with(
instance, 'my-alarm', auth_client.auth_token,
severity, meter_name)
severity, meter_name, alarm_severity)
# reset post method
_update_or_create_alarm.reset_mock()
@ -211,11 +273,12 @@ class TestPlugin(unittest.TestCase):
# init values to send
_get_alarm_id.return_value = 'my-alarm-id'
metername = meter.meter_name.return_value
severity = meter.severity.return_value
severity = meter.collectd_severity.return_value
rid = meter.resource_id.return_value
alarm_severity = meter.alarm_severity.return_value
# send the values
instance.send(metername, severity, rid)
instance.send(metername, severity, rid, alarm_severity)
# update the alarm
put.assert_called_once_with(
@ -254,11 +317,12 @@ class TestPlugin(unittest.TestCase):
_get_alarm_id.side_effect = KeyError()
_create_alarm.return_value = requests.Response(), 'my-alarm-id'
metername = meter.meter_name.return_value
severity = meter.severity.return_value
severity = meter.collectd_severity.return_value
rid = meter.resource_id.return_value
alarm_severity = meter.alarm_severity.return_value
# send the values again
instance.send(metername, severity, rid)
instance.send(metername, severity, rid, alarm_severity)
put.assert_not_called()
@ -295,7 +359,7 @@ class TestPlugin(unittest.TestCase):
# no requests method has been called
put.assert_not_called()
@mock.patch.object(base.Meter, 'severity', spec=callable)
@mock.patch.object(base.Meter, 'collectd_severity', spec=callable)
def test_get_alarm_state_severity_low(self, severity):
"""Test _get_alarm_state if severity is 'low'.
@ -316,7 +380,7 @@ class TestPlugin(unittest.TestCase):
self.assertNotEqual(instance._get_alarm_state('low'),
'insufficient data')
@mock.patch.object(base.Meter, 'severity', spec=callable)
@mock.patch.object(base.Meter, 'collectd_severity', spec=callable)
def test_get_alarm_state_severity_moderate(self, severity):
"""Test _get_alarm_state if severity is 'moderate'.
@ -337,7 +401,7 @@ class TestPlugin(unittest.TestCase):
self.assertNotEqual(instance._get_alarm_state('moderate'),
'insufficient data')
@mock.patch.object(base.Meter, 'severity', spec=callable)
@mock.patch.object(base.Meter, 'collectd_severity', spec=callable)
def test_get_alarm_state_severity_critical(self, severity):
"""Test _get_alarm_state if severity is 'critical'.
@ -405,15 +469,16 @@ class TestPlugin(unittest.TestCase):
alarm_name = _get_alarm_name.return_value
meter_name = meter.meter_name.return_value
severity = meter.severity.return_value
severity = meter.collectd_severity.return_value
resource_id = meter.resource_id.return_value
alarm_severity = meter.alarm_severity.return_value
# send the data
instance.send(meter_name, severity, resource_id)
instance.send(meter_name, severity, resource_id, alarm_severity)
_update_or_create_alarm.assert_called_once_with(
instance, alarm_name, client.auth_token,
severity, meter_name)
severity, meter_name, alarm_severity)
# de-assert the request
_update_or_create_alarm.reset_mock()
@ -432,11 +497,11 @@ class TestPlugin(unittest.TestCase):
client.auth_token = 'Test auth token'
# send the data
instance.send(meter_name, severity, resource_id)
instance.send(meter_name, severity, resource_id, alarm_severity)
_update_or_create_alarm.assert_called_once_with(
instance, alarm_name, client.auth_token,
severity, meter_name)
severity, meter_name, alarm_severity)
# update/create response is unauthorized -> new token needs
# to be acquired
@ -447,7 +512,7 @@ class TestPlugin(unittest.TestCase):
client.auth_token = 'New test auth token'
# send the data again
instance.send(meter_name, severity, resource_id)
instance.send(meter_name, severity, resource_id, alarm_severity)
@mock.patch.object(sender, 'ClientV3', autospec=True)
@mock.patch.object(plugin, 'Notifier', autospec=True)
@ -477,3 +542,106 @@ class TestPlugin(unittest.TestCase):
instance = plugin.Plugin(collectd=collectd, config=config)
instance.shutdown
@mock.patch.object(settings, 'LOGGER', autospec=True)
def test_user_severities(self, LOGGER):
"""Test if a user enters a severity for a specific meter.
Set-up: Create a node with some user defined severities
Configure the node
Test: Read the configured node and compare the results
of the method to the severities configured in the node
Expected-behaviour: Meters with a configured severity are mapped to that
severity, and meters without one fall back to 'moderate'.
"""
node = config_module(
values=self.default_values,
severities={'age': 'low',
'star.distance': 'moderate',
'star.temperature': 'critical'})
config = settings.Config._decorated()
config.read(node)
# reading a valid node must not log any configuration error
LOGGER.error.assert_not_called()
self.assertEqual(config.alarm_severity('age'), 'low')
self.assertEqual(config.alarm_severity('star.distance'), 'moderate')
self.assertEqual(config.alarm_severity('star.temperature'), 'critical')
# meters that were never configured get the default severity
self.assertEqual(config.alarm_severity('monty'), 'moderate')
self.assertEqual(config.alarm_severity('python'), 'moderate')
@mock.patch.object(settings, 'LOGGER', autospec=True)
def test_user_severities_invalid(self, LOGGER):
"""Test invalid user defined severities.
Set-up: Configure the node with one defined severity
Set a configuration to have 3 entries instead of the 2
which are expected
Test: Try to read the configuration node with incorrect configurations
Compare the configuration to the response on the method
Expected-behaviour: alarm_severity will fall back to 'moderate'
Log will be written that severities were
incorrectly configured
"""
node = config_module(values=self.default_values,
severities=dict(age='low'))
# make some alarm severity entry invalid
for child in node.children:
if child.key == 'ALARM_SEVERITIES':
child.children[0].values = (1, 2, 3)
break
config = settings.Config._decorated()
config.read(node)
# the malformed entry is not stored, so the default applies
self.assertEqual(config.alarm_severity('age'), 'moderate')
LOGGER.error.assert_called_with(
'Invalid alarm severity configuration: \
severity "1" "2" "3"')
@mock.patch.object(settings, 'LOGGER', autospec=True)
def test_user_severities_invalid_node(self, LOGGER):
"""Test invalid node with severities configuration.
Set-up: Set up a configuration node with a severity defined
Replace the key of the severity entry with an unknown key
Test: Read the incorrect configuration node
Expected-behaviour: Error will be recorded in the log
alarm_severity will fall back to 'moderate'
"""
node = config_module(values=self.default_values,
severities=dict(age='moderate'))
# make some alarm severity entry invalid
for child in node.children:
if child.key == 'ALARM_SEVERITIES':
child.children[0].key = 'NOT_SEVERITIES'
break
config = settings.Config._decorated()
config.read(node)
LOGGER.error.assert_called_with(
'Invalid alarm severity configuration: %s', "NOT_SEVERITIES")
self.assertEqual(config.alarm_severity('age'), 'moderate')
def test_read_alarm_severities(self):
    """Test reading in user defined alarm severities method.

    Set-up: Set up a node configured with a severities dictionary defined
    Test: Read the node through Config._read_node
    Expected-behaviour: The configured severity is returned for the meter
    """
    node = config_module(values=self.default_values,
                         severities=dict(age='low'))

    # The ALARM_SEVERITY entries are children of the ALARM_SEVERITIES node,
    # not of the module node, so there is nothing to pre-scan here; the
    # node is handed straight to the reader.
    config = settings.Config._decorated()
    config._read_node(node)

    self.assertEqual('low', config.alarm_severity('age'))

View File

@ -21,6 +21,7 @@ from __future__ import unicode_literals
from collectd_ceilometer.common.meters.base import Meter
from collectd_ceilometer.tests.base import TestCase
import mock
@ -96,3 +97,18 @@ class MetersTest(TestCase):
self._collectd.get_dataset.assert_called_once()
LOGGER.warning.assert_called_once()
self.assertEqual("gauge", actual)
@mock.patch.object(Meter, 'meter_name', autospec=True)
def test_default_alarm_severity(self, metername):
"""Test the default severity setting.
Set-up: set the return value of the mocked meter_name
Call the alarm_severity method for that meter name
Test: Compare the result of alarm_severity() to the default severity
Expected behaviour: Result will be 'moderate'
"""
metername.return_value = 'my-meter'
result = self.meter.alarm_severity('my-meter')
self.assertEqual(result, 'moderate')

View File

@ -162,6 +162,7 @@ function adapt_collectd_conf {
sudo sed -i 's|CEILOMETER_TIMEOUT.*$|CEILOMETER_TIMEOUT "'$CEILOMETER_TIMEOUT'"|g' $COLLECTD_CONF_DIR/collectd-aodh-plugin.conf
sudo sed -i 's|OS_PASSWORD.*$|OS_PASSWORD "'$SERVICE_PASSWORD'"|g' $COLLECTD_CONF_DIR/collectd-aodh-plugin.conf
sudo sed -i 's|OS_TENANT_NAME.*$|OS_TENANT_NAME "'$SERVICE_TENANT_NAME'"|g' $COLLECTD_CONF_DIR/collectd-aodh-plugin.conf
config_custom_severities "aodh" "$COLLECTD_ALARM_SEVERITIES"
fi
# Configure collectd logfile plugin
@ -196,6 +197,23 @@ function config_custom_units {
fi
}
# Check for severities defined by the user and add them to the plugin conf.
#   $1 - service name, used to select collectd-<service>-plugin.conf
#   $2 - comma separated list of severity entries, or "none" to skip
function config_custom_severities {
    local service_name=$1
    local custom_severities=$2
    # Build the path once and quote it on use, so it is never word-split
    # while IFS is temporarily set to ','.
    local conf_file="$COLLECTD_CONF_DIR/collectd-$service_name-plugin.conf"
    local alarm_severity
    local OIFS

    if [[ $custom_severities != none ]]; then
        OIFS=$IFS
        IFS=','
        # Unquoted on purpose: the list is split on commas into entries.
        for alarm_severity in $custom_severities;
        do
            # Each entry is appended directly after the <ALARM_SEVERITIES> line.
            sudo sed -i '/<ALARM_SEVERITIES>/a\ ALARM_SEVERITY '"$alarm_severity"'' "$conf_file"
        done
        IFS=$OIFS
    fi
}
# remove plugin conf file
function restore_collectd_conf {
# Remove all of the .conf files once they are backed up

View File

@ -44,6 +44,9 @@ fi
COLLECTD_CEILOMETER_UNITS=${COLLECTD_CEILOMETER_CUSTOM_UNITS:-none}
COLLECTD_GNOCCHI_UNITS=${COLLECTD_GNOCCHI_CUSTOM_UNITS:-none}
# Custom alarm severities feature
COLLECTD_ALARM_SEVERITIES=${COLLECTD_AODH_SEVERITIES:-none}
# User defined requirements
COLLECTD_ADDITIONAL_PACKAGES=${COLLECTD_ADDITIONAL_PACKAGES:-''}
COLLECTD_REQUIREMENTS=''

View File

@ -170,6 +170,19 @@ To enable aodh with collectd, add the following to your local.conf:
COLLECTD_AODH_ENABLED=True
This plugin allows you to specify the severity of the alarms that will be
created for meters.
To enable this add the following line to your local.conf, once you have enabled
the collectd-aodh-plugin:
::
COLLECTD_AODH_SEVERITIES='"<meter>" <severity>'
You can set this severity to one of three values: "low", "moderate" or
"critical". If you leave this unset for any of the meters that trigger an
alarm, the severity will default to "moderate".
In addition to this, alarms can be created manually via the Aodh command line
based on the metrics generated by the gnocchi plugin and meters from the
ceilometer plugin. For more information on this please read

View File

@ -35,6 +35,18 @@ To use collectd-ceilometer-plugin in collectd::
local.conf settings
-------------------
COLLECTD_AODH_SEVERITIES
~~~~~~~~~~~~~~~~~~~~~~~~
("meter" severity) a comma separated list of pairs, defining meters and the
severity of the alarms they will trigger if their value goes above a
certain threshold. The meter is generally in the form of "plugin.type"
where plugin and plugin type are attributes of the collectd data. There are
three severity options available: "low", "moderate" and "critical".
If this configuration option is left unset for any alarm, the default
severity is "moderate".
Example: COLLECTD_AODH_SEVERITIES='"<meter>" <severity>, "<meter>" <severity>'
COLLECTD_BRANCH
~~~~~~~~~~~~~~~
(<git branch>) Indicates which branch of collectd to checkout before

View File

@ -27,6 +27,8 @@
OS_PASSWORD "password"
OS_TENANT_NAME "service"
<ALARM_SEVERITIES>
</ALARM_SEVERITIES>
</Module>
</Plugin>

View File

@ -0,0 +1,8 @@
---
- features:
Included a new feature that allows you to specify the severity of any alarm
created on a meter/metric. This can be specified in the local.conf using the
configuration option "COLLECTD_AODH_SEVERITIES" and will be configured on
deployment with DevStack. If left unset, the alarm severity will default
to "moderate". Included more detailed usage instructions in devStackGSG.rst
for this new feature.