Add a target_hostname to Host Alive plugin

To handle the case where the checked system has multiple IP Addresses
and the network name to be used for liveness checking is not the same as
the usual name used to identify the server in Monasca,
an additional target_hostname parameter can be configured. It is
the network hostname or IP Address to check instead of hostname.

Add unit tests as well

Change-Id: I33721764e64ef5079b26f78df84c94ed7a1009e7
This commit is contained in:
Craig Bryant 2016-10-06 14:24:51 -06:00
parent 57ce4abb67
commit c376ee8655
5 changed files with 226 additions and 24 deletions

View File

@ -805,14 +805,6 @@ See [the example configuration](https://github.com/openstack/monasca-agent/blob/
## Host Alive
An extension to the Agent can provide basic "aliveness" checks of other systems, verifying that the remote host (or device) is online. This check currently provides two methods of determining connectivity:
default dimensions:
observer_host: fqdn
hostname: fqdn | supplied
test_type: ping | ssh | Unrecognized alive_test
default value_meta
error: error_message
* ping (ICMP)
* SSH (banner test, port 22 by default)
@ -862,14 +854,43 @@ The instances section contains the hostname/IP to check, and the type of check t
alive_test: ssh
```
To handle the case where the target system has multiple IP Addresses and the network name to be used for
liveness checking is not the same as the usual name used to identify the server in Monasca,
an additional target_hostname parameter can be configured. It is the network hostname or IP
Address to check instead of host_name. The hostname dimension will always be set to the value of
host_name even if target_hostname is specified. A dimension target_hostname will be added
with the value of target_hostname if it is different from host_name.
To simplify configuring multiple checks, when the host_alive detection plugin is configured, hostname can
be a comma-separated list. An instance will be created for each value. target_hostname can also
be a comma-separated list; an individual entry may be left empty if there is
no target_hostname for the corresponding hostname entry.
Here is an example of configuring target_hostname:
```
- name: ping somenode
host_name: somenode
target_hostname: somenode.mgmt.net
alive_test: ssh
```
The host alive checks return the following metrics
| Metric Name | Dimensions | Semantics |
| ----------- | ---------- | --------- |
| host_alive_status | observer_host=fqdn, hostname=supplied hostname being checked, test_type=ping or ssh | Status of remote host(device) is online or not. (0=online, 1=offline)
| host_alive_status | observer_host=fqdn of checking host, hostname=supplied hostname being checked, test_type=ping or ssh | Whether the remote host (device) is online (0=online, 1=offline) |
Also in the case of an error the value_meta contains an error message.
The default dimensions are:
observer_host: fqdn
hostname: fqdn | supplied
target_hostname: Set to target_hostname only if it is different from host_name
test_type: ping | ssh | Unrecognized alive_test
default value_meta
error: error_message
## HTTP (endpoint status)
This section describes the http endpoint check that can be performed by the Agent. Http endpoint checks are checks that perform simple up/down checks on services, such as HTTP/REST APIs. An agent, given a list of URLs, can dispatch an http request and report to the API success/failure as a metric.

View File

@ -1,5 +1,5 @@
#!/bin/env python
# (C) Copyright 2015,2016 Hewlett Packard Enterprise Development Company LP
# (C) Copyright 2015,2016 Hewlett Packard Enterprise Development LP
"""Monitoring Agent remote host aliveness checker.
"""
@ -86,26 +86,38 @@ class HostAlive(services_checks.ServicesCheck):
return True, None
def _check(self, instance):
"""Run the desired host-alive check againt this host.
"""Run the desired host-alive check against this host.
"""
if not instance['host_name']:
raise ValueError('Target hostname not specified!')
host_name = instance.get('host_name', None)
if not host_name:
raise ValueError('host_name not specified!')
dimensions = self._set_dimensions({'hostname': instance['host_name'],
'observer_host': util.get_hostname()},
# Allow a different network name to be used for the check
# to handle multi-homed systems
if instance.get('target_hostname', None):
target_hostname = instance.get('target_hostname')
else:
target_hostname = host_name
host_dimensions = {'hostname': host_name, 'observer_host': util.get_hostname()}
# If the check is against a different network name than host_name, add it to
# the dimensions
if target_hostname != host_name:
host_dimensions['target_hostname'] = target_hostname
dimensions = self._set_dimensions(host_dimensions,
instance)
success = False
test_type = instance['alive_test']
if test_type == 'ssh':
success, error_message = self._test_ssh(instance['host_name'],
success, error_message = self._test_ssh(target_hostname,
self.init_config.get('ssh_port'),
self.init_config.get('ssh_timeout'))
elif test_type == 'ping':
success, error_message = self._test_ping(instance['host_name'],
success, error_message = self._test_ping(target_hostname,
self.init_config.get('ping_timeout'))
else:
error_message = 'Unrecognized alive_test: {0}'.format(test_type)

View File

@ -1,4 +1,4 @@
# (C) Copyright 2015 Hewlett Packard Enterprise Development Company LP
# (C) Copyright 2015,2016 Hewlett Packard Enterprise Development LP
import logging
@ -18,9 +18,13 @@ class HostAlive(monasca_setup.detection.ArgsPlugin):
monasca-setup -d hostalive -a "hostname=remotebox type=ping"
monasca-setup -d hostalive -a "hostname=remotebox,remotebox2 type=ssh"
monasca-setup -d hostalive -a "hostname=rb,rb2 target_hostname=,rb2-nic2 type=ssh"
"""
DEFAULT_PING_TIMEOUT = 1
DEFAULT_SSH_TIMEOUT = 2
DEFAULT_SSH_PORT = 22
def _detect(self):
"""Run detection, set self.available True if the service is detected.
"""
@ -43,10 +47,21 @@ class HostAlive(monasca_setup.detection.ArgsPlugin):
'host_name': hostname,
'alive_test': self.args['type']})
instances.append(instance)
if 'target_hostname' in self.args:
index = 0
network_names_to_check = self.args['target_hostname'].split(',')
for target_hostname in network_names_to_check:
if target_hostname:
if index >= len(instances):
raise Exception('Too many target_hostname values')
instance = instances[index]
instance.update({'target_hostname': target_hostname})
index += 1
config['host_alive'] = {'init_config': {'ping_timeout': 1,
'ssh_timeout': 2,
'ssh_port': 22},
'instances': instances}
config['host_alive'] = {
'init_config': {'ping_timeout': self.DEFAULT_PING_TIMEOUT,
'ssh_timeout': self.DEFAULT_SSH_TIMEOUT,
'ssh_port': self.DEFAULT_SSH_PORT},
'instances': instances}
return config

View File

@ -0,0 +1,70 @@
# (C) Copyright 2016 Hewlett Packard Enterprise Development LP
import mock
import unittest
import monasca_agent.common.util as util
from monasca_agent.collector.checks_d.host_alive import HostAlive
# Metric name the tests expect the check to pass to gauge().
HOST_ALIVE_STATUS = 'host_alive_status'
# Metric values the tests expect: SUCCESS when the mocked probe reports the
# host as reachable, FAILURE when it does not.
SUCCESS = 0
FAILURE = 1
class TestHostAlive(unittest.TestCase):
    """Unit tests for ``HostAlive._check`` using the ``ping`` alive test.

    The ping probe (``_test_ping``) and the metric reporter (``gauge``) are
    replaced with mocks, so the tests only verify which host name is probed
    and which metric value/dimensions are reported.
    """

    def setUp(self):
        """Build a HostAlive check with a mocked gauge and a base instance."""
        unittest.TestCase.setUp(self)
        init_config = {}
        agent_config = {}
        self._host_alive = HostAlive('TestHostAlive', init_config, agent_config)
        self._gauge = mock.Mock()
        self._host_alive.gauge = self._gauge
        self._host_name = 'monasca'
        self._instance = {'host_name': self._host_name,
                          'alive_test': 'ping'}
        # Dimensions expected when no distinct target_hostname is configured.
        self._base_dimensions = {
            'test_type': 'ping',
            'hostname': self._host_name,
            'observer_host': util.get_hostname()
        }

    def _run_check(self, host_name, instance, ping_result):
        """Run ``_check`` with a mocked ping and verify the probed host.

        :param host_name: network name the ping is expected to be sent to
        :param instance: instance configuration passed to ``_check``
        :param ping_result: ``(success, error_message)`` the mock ping returns
        """
        mock_ping = mock.Mock(return_value=ping_result)
        self._host_alive._test_ping = mock_ping
        self._host_alive._check(instance)
        # The second argument is the ping timeout; None is expected here
        # because setUp supplies an empty init_config.
        mock_ping.assert_called_with(host_name, None)

    def test_host_is_alive(self):
        """A successful ping reports SUCCESS with the base dimensions."""
        ping_result = (True, None)
        self._run_check(self._host_name, self._instance, ping_result)
        self._gauge.assert_called_with(HOST_ALIVE_STATUS,
                                       SUCCESS,
                                       dimensions=self._base_dimensions)

    def test_host_is_dead(self):
        """A failed ping reports FAILURE and the error in value_meta."""
        error_message = '''I'm not dead yet'''
        self._run_check(self._host_name, self._instance,
                        (False, error_message))
        # Use the shared constant, as the sibling tests do, rather than
        # repeating the metric name literal.
        self._gauge.assert_called_with(HOST_ALIVE_STATUS,
                                       FAILURE,
                                       dimensions=self._base_dimensions,
                                       value_meta={'error': error_message})

    def test_host_is_alive_with_target_hostname(self):
        """A distinct target_hostname is pinged and added as a dimension."""
        check_name = 'otherMonasca'
        self._instance['target_hostname'] = check_name
        self._run_check(check_name, self._instance, (True, None))
        self._base_dimensions['target_hostname'] = check_name
        self._gauge.assert_called_with(HOST_ALIVE_STATUS,
                                       SUCCESS,
                                       dimensions=self._base_dimensions)

    def test_host_is_alive_with_dup_target_hostname(self):
        """A target_hostname equal to host_name adds no extra dimension."""
        # Reuse the fixture's host name so the "duplicate" case stays a
        # duplicate even if the name chosen in setUp changes.
        host_name = self._host_name
        self._instance['target_hostname'] = host_name
        self._run_check(host_name, self._instance, (True, None))
        self._gauge.assert_called_with(HOST_ALIVE_STATUS,
                                       SUCCESS,
                                       dimensions=self._base_dimensions)

View File

@ -0,0 +1,84 @@
# (C) Copyright 2016 Hewlett Packard Enterprise Development LP
import mock
import unittest
from monasca_setup.detection.plugins.host_alive import HostAlive
class TestHostAliveDetect(unittest.TestCase):
    """Tests for the configuration produced by the host_alive detection plugin.

    Each test feeds ``hostname`` / ``target_hostname`` arguments to the plugin
    and compares the result of ``build_config`` with an independently built
    expected configuration.
    """

    def setUp(self):
        """Create the plugin and the expected ``init_config`` section."""
        super(TestHostAliveDetect, self).setUp()
        plugin = HostAlive('AAAA')
        self._host_alive = plugin
        expected_init = {
            'ssh_timeout': plugin.DEFAULT_SSH_TIMEOUT,
            'ping_timeout': plugin.DEFAULT_PING_TIMEOUT,
            'ssh_port': plugin.DEFAULT_SSH_PORT,
        }
        self._expected_config = {'host_alive': {'init_config': expected_init}}

    def _create_instances(self, host_names, target_hostnames=None):
        """Store the instances expected for the given names.

        :param host_names: list of host names, one expected instance each
        :param target_hostnames: optional list of target host names matched
            to ``host_names`` by position; empty or missing entries add no
            ``target_hostname`` key to the corresponding instance
        """
        targets = target_hostnames or []
        expected = []
        for position, host in enumerate(host_names):
            entry = {
                'alive_test': 'ping',
                'name': host + ' ping',
                'host_name': host,
            }
            # A target may be absent (list too short) or blank (sparse list).
            target = targets[position] if position < len(targets) else None
            if target:
                entry['target_hostname'] = target
            expected.append(entry)
        self._expected_config['host_alive']['instances'] = expected

    def _run_build_config(self, host_names, target_hostnames=None):
        """Run ``build_config`` for the names and compare with expectations.

        :param host_names: list of host names joined into the ``hostname`` arg
        :param target_hostnames: optional list joined into ``target_hostname``
        """
        plugin_args = {'type': 'ping', 'hostname': ','.join(host_names)}
        if target_hostnames:
            plugin_args['target_hostname'] = ','.join(target_hostnames)
        self._host_alive.args = plugin_args

        built = self._host_alive.build_config()

        self._create_instances(host_names, target_hostnames)
        self.assertEqual(built, self._expected_config)

    def test_build_config_simple(self):
        """A single host name yields a single instance."""
        self._run_build_config(['aaaa'])

    def test_build_config_multiple(self):
        """Several host names yield one instance each."""
        self._run_build_config(['aaaa', 'bbbb', 'cccc'])

    def test_build_config_complex(self):
        """Every host name gets its own target_hostname."""
        self._run_build_config(['aaaa', 'bbbb', 'cccc'],
                               ['dddd', 'eeee', 'ffff'])

    def test_build_config_complex_sparse(self):
        """A blank target entry leaves that instance without a target."""
        self._run_build_config(['aaaa', 'bbbb', 'cccc'],
                               ['dddd', '', 'ffff'])

    def test_build_config_complex_not_matching(self):
        """Fewer targets than hosts only affects the leading instances."""
        self._run_build_config(['aaaa', 'bbbb', 'cccc'], ['dddd'])

    def test_build_config_complex_too_many(self):
        """More targets than hosts makes the plugin raise."""
        hosts = ['aaaa', 'bbbb', 'cccc']
        targets = ['dddd', 'eeee', 'ffff', 'gggg']
        self.assertRaises(Exception, self._run_build_config, hosts, targets)