diff --git a/masakarimonitors/conf/host.py b/masakarimonitors/conf/host.py index 9670a98..c9a88cf 100644 --- a/masakarimonitors/conf/host.py +++ b/masakarimonitors/conf/host.py @@ -20,6 +20,35 @@ monitor_host_opts = [ cfg.IntOpt('monitoring_interval', default=60, help='Monitoring interval(in seconds) of node status.'), + cfg.IntOpt('api_retry_max', + default=12, + help='Number of retries for send a notification in' + ' processmonitor.'), + cfg.IntOpt('api_retry_interval', + default=10, + help='Trial interval of time of the notification processing' + ' is error(in seconds).'), + cfg.BoolOpt('disable_ipmi_check', + default=False, + help=''' +Do not check whether the host is completely down. + +Possible values: + +* True: Do not check whether the host is completely down. +* False: Do check whether the host is completely down. + +If ipmi RA is not set in pacemaker, this value should be set True. +'''), + cfg.IntOpt('ipmi_timeout', + default=5, + help='Timeout value(in seconds) of the ipmitool command.'), + cfg.IntOpt('ipmi_retry_max', + default=3, + help='Number of ipmitool command retries.'), + cfg.IntOpt('ipmi_retry_interval', + default=10, + help='Retry interval(in seconds) of the ipmitool command.'), ] diff --git a/masakarimonitors/hostmonitor/host_handler/handle_host.py b/masakarimonitors/hostmonitor/host_handler/handle_host.py index fa235cb..46dbe5f 100644 --- a/masakarimonitors/hostmonitor/host_handler/handle_host.py +++ b/masakarimonitors/hostmonitor/host_handler/handle_host.py @@ -16,14 +16,17 @@ import socket import eventlet from oslo_log import log as oslo_logging +from oslo_utils import timeutils import masakarimonitors.conf +from masakarimonitors.ha import masakari import masakarimonitors.hostmonitor.host_handler.driver as driver from masakarimonitors.hostmonitor.host_handler import hold_host_status from masakarimonitors.hostmonitor.host_handler import parse_cib_xml from masakarimonitors.i18n import _LE from masakarimonitors.i18n import _LI from 
def _is_poweroff(self, hostname):
    """Check with ipmitool whether ``hostname`` is powered off.

    The IPMI connection parameters are taken from the stonith ipmi
    resource agent (RA) definition returned by
    ``self.xml_parser.get_stonith_ipmi_params``.  ``ipmitool ... power
    status`` is executed under ``timeout``; a failed or inconclusive
    attempt is retried up to ``CONF.host.ipmi_retry_max`` times, sleeping
    ``CONF.host.ipmi_retry_interval`` seconds between attempts.

    :param hostname: name of the host whose power state is checked.
    :returns: True only when ipmitool reported 'Power is off'.  False when
              the ipmi RA params could not be obtained, or when every
              attempt failed or reported any other state.
    """
    ipmi_values = self.xml_parser.get_stonith_ipmi_params(hostname)
    if ipmi_values is None:
        LOG.error(_LE("Failed to get params of ipmi RA."))
        return False

    # The parser returns whatever nvpairs it found for the matching RA, so
    # any of these may be absent; treat that like "params unavailable"
    # instead of letting a KeyError escape.
    required_keys = ('userid', 'passwd', 'interface', 'ipaddr')
    if any(ipmi_values.get(key) is None for key in required_keys):
        LOG.error(_LE("Failed to get params of ipmi RA."))
        return False

    # Build argv directly rather than formatting one string and splitting
    # it on spaces: a userid/passwd containing a space must stay a single
    # argument.
    # NOTE(review): the password is passed on the command line (-P);
    # consider ipmitool's environment-variable/password-file options if
    # utils.execute allows it -- confirm before changing.
    command = [
        'timeout', str(CONF.host.ipmi_timeout),
        'ipmitool',
        '-U', ipmi_values['userid'],
        '-P', ipmi_values['passwd'],
        '-I', ipmi_values['interface'],
        '-H', ipmi_values['ipaddr'],
        'power', 'status',
    ]

    retry_count = 0
    while True:
        try:
            # Execute ipmitool command.
            out, err = utils.execute(*command, run_as_root=False)

            if err:
                msg = ("ipmitool command output stderr: %s") % err
                raise Exception(msg)

            msg = ("ipmitool command output stdout: %s") % out

            if 'Power is off' in out:
                LOG.info(_LI("%s"), msg)
                return True

            # Any other answer is inconclusive and goes through the same
            # retry path as an execution error.
            raise Exception(msg)

        except Exception as e:
            if retry_count >= CONF.host.ipmi_retry_max:
                LOG.error(_LE("Exception caught: %s"), e)
                return False

            LOG.warning(_LW("Retry executing ipmitool command. (%s)"), e)
            retry_count += 1
            eventlet.greenthread.sleep(CONF.host.ipmi_retry_interval)


def _make_event(self, hostname, current_status):
    """Build the notification body for a host status change.

    :param hostname: name of the host whose status changed.
    :param current_status: new status of the host; 'online' produces a
                           STARTED event, anything else a STOPPED event.
    :returns: dictionary with a single 'notification' key holding the
              type, hostname, generated_time and payload (event,
              cluster_status, host_status).
    """
    if current_status == 'online':
        # Set values that host has started.
        event_type = ec.EventConstants.EVENT_STARTED
        host_status = ec.EventConstants.HOST_STATUS_NORMAL
    else:
        # Set values that host has stopped.
        event_type = ec.EventConstants.EVENT_STOPPED
        # The host status is reported as normal when the ipmi check is
        # disabled or confirms power-off; otherwise it is unknown.  The
        # ipmi check is not executed at all when it is disabled.
        if CONF.host.disable_ipmi_check or self._is_poweroff(hostname):
            host_status = ec.EventConstants.HOST_STATUS_NORMAL
        else:
            host_status = ec.EventConstants.HOST_STATUS_UNKNOWN

    return {
        'notification': {
            'type': ec.EventConstants.TYPE_COMPUTE_HOST,
            'hostname': hostname,
            'generated_time': timeutils.utcnow(),
            'payload': {
                'event': event_type,
                'cluster_status': current_status.upper(),
                'host_status': host_status,
            },
        },
    }
def _parse_instance_attributes_tag(self, instance_attributes_tag, hostname):
    """Collect ipmi params from the nvpair tags of one instance_attributes.

    :param instance_attributes_tag: instance_attributes element whose
                                    children are nvpair elements.
    :param hostname: hostname the ipmi RA must be configured for.
    :returns: dictionary of the ipaddr/userid/passwd/interface values that
              were found, when an nvpair named 'hostname' has the given
              value; otherwise None.
    """
    ipmi_param_names = ('ipaddr', 'userid', 'passwd', 'interface')
    is_target_ipmi = False
    ipmi_values = {}

    # Iterate the element directly: Element.getchildren() no longer exists
    # on Python 3.9 and later.
    for nvpair_tag in instance_attributes_tag:
        name = nvpair_tag.get('name')
        value = nvpair_tag.get('value')
        if name == 'hostname' and value == hostname:
            is_target_ipmi = True
        elif name in ipmi_param_names:
            ipmi_values[name] = value

    return ipmi_values if is_target_ipmi else None


def _parse_primitive_tag(self, primitive_tag, hostname):
    """Return ipmi params from an 'external/ipmi' primitive, or None.

    :param primitive_tag: primitive element to inspect.
    :param hostname: hostname the ipmi RA must be configured for.
    :returns: result of the first instance_attributes child that matches
              the hostname, or None when the primitive is not of type
              'external/ipmi' or nothing matches.
    """
    if primitive_tag.get('type') != 'external/ipmi':
        return None

    # Parse instance_attributes tag under the primitive tag.
    for child in primitive_tag:
        if child.tag == 'instance_attributes':
            ipmi_values = self._parse_instance_attributes_tag(
                child, hostname)
            if ipmi_values is not None:
                return ipmi_values
    return None


def _parse_group_tag(self, group_tag, hostname):
    """Return ipmi params from the first matching primitive of a group.

    :param group_tag: group element whose primitive children are parsed.
    :param hostname: hostname the ipmi RA must be configured for.
    :returns: dictionary of ipmi params, or None when no primitive in the
              group matches.
    """
    # Parse primitive tag under the group tag.
    for child in group_tag:
        if child.tag == 'primitive':
            ipmi_values = self._parse_primitive_tag(child, hostname)
            if ipmi_values is not None:
                return ipmi_values
    return None


def get_stonith_ipmi_params(self, hostname):
    """Get stonith ipmi params from cib xml.

    This method gets params of ipmi resource agent(RA) which is set on
    resources tag.
    The resources tag exists under the configuration tag.
    And it is assumed that ipmi RA belongs to some resource group.

    :params hostname: hostname

    :returns: Dictionary of ipmi RA's params.
              They are ipaddr, userid, passwd and interface.
              None when the configuration or resources tag is missing,
              or when no group holds an ipmi RA for the hostname.
    """
    # Get configuration tag from cib tag.  find() returns the first direct
    # child with that tag; compare with None because an element without
    # children is falsy.
    configuration_tag = self.cib_tag.find('configuration')
    if configuration_tag is None:
        LOG.error(_LE("Cib xml doesn't have configuration tag."))
        return None

    # Get resources tag from configuration tag.
    resources_tag = configuration_tag.find('resources')
    if resources_tag is None:
        LOG.error(_LE("Cib xml doesn't have resources tag."))
        return None

    # They are set at nvpair tag which exists under the
    # instance_attributes of primitive of group tag.
    for child in resources_tag:
        if child.tag == 'group':
            ipmi_values = self._parse_group_tag(child, hostname)
            if ipmi_values is not None:
                return ipmi_values

    return None