Add implementation of sending a notification

This patch adds implementation of sending a notification.

Change-Id: I2582ca76e1aeb897b908b35e3de4074e9cabc640
Implements: bp pythonize-host-and-process-monitor
This commit is contained in:
Kengo Takahara 2017-02-10 20:22:54 +09:00
parent fc8a8568dd
commit 826d909e59
4 changed files with 306 additions and 3 deletions

View File

@ -20,6 +20,35 @@ monitor_host_opts = [
cfg.IntOpt('monitoring_interval',
default=60,
help='Monitoring interval(in seconds) of node status.'),
cfg.IntOpt('api_retry_max',
default=12,
help='Number of retries for send a notification in'
' processmonitor.'),
cfg.IntOpt('api_retry_interval',
default=10,
help='Trial interval of time of the notification processing'
' is error(in seconds).'),
cfg.BoolOpt('disable_ipmi_check',
default=False,
help='''
Do not check whether the host is completely down.
Possible values:
* True: Do not check whether the host is completely down.
* False: Do check whether the host is completely down.
If ipmi RA is not set in pacemaker, this value should be set True.
'''),
cfg.IntOpt('ipmi_timeout',
default=5,
help='Timeout value(in seconds) of the ipmitool command.'),
cfg.IntOpt('ipmi_retry_max',
default=3,
help='Number of ipmitool command retries.'),
cfg.IntOpt('ipmi_retry_interval',
default=10,
help='Retry interval(in seconds) of the ipmitool command.'),
]

View File

@ -16,14 +16,17 @@ import socket
import eventlet
from oslo_log import log as oslo_logging
from oslo_utils import timeutils
import masakarimonitors.conf
from masakarimonitors.ha import masakari
import masakarimonitors.hostmonitor.host_handler.driver as driver
from masakarimonitors.hostmonitor.host_handler import hold_host_status
from masakarimonitors.hostmonitor.host_handler import parse_cib_xml
from masakarimonitors.i18n import _LE
from masakarimonitors.i18n import _LI
from masakarimonitors.i18n import _LW
from masakarimonitors.objects import event_constants as ec
from masakarimonitors import utils
LOG = oslo_logging.getLogger(__name__)
@ -41,6 +44,7 @@ class HandleHost(driver.DriverBase):
self.my_hostname = socket.gethostname()
self.xml_parser = parse_cib_xml.ParseCibXml()
self.status_holder = hold_host_status.HostHoldStatus()
self.notifier = masakari.SendNotification()
def _check_host_status_by_crmadmin(self):
try:
@ -81,6 +85,87 @@ class HandleHost(driver.DriverBase):
return out
def _is_poweroff(self, hostname):
    """Check with ipmitool whether the given host is powered off.

    The ipmi RA parameters of the host are looked up through
    ``self.xml_parser`` and ``ipmitool ... power status`` is executed
    (wrapped in ``timeout``).  A failed attempt is retried up to
    ``CONF.host.ipmi_retry_max`` times, sleeping
    ``CONF.host.ipmi_retry_interval`` seconds between attempts.

    :param hostname: Name of the host whose power status is checked.
    :returns: True if ipmitool reported 'Power is off'.  False if the
              ipmi RA parameters could not be obtained or if every
              attempt failed or reported anything else.
    """
    ipmi_values = self.xml_parser.get_stonith_ipmi_params(hostname)
    if ipmi_values is None:
        LOG.error(_LE("Failed to get params of ipmi RA."))
        return False

    # Build the argument vector directly rather than formatting a single
    # string and splitting it on spaces: a parameter that itself contains
    # a space (e.g. the password) must remain one argument.
    command = [
        'timeout', str(CONF.host.ipmi_timeout),
        'ipmitool',
        '-U', ipmi_values['userid'],
        '-P', ipmi_values['passwd'],
        '-I', ipmi_values['interface'],
        '-H', ipmi_values['ipaddr'],
        'power', 'status',
    ]

    retry_count = 0
    while True:
        try:
            # Execute ipmitool command.
            out, err = utils.execute(*command, run_as_root=False)

            # Any stderr output is treated as a failed attempt.
            if err:
                msg = ("ipmitool command output stderr: %s") % err
                raise Exception(msg)

            msg = ("ipmitool command output stdout: %s") % out

            if 'Power is off' in out:
                LOG.info(_LI("%s"), msg)
                return True
            else:
                # Any other answer (e.g. still powered on) is retried
                # like an error until the retry budget is used up.
                raise Exception(msg)

        except Exception as e:
            if retry_count < CONF.host.ipmi_retry_max:
                LOG.warning(_LW("Retry executing ipmitool command. (%s)"),
                            e)
                retry_count = retry_count + 1
                eventlet.greenthread.sleep(CONF.host.ipmi_retry_interval)
            else:
                LOG.error(_LE("Exception caught: %s"), e)
                return False
def _make_event(self, hostname, current_status):
    """Build the notification body for a host status change.

    :param hostname: Name of the host whose status changed.
    :param current_status: Current status string of the host; 'online'
                           produces a started event, anything else a
                           stopped event.
    :returns: Dictionary with a single 'notification' key holding the
              type, hostname, generated time and payload.
    """
    constants = ec.EventConstants
    is_online = current_status == 'online'

    # Started event for an online host, stopped event otherwise.
    event_type = (constants.EVENT_STARTED if is_online
                  else constants.EVENT_STOPPED)
    cluster_status = current_status.upper()

    # A stopped host is reported as normal when the ipmi check is
    # disabled or when ipmitool confirms the power is off; otherwise
    # its status is unknown.
    if (is_online or CONF.host.disable_ipmi_check or
            self._is_poweroff(hostname)):
        host_status = constants.HOST_STATUS_NORMAL
    else:
        host_status = constants.HOST_STATUS_UNKNOWN

    payload = {
        'event': event_type,
        'cluster_status': cluster_status,
        'host_status': host_status
    }
    notification = {
        'type': constants.TYPE_COMPUTE_HOST,
        'hostname': hostname,
        'generated_time': timeutils.utcnow(),
        'payload': payload
    }

    return {'notification': notification}
def _check_if_status_changed(self, node_state_tag_list):
# Check if host status changed.
@ -111,9 +196,22 @@ class HandleHost(driver.DriverBase):
# If host status changed, send a notification.
if current_status != old_status:
# TODO(takahara.kengo)
# Implement the notification processing.
pass
if current_status != 'online' and current_status != 'offline':
# If current_status is not 'online' or 'offline',
# hostmonitor doesn't send a notification.
msg = ("Since host status is '%s',"
" hostmonitor doesn't send a notification.") \
% current_status
LOG.info(_LI("%s"), msg)
else:
event = self._make_event(node_state_tag.get('uname'),
current_status)
# Send a notification.
self.notifier.send_notification(
CONF.host.api_retry_max,
CONF.host.api_retry_interval,
event)
# Update host status.
self.status_holder.set_host_status(node_state_tag)

View File

@ -86,3 +86,99 @@ class ParseCibXml(object):
LOG.error(_LE("Cib xml doesn't have node_state tag."))
return node_state_tag_list
def _parse_instance_attributes_tag(self,
instance_attributes_tag, hostname):
# Parse nvpair tag under the instance_attributes tag.
is_target_ipmi = False
ipmi_values = {}
nvpair_tag_list = instance_attributes_tag.getchildren()
for nvpair_tag in nvpair_tag_list:
if nvpair_tag.get('name') == 'hostname' and \
nvpair_tag.get('value') == hostname:
is_target_ipmi = True
elif nvpair_tag.get('name') == 'ipaddr':
ipmi_values['ipaddr'] = nvpair_tag.get('value')
elif nvpair_tag.get('name') == 'userid':
ipmi_values['userid'] = nvpair_tag.get('value')
elif nvpair_tag.get('name') == 'passwd':
ipmi_values['passwd'] = nvpair_tag.get('value')
elif nvpair_tag.get('name') == 'interface':
ipmi_values['interface'] = nvpair_tag.get('value')
if is_target_ipmi is True:
return ipmi_values
else:
return None
def _parse_primitive_tag(self, primitive_tag, hostname):
if primitive_tag.get('type') != 'external/ipmi':
return None
# Parse instance_attributes tag under the primitive tag.
child_list = primitive_tag.getchildren()
for child in child_list:
if child.tag == 'instance_attributes':
ipmi_values = self._parse_instance_attributes_tag(
child, hostname)
if ipmi_values is not None:
return ipmi_values
return None
def _parse_group_tag(self, group_tag, hostname):
# Parse primitive tag under the group tag.
child_list = group_tag.getchildren()
for child in child_list:
if child.tag == 'primitive':
ipmi_values = self._parse_primitive_tag(child, hostname)
if ipmi_values is not None:
return ipmi_values
return None
def get_stonith_ipmi_params(self, hostname):
    """Get stonith ipmi params from cib xml.

    This method gets params of ipmi resource agent(RA) which is set on
    resources tag.
    The resources tag exists under the configuration tag.
    And it is assumed that ipmi RA belongs to some resource group.

    :param hostname: hostname
    :returns: Dictionary of ipmi RA's params.
        They are ipaddr, userid, passwd and interface.
        None is returned when the configuration tag or the resources
        tag is missing, or when no ipmi RA matches the hostname.
    """
    # Elements are iterated directly instead of through getchildren(),
    # which was removed from xml.etree.ElementTree in Python 3.9.

    # Get configuration tag from cib tag.
    configuration_tag = next(
        (child for child in self.cib_tag
         if child.tag == 'configuration'), None)
    if configuration_tag is None:
        LOG.error(_LE("Cib xml doesn't have configuration tag."))
        return None

    # Get resources tag from configuration tag.
    resources_tag = next(
        (child for child in configuration_tag
         if child.tag == 'resources'), None)
    if resources_tag is None:
        LOG.error(_LE("Cib xml doesn't have resources tag."))
        return None

    # They are set at nvpair tag which exists under the
    # instance_attributes of primitive of group tag.
    for child in resources_tag:
        if child.tag != 'group':
            continue
        ipmi_values = self._parse_group_tag(child, hostname)
        if ipmi_values is not None:
            return ipmi_values

    return None

View File

@ -23,6 +23,74 @@ from masakarimonitors.hostmonitor.host_handler import parse_cib_xml
eventlet.monkey_patch(os=False)
CIB_XML = '<cib have-quorum="1">' \
' <configuration>' \
' <crm_config>test</crm_config>' \
' <nodes>' \
' <node id="1084754452" uname="masakari-node"/>' \
' <node id="1084754453" uname="compute-node"/>' \
' </nodes>' \
' <resources>' \
' <group id="grpStonith1">' \
' <primitive id="stnt11" type="external/stonith-helper">' \
' <instance_attributes id="stnt11-instance_attributes">' \
' <nvpair name="hostlist" value="masakari-node"/>' \
' </instance_attributes>' \
' <operations>' \
' <op name="start"/>' \
' <op name="monitor"/>' \
' <op name="stop"/>' \
' </operations>' \
' </primitive>' \
' <primitive id="stnt12" type="external/ipmi">' \
' <instance_attributes id="stnt12-instance_attributes">' \
' <nvpair name="pcmk_reboot_timeout" value="60s"/>' \
' <nvpair name="hostname" value="masakari-node"/>' \
' <nvpair name="ipaddr" value="192.168.10.20"/>' \
' <nvpair name="userid" value="admin"/>' \
' <nvpair name="passwd" value="password"/>' \
' <nvpair name="interface" value="lanplus"/>' \
' </instance_attributes>' \
' <operations>' \
' <op name="start"/>' \
' <op name="monitor"/>' \
' <op name="stop"/>' \
' </operations>' \
' </primitive>' \
' </group>' \
' <group id="grpStonith2">' \
' <primitive id="stnt21" type="external/stonith-helper">' \
' <instance_attributes id="stnt21-instance_attributes">' \
' <nvpair name="hostlist" value="compute-node"/>' \
' </instance_attributes>' \
' <operations>' \
' <op name="start"/>' \
' <op name="monitor"/>' \
' <op name="stop"/>' \
' </operations>' \
' </primitive>' \
' <primitive id="stnt22" type="external/ipmi">' \
' <instance_attributes id="stnt22-instance_attributes">' \
' <nvpair name="pcmk_reboot_timeout" value="60s"/>' \
' <nvpair name="hostname" value="compute-node"/>' \
' <nvpair name="ipaddr" value="192.168.10.21"/>' \
' <nvpair name="userid" value="admin"/>' \
' <nvpair name="passwd" value="password"/>' \
' <nvpair name="interface" value="lanplus"/>' \
' </instance_attributes>' \
' <operations>' \
' <op name="start"/>' \
' <op name="monitor"/>' \
' <op name="stop"/>' \
' </operations>' \
' </primitive>' \
' </group>' \
' </resources>' \
' <constraints>' \
' <rsc_location id="loc_grpStonith1" rsc="grpStonith1">' \
' <rule test="hoge"/>' \
' </rsc_location>' \
' </constraints>' \
' </configuration>' \
' <status>' \
' <node_state uname="masakari-node" crmd="online">' \
' <test hoge="hoge"/>' \
@ -63,3 +131,15 @@ class TestParseCibXml(testtools.TestCase):
for node_state_tag in node_state_tag_list:
self.assertEqual('online', node_state_tag.get('crmd'))
def test_get_stonith_ipmi_params(self):
    """The ipmi RA parameters of 'compute-node' are read from CIB_XML."""
    parser = parse_cib_xml.ParseCibXml()
    parser.set_cib_xml(CIB_XML)

    params = parser.get_stonith_ipmi_params('compute-node')

    # Expected values are those of the stnt22 primitive in CIB_XML.
    expected_pairs = (
        ('ipaddr', '192.168.10.21'),
        ('userid', 'admin'),
        ('passwd', 'password'),
        ('interface', 'lanplus'),
    )
    for key, expected in expected_pairs:
        self.assertEqual(expected, params[key])