Add implementation of sending a notification
This patch adds implementation of sending a notification. Change-Id: I2582ca76e1aeb897b908b35e3de4074e9cabc640 Implements: bp pythonize-host-and-process-monitor
This commit is contained in:
parent
fc8a8568dd
commit
826d909e59
|
@ -20,6 +20,35 @@ monitor_host_opts = [
|
|||
cfg.IntOpt('monitoring_interval',
|
||||
default=60,
|
||||
help='Monitoring interval(in seconds) of node status.'),
|
||||
cfg.IntOpt('api_retry_max',
|
||||
default=12,
|
||||
help='Number of retries for send a notification in'
|
||||
' processmonitor.'),
|
||||
cfg.IntOpt('api_retry_interval',
|
||||
default=10,
|
||||
help='Trial interval of time of the notification processing'
|
||||
' is error(in seconds).'),
|
||||
cfg.BoolOpt('disable_ipmi_check',
|
||||
default=False,
|
||||
help='''
|
||||
Do not check whether the host is completely down.
|
||||
|
||||
Possible values:
|
||||
|
||||
* True: Do not check whether the host is completely down.
|
||||
* False: Do check whether the host is completely down.
|
||||
|
||||
If ipmi RA is not set in pacemaker, this value should be set True.
|
||||
'''),
|
||||
cfg.IntOpt('ipmi_timeout',
|
||||
default=5,
|
||||
help='Timeout value(in seconds) of the ipmitool command.'),
|
||||
cfg.IntOpt('ipmi_retry_max',
|
||||
default=3,
|
||||
help='Number of ipmitool command retries.'),
|
||||
cfg.IntOpt('ipmi_retry_interval',
|
||||
default=10,
|
||||
help='Retry interval(in seconds) of the ipmitool command.'),
|
||||
]
|
||||
|
||||
|
||||
|
|
|
@ -16,14 +16,17 @@ import socket
|
|||
|
||||
import eventlet
|
||||
from oslo_log import log as oslo_logging
|
||||
from oslo_utils import timeutils
|
||||
|
||||
import masakarimonitors.conf
|
||||
from masakarimonitors.ha import masakari
|
||||
import masakarimonitors.hostmonitor.host_handler.driver as driver
|
||||
from masakarimonitors.hostmonitor.host_handler import hold_host_status
|
||||
from masakarimonitors.hostmonitor.host_handler import parse_cib_xml
|
||||
from masakarimonitors.i18n import _LE
|
||||
from masakarimonitors.i18n import _LI
|
||||
from masakarimonitors.i18n import _LW
|
||||
from masakarimonitors.objects import event_constants as ec
|
||||
from masakarimonitors import utils
|
||||
|
||||
LOG = oslo_logging.getLogger(__name__)
|
||||
|
@ -41,6 +44,7 @@ class HandleHost(driver.DriverBase):
|
|||
self.my_hostname = socket.gethostname()
|
||||
self.xml_parser = parse_cib_xml.ParseCibXml()
|
||||
self.status_holder = hold_host_status.HostHoldStatus()
|
||||
self.notifier = masakari.SendNotification()
|
||||
|
||||
def _check_host_status_by_crmadmin(self):
|
||||
try:
|
||||
|
@ -81,6 +85,87 @@ class HandleHost(driver.DriverBase):
|
|||
|
||||
return out
|
||||
|
||||
def _is_poweroff(self, hostname):
    """Confirm with ipmitool that the host's power is really off.

    Reads the ipmi RA parameters for *hostname* from the parsed cib
    xml and runs "ipmitool ... power status" (wrapped in the timeout
    command), retrying up to CONF.host.ipmi_retry_max times.

    :param hostname: name of the host to check
    :returns: True only when ipmitool reports 'Power is off';
              False when the ipmi RA params are missing or every
              attempt fails or reports another power state.
    """
    ipmi_values = self.xml_parser.get_stonith_ipmi_params(hostname)
    if ipmi_values is None:
        LOG.error(_LE("Failed to get params of ipmi RA."))
        return False

    # Guard the ipmitool call with the timeout command so an
    # unresponsive BMC cannot block the monitor indefinitely.
    cmd_str = ("timeout %s ipmitool -U %s -P %s -I %s -H %s "
               "power status") % (str(CONF.host.ipmi_timeout),
                                  ipmi_values['userid'],
                                  ipmi_values['passwd'],
                                  ipmi_values['interface'],
                                  ipmi_values['ipaddr'])
    args = cmd_str.split(' ')

    attempts = 0
    while True:
        try:
            # Execute ipmitool command.
            out, err = utils.execute(*args, run_as_root=False)

            if err:
                raise Exception(
                    ("ipmitool command output stderr: %s") % err)

            msg = ("ipmitool command output stdout: %s") % out
            # Anything other than an explicit power-off report is
            # treated as a failed attempt and retried.
            if 'Power is off' not in out:
                raise Exception(msg)

            LOG.info(_LI("%s"), msg)
            return True

        except Exception as e:
            if attempts >= CONF.host.ipmi_retry_max:
                LOG.error(_LE("Exception caught: %s"), e)
                return False
            LOG.warning(_LW("Retry executing ipmitool command. (%s)"), e)
            attempts += 1
            eventlet.greenthread.sleep(CONF.host.ipmi_retry_interval)
|
||||
|
||||
def _make_event(self, hostname, current_status):
    """Build the notification payload for a host status change.

    :param hostname: name of the host whose status changed
    :param current_status: new status string (e.g. 'online'/'offline')
    :returns: dict in the masakari notification format, carrying the
              event type, uppercased cluster status and host status.
    """
    cluster_status = current_status.upper()

    if current_status == 'online':
        # Host has (re)started.
        event_type = ec.EventConstants.EVENT_STARTED
        host_status = ec.EventConstants.HOST_STATUS_NORMAL
    else:
        # Host has stopped.
        event_type = ec.EventConstants.EVENT_STOPPED
        if CONF.host.disable_ipmi_check:
            # Operator opted out of the ipmi power check; assume normal.
            host_status = ec.EventConstants.HOST_STATUS_NORMAL
        elif self._is_poweroff(hostname):
            # Power is confirmed off, so the status is trustworthy.
            host_status = ec.EventConstants.HOST_STATUS_NORMAL
        else:
            # Could not confirm the host is completely down.
            host_status = ec.EventConstants.HOST_STATUS_UNKNOWN

    return {
        'notification': {
            'type': ec.EventConstants.TYPE_COMPUTE_HOST,
            'hostname': hostname,
            'generated_time': timeutils.utcnow(),
            'payload': {
                'event': event_type,
                'cluster_status': cluster_status,
                'host_status': host_status
            }
        }
    }
|
||||
|
||||
def _check_if_status_changed(self, node_state_tag_list):
|
||||
|
||||
# Check if host status changed.
|
||||
|
@ -111,9 +196,22 @@ class HandleHost(driver.DriverBase):
|
|||
|
||||
# If host status changed, send a notification.
|
||||
if current_status != old_status:
|
||||
# TODO(takahara.kengo)
|
||||
# Implement the notification processing.
|
||||
pass
|
||||
if current_status != 'online' and current_status != 'offline':
|
||||
# If current_status is not 'online' or 'offline',
|
||||
# hostmonitor doesn't send a notification.
|
||||
msg = ("Since host status is '%s',"
|
||||
" hostmonitor doesn't send a notification.") \
|
||||
% current_status
|
||||
LOG.info(_LI("%s"), msg)
|
||||
else:
|
||||
event = self._make_event(node_state_tag.get('uname'),
|
||||
current_status)
|
||||
|
||||
# Send a notification.
|
||||
self.notifier.send_notification(
|
||||
CONF.host.api_retry_max,
|
||||
CONF.host.api_retry_interval,
|
||||
event)
|
||||
|
||||
# Update host status.
|
||||
self.status_holder.set_host_status(node_state_tag)
|
||||
|
|
|
@ -86,3 +86,99 @@ class ParseCibXml(object):
|
|||
LOG.error(_LE("Cib xml doesn't have node_state tag."))
|
||||
|
||||
return node_state_tag_list
|
||||
|
||||
def _parse_instance_attributes_tag(self,
|
||||
instance_attributes_tag, hostname):
|
||||
# Parse nvpair tag under the instance_attributes tag.
|
||||
is_target_ipmi = False
|
||||
ipmi_values = {}
|
||||
|
||||
nvpair_tag_list = instance_attributes_tag.getchildren()
|
||||
for nvpair_tag in nvpair_tag_list:
|
||||
if nvpair_tag.get('name') == 'hostname' and \
|
||||
nvpair_tag.get('value') == hostname:
|
||||
is_target_ipmi = True
|
||||
elif nvpair_tag.get('name') == 'ipaddr':
|
||||
ipmi_values['ipaddr'] = nvpair_tag.get('value')
|
||||
elif nvpair_tag.get('name') == 'userid':
|
||||
ipmi_values['userid'] = nvpair_tag.get('value')
|
||||
elif nvpair_tag.get('name') == 'passwd':
|
||||
ipmi_values['passwd'] = nvpair_tag.get('value')
|
||||
elif nvpair_tag.get('name') == 'interface':
|
||||
ipmi_values['interface'] = nvpair_tag.get('value')
|
||||
|
||||
if is_target_ipmi is True:
|
||||
return ipmi_values
|
||||
else:
|
||||
return None
|
||||
|
||||
def _parse_primitive_tag(self, primitive_tag, hostname):
    """Extract ipmi RA params for *hostname* from one primitive tag.

    :param primitive_tag: <primitive> element
    :param hostname: hostname the ipmi RA must be configured for
    :returns: dict of ipmi params, or None when the primitive is not
              an 'external/ipmi' RA or is configured for another host.
    """
    # Only 'external/ipmi' primitives carry the ipmi RA parameters.
    if primitive_tag.get('type') != 'external/ipmi':
        return None

    # Parse instance_attributes tag under the primitive tag.
    # Iterate the element directly: Element.getchildren() was removed
    # from xml.etree.ElementTree in Python 3.9 (deprecated in lxml).
    for child in primitive_tag:
        if child.tag == 'instance_attributes':
            ipmi_values = self._parse_instance_attributes_tag(
                child, hostname)
            if ipmi_values is not None:
                return ipmi_values
    return None
|
||||
|
||||
def _parse_group_tag(self, group_tag, hostname):
    """Search a resource group's primitives for ipmi RA params.

    :param group_tag: <group> element
    :param hostname: hostname the ipmi RA must be configured for
    :returns: dict of ipmi params from the first matching primitive,
              or None when no primitive in the group matches.
    """
    # Parse primitive tag under the group tag.
    # Iterate the element directly: Element.getchildren() was removed
    # from xml.etree.ElementTree in Python 3.9 (deprecated in lxml).
    for child in group_tag:
        if child.tag == 'primitive':
            ipmi_values = self._parse_primitive_tag(child, hostname)
            if ipmi_values is not None:
                return ipmi_values
    return None
|
||||
|
||||
def get_stonith_ipmi_params(self, hostname):
    """Get stonith ipmi params from cib xml.

    This method gets params of ipmi resource agent(RA) which is set on
    resources tag.
    The resources tag exists under the configuration tag.
    And it is assumed that ipmi RA belongs to some resource group.

    :params hostname: hostname

    :returns: Dictionary of ipmi RA's params, or None when the cib xml
              lacks the expected tags or no RA matches the hostname.
              They are ipaddr, userid, passwd and interface.
    """
    # NOTE: elements are iterated directly throughout this method
    # because Element.getchildren() was removed from
    # xml.etree.ElementTree in Python 3.9 (deprecated in lxml).

    # Get configuration tag from cib tag.
    configuration_tag = None
    for child in self.cib_tag:
        if child.tag == 'configuration':
            configuration_tag = child
            break
    if configuration_tag is None:
        LOG.error(_LE("Cib xml doesn't have configuration tag."))
        return None

    # Get resources tag from configuration tag.
    resources_tag = None
    for child in configuration_tag:
        if child.tag == 'resources':
            resources_tag = child
            break
    if resources_tag is None:
        LOG.error(_LE("Cib xml doesn't have resources tag."))
        return None

    # They are set at nvpair tag which exists under the
    # instance_attributes of primitive of group tag.
    ipmi_values = None
    for child in resources_tag:
        if child.tag == 'group':
            ipmi_values = self._parse_group_tag(child, hostname)
            if ipmi_values is not None:
                break

    return ipmi_values
|
||||
|
|
|
@ -23,6 +23,74 @@ from masakarimonitors.hostmonitor.host_handler import parse_cib_xml
|
|||
eventlet.monkey_patch(os=False)
|
||||
|
||||
CIB_XML = '<cib have-quorum="1">' \
|
||||
' <configuration>' \
|
||||
' <crm_config>test</crm_config>' \
|
||||
' <nodes>' \
|
||||
' <node id="1084754452" uname="masakari-node"/>' \
|
||||
' <node id="1084754453" uname="compute-node"/>' \
|
||||
' </nodes>' \
|
||||
' <resources>' \
|
||||
' <group id="grpStonith1">' \
|
||||
' <primitive id="stnt11" type="external/stonith-helper">' \
|
||||
' <instance_attributes id="stnt11-instance_attributes">' \
|
||||
' <nvpair name="hostlist" value="masakari-node"/>' \
|
||||
' </instance_attributes>' \
|
||||
' <operations>' \
|
||||
' <op name="start"/>' \
|
||||
' <op name="monitor"/>' \
|
||||
' <op name="stop"/>' \
|
||||
' </operations>' \
|
||||
' </primitive>' \
|
||||
' <primitive id="stnt12" type="external/ipmi">' \
|
||||
' <instance_attributes id="stnt12-instance_attributes">' \
|
||||
' <nvpair name="pcmk_reboot_timeout" value="60s"/>' \
|
||||
' <nvpair name="hostname" value="masakari-node"/>' \
|
||||
' <nvpair name="ipaddr" value="192.168.10.20"/>' \
|
||||
' <nvpair name="userid" value="admin"/>' \
|
||||
' <nvpair name="passwd" value="password"/>' \
|
||||
' <nvpair name="interface" value="lanplus"/>' \
|
||||
' </instance_attributes>' \
|
||||
' <operations>' \
|
||||
' <op name="start"/>' \
|
||||
' <op name="monitor"/>' \
|
||||
' <op name="stop"/>' \
|
||||
' </operations>' \
|
||||
' </primitive>' \
|
||||
' </group>' \
|
||||
' <group id="grpStonith2">' \
|
||||
' <primitive id="stnt21" type="external/stonith-helper">' \
|
||||
' <instance_attributes id="stnt21-instance_attributes">' \
|
||||
' <nvpair name="hostlist" value="compute-node"/>' \
|
||||
' </instance_attributes>' \
|
||||
' <operations>' \
|
||||
' <op name="start"/>' \
|
||||
' <op name="monitor"/>' \
|
||||
' <op name="stop"/>' \
|
||||
' </operations>' \
|
||||
' </primitive>' \
|
||||
' <primitive id="stnt22" type="external/ipmi">' \
|
||||
' <instance_attributes id="stnt22-instance_attributes">' \
|
||||
' <nvpair name="pcmk_reboot_timeout" value="60s"/>' \
|
||||
' <nvpair name="hostname" value="compute-node"/>' \
|
||||
' <nvpair name="ipaddr" value="192.168.10.21"/>' \
|
||||
' <nvpair name="userid" value="admin"/>' \
|
||||
' <nvpair name="passwd" value="password"/>' \
|
||||
' <nvpair name="interface" value="lanplus"/>' \
|
||||
' </instance_attributes>' \
|
||||
' <operations>' \
|
||||
' <op name="start"/>' \
|
||||
' <op name="monitor"/>' \
|
||||
' <op name="stop"/>' \
|
||||
' </operations>' \
|
||||
' </primitive>' \
|
||||
' </group>' \
|
||||
' </resources>' \
|
||||
' <constraints>' \
|
||||
' <rsc_location id="loc_grpStonith1" rsc="grpStonith1">' \
|
||||
' <rule test="hoge"/>' \
|
||||
' </rsc_location>' \
|
||||
' </constraints>' \
|
||||
' </configuration>' \
|
||||
' <status>' \
|
||||
' <node_state uname="masakari-node" crmd="online">' \
|
||||
' <test hoge="hoge"/>' \
|
||||
|
@ -63,3 +131,15 @@ class TestParseCibXml(testtools.TestCase):
|
|||
|
||||
for node_state_tag in node_state_tag_list:
|
||||
self.assertEqual('online', node_state_tag.get('crmd'))
|
||||
|
||||
def test_get_stonith_ipmi_params(self):
    """The ipmi RA params of the matching group are extracted."""
    parser = parse_cib_xml.ParseCibXml()
    parser.set_cib_xml(CIB_XML)

    ipmi_values = parser.get_stonith_ipmi_params('compute-node')

    expected = {'ipaddr': '192.168.10.21',
                'userid': 'admin',
                'passwd': 'password',
                'interface': 'lanplus'}
    for key, value in expected.items():
        self.assertEqual(value, ipmi_values[key])
|
||||
|
|
Loading…
Reference in New Issue