[PoC] Call masakari APIs from a resource agent

This is a PoC of a resource agent that calls the masakari APIs when
the host of an instance fails.
The purposes of this patch are as follows:
- Show how to call the masakari APIs from nova-host-alerter.
- Indicate the implementation of the masakari driver.

This is just a PoC based on an idea called "Modular architecture".

In this patch, we assume that nova-host-alerter has a 'driver' param
in its primitive definition, and that the driver param is set to
either 'masakari' or 'mistral'.
We also plan to place masakari_driver.py and masakari_driver.conf
in the same directory as nova-host-alerter.

reference:
* The idea of modular architecture
  https://aspiers.github.io/openstack-day-israel-2017-compute-ha/#/nova-host-alerter
  https://aspiers.github.io/openstack-day-israel-2017-compute-ha/#/modular
* Specs of a method to recover all virtual machines
  https://github.com/openstack/openstack-resource-agents-specs/blob/master/specs/newton/approved/newton-instance-ha-host-monitoring-spec.rst
  https://github.com/openstack/openstack-resource-agents-specs/blob/master/specs/newton/approved/newton-instance-ha-host-recovery.rst
  https://review.openstack.org/#/c/406659/

Change-Id: I6768a1822ed5f19bc66f0d6d6887194bbc32abad
Co-Authored-By: Kengo Takahara <takahara-kn@njk.co.jp>
Story: 2002124
Task: 19803
This commit is contained in:
Rikimaru Honjo 2018-03-15 09:02:15 +00:00 committed by Adam Spiers
parent 42bb0c53e3
commit 5701a26a63
3 changed files with 331 additions and 0 deletions

39
ocf/masakari_driver.conf Normal file
View File

@ -0,0 +1,39 @@
[DEFAULT]
# Name of log file. (string value)
log_file = /var/tmp/masakari_driver.log
[api]
# Authentication URL (string value)
#auth_url = <None>
auth_url = http://192.168.10.20/identity
# Project name to scope to (string value)
# Deprecated group/name - [api]/tenant_name
#project_name = <None>
project_name = service
# Domain ID containing project (string value)
#project_domain_id = <None>
project_domain_id = default
# Username (string value)
# Deprecated group/name - [api]/user_name
#username = <None>
username = masakari
# User's domain id (string value)
#user_domain_id = <None>
user_domain_id = default
# User's password (string value)
#password = <None>
password = masakari
# Number of retries for sending a notification. (integer value)
#api_retry_max = 12
api_retry_max = 3
# Interval between retries when sending a notification fails (in seconds).
# (integer value)
#api_retry_interval = 10
api_retry_interval = 1

177
ocf/masakari_driver.py Normal file
View File

@ -0,0 +1,177 @@
#!/usr/bin/env python
import ConfigParser
import os
import sys
import eventlet
from keystoneauth1.identity.generic import password as ks_password
from keystoneauth1 import session as ks_session
from openstack import connection
from openstack import exceptions
from openstack import service_description
from oslo_config import cfg
from oslo_log import log
from oslo_utils import timeutils
from masakariclient.sdk.ha.v1 import _proxy
# Module-wide logger, the global oslo.config object and the name handed
# to oslo.log when logging is set up.
LOG = log.getLogger(__name__)
CONF = cfg.CONF
DOMAIN = "masakari_driver"

# NOTE: The config file (masakari_driver.conf) is assumed to exist
# in the same directory as this program file.
script_dir = os.path.dirname(os.path.abspath(__file__))
CONFIG_FILE = "/".join([script_dir, "masakari_driver.conf"])

# Fallback values handed to the config parser for options that are
# missing from the config file.
default_config = dict(
    log_file=None,
    auth_url=None,
    project_name=None,
    project_domain_id=None,
    username=None,
    user_domain_id=None,
    password=None,
    api_retry_max=12,
    api_retry_interval=10,
)

# Fixed field values of the notification that is sent for a failed host.
TYPE_COMPUTE_HOST = "COMPUTE_HOST"
EVENT_STOPPED = "STOPPED"
CLUSTER_STATUS_OFFLINE = "OFFLINE"
HOST_STATUS_NORMAL = "NORMAL"

# Per-notification result codes; the script sums them for its exit status.
SUCCESS, FAILURE = 0, 1
class MasakariDriver(object):
    """Send COMPUTE_HOST failure notifications to the masakari API.

    The settings are read from CONFIG_FILE and logging is set up through
    oslo.log when the object is created.
    """

    def __init__(self):
        self._read_config()
        self._setup_log()

    def _read_config(self):
        """Read configuration file by using ConfigParser.

        Populates ``log_file``, the keystone credentials and the retry
        settings as attributes of the instance.  A missing config file is
        silently skipped by ``read()``; ConfigParser then raises
        NoSectionError because the ``[api]`` section cannot be found.
        """
        # NOTE: At first I attempted to use oslo.config, but it required
        # either '[--config-dir DIR]' or '[--config-file PATH]' for argument,
        # and the hostname couldn't be passed as an argument.
        # So I use ConfigParser.

        # SafeConfigParser runs string interpolation on every value that
        # get() returns, which fails on the integer defaults.  Hand it
        # string defaults (None is passed through untouched) so omitting
        # an option from the file really falls back to the default.
        defaults = dict(
            (name, value if value is None else str(value))
            for name, value in default_config.items())
        inifile = ConfigParser.SafeConfigParser(defaults)
        inifile.read(CONFIG_FILE)

        self.log_file = inifile.get('DEFAULT', 'log_file')

        # NOTE: Because of the interpolation mentioned above, a literal
        # '%' in any of these values has to be written as '%%' in the
        # config file.
        self.auth_url = inifile.get('api', 'auth_url')
        self.project_name = inifile.get('api', 'project_name')
        self.project_domain_id = inifile.get('api', 'project_domain_id')
        self.username = inifile.get('api', 'username')
        self.user_domain_id = inifile.get('api', 'user_domain_id')
        self.password = inifile.get('api', 'password')
        self.api_retry_max = inifile.getint('api', 'api_retry_max')
        self.api_retry_interval = inifile.getint('api', 'api_retry_interval')

    def _setup_log(self):
        """Setup log"""
        # NOTE(review): log_file is assigned directly on CONF before the
        # oslo.log options are registered.  Confirm that oslo.log honours
        # this, or switch to CONF.set_override() after registration.
        if self.log_file is not None:
            CONF.log_file = self.log_file
        log.register_options(CONF)
        log.setup(CONF, DOMAIN)

    def _make_client(self):
        """Make client for a notification.

        Returns the proxy object of the 'ha' service, bound to a keystone
        session that authenticates with the configured user and password.
        """
        # NOTE: This function uses masakari-monitors's code as reference.
        auth = ks_password.Password(
            auth_url=self.auth_url,
            username=self.username,
            password=self.password,
            user_domain_id=self.user_domain_id,
            project_name=self.project_name,
            project_domain_id=self.project_domain_id)
        session = ks_session.Session(auth=auth)

        desc = service_description.ServiceDescription(
            service_type='ha', proxy_class=_proxy.Proxy)
        conn = connection.Connection(
            session=session, extra_services=[desc])
        conn.add_service(desc)

        client = conn.ha.proxy_class(
            session=session, service_type='ha')
        return client

    def send_notification(self, failure_host):
        """Send a notification.

        :param failure_host: hostname of the failed compute host.
        :returns: SUCCESS when the notification was accepted, or when the
            API answered 409 because the same notification had already
            been sent.  FAILURE when the first attempt and all
            ``api_retry_max`` retries failed.
        """
        # NOTE: This function uses masakari-monitors's code as reference.

        # Make event.
        current_time = timeutils.utcnow()
        event = {
            'notification': {
                'type': TYPE_COMPUTE_HOST,
                # Set hostname which was passed as argument.
                'hostname': failure_host,
                'generated_time': current_time,
                'payload': {
                    'event': EVENT_STOPPED,
                    'cluster_status': CLUSTER_STATUS_OFFLINE,
                    'host_status': HOST_STATUS_NORMAL
                }
            }
        }
        LOG.info("Send a notification. %s", event)

        # Get client.
        client = self._make_client()

        # Send a notification.
        notification = event['notification']
        retry_count = 0
        while True:
            try:
                response = client.create_notification(
                    type=notification['type'],
                    hostname=notification['hostname'],
                    generated_time=notification['generated_time'],
                    payload=notification['payload'])
            except Exception as e:
                # If http_status is 409, skip the retry processing: the
                # notification is already known, so this counts as success.
                if (isinstance(e, exceptions.HttpException) and
                        e.status_code == 409):
                    msg = ("Stop retrying to send a notification because "
                           "same notification have been already sent.")
                    LOG.info("%s", msg)
                    return SUCCESS

                # Out of retries: log the traceback and give up.
                if retry_count >= self.api_retry_max:
                    LOG.exception("Exception caught: %s", e)
                    return FAILURE

                LOG.warning("Retry sending a notification. (%s)", e)
                retry_count += 1
                eventlet.greenthread.sleep(self.api_retry_interval)
            else:
                LOG.info("Response: %s", response)
                return SUCCESS
def read_failure_hosts(stream):
    """Return the hostnames found in *stream*, one per line.

    Surrounding whitespace is stripped and blank lines are dropped so
    that an empty hostname is never sent as a notification.
    """
    stripped = (line.strip() for line in stream.read().splitlines())
    return [host for host in stripped if host]


def main():
    """Notify masakari about every host read from stdin.

    Returns the number of notifications that failed, capped at 255.
    """
    hosts = read_failure_hosts(sys.stdin)
    failures = 0
    # The driver (and with it config parsing and logging setup) is only
    # created when there is something to send.
    if hosts:
        masakari_driver = MasakariDriver()
        for host in hosts:
            failures += masakari_driver.send_notification(host)
    # Exit code is number of notification failure.  Exit statuses wrap
    # modulo 256, so cap the value to keep e.g. 256 failures from being
    # reported as success.
    return min(failures, 255)


if __name__ == '__main__':
    sys.exit(main())

115
ocf/nova-host-alerter Normal file
View File

@ -0,0 +1,115 @@
#!/bin/bash
#
# NOTE: bash is required: the monitor action uses process substitution
# ("2> >(...)"), which a plain POSIX /bin/sh does not provide.
#
#######################################################################
# Initialization:
# Load the OCF shell helpers (ocf_log, the OCF_* return codes, ...).
# OCF_FUNCTIONS_DIR is only defaulted when it is not already set.
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
# Print the resource agent metadata (XML) on stdout.
# Defines the 'driver' parameter and lists the supported actions.
meta_data() {
    cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="nova-host-alerter">
<version>1.0</version>
<longdesc lang="en">
Resource agent which reports failed hosts to a recovery service.
On every monitor operation it reads the "evacuate" node attribute with
attrd_updater and passes the result to the configured driver, which
sends the host failure notifications.
</longdesc>
<shortdesc lang="en">Notify a recovery service of host failures.</shortdesc>
<parameters>
<parameter name="driver" unique="1" required="1">
<longdesc lang="en">
Specify the driver of recovery. Currently, only "masakari" is supported.
</longdesc>
<shortdesc lang="en">Driver of recovery.</shortdesc>
<content type="string" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="60" />
<action name="stop" timeout="60" />
<action name="status" timeout="60" />
<action name="monitor" interval="60" timeout="60" />
<action name="meta-data" timeout="5" />
<action name="validate-all" timeout="20" />
</actions>
</resource-agent>
END
    return $OCF_SUCCESS
}
SERVICE=nova-host-alerter
# Requested action (start, stop, monitor, ...).
OP=$1
# NOTE: It is assumed that masakari_driver.py exists in same directory with
# nova-host-alerter.
# Everything is quoted so that an install path containing whitespace still
# resolves; SCRIPT_DIR stays empty if the directory cannot be entered.
SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd)
MASAKARI_DRIVER="${SCRIPT_DIR}/masakari_driver.py"
nova_host_alerter_start() {
    # "Starting" only means creating the state file.  No monitor is run
    # here so that the start timeout can be low.
    # The exit status of touch is the return value of the function.
    touch "${statefile}"
}
nova_host_alerter_stop() {
    # "Stopping" removes the state file created by start; success is
    # reported regardless of what rm returns.
    rm -f "${statefile}"

    return $OCF_SUCCESS
}
# Monitor action: report whether the resource is active and, if it is,
# hand the failed hosts to the configured driver.
#
# Returns OCF_NOT_RUNNING when the resource has not been started,
# OCF_ERR_CONFIGURED for an unsupported driver and OCF_SUCCESS otherwise
# (a failed notification is logged but does not fail the monitor).
nova_host_alerter_monitor() {
    # The state file is created by start and removed by stop.  Without
    # it the resource is not active on this node, and saying so keeps
    # probes from treating the resource as running everywhere.
    if [ ! -f "$statefile" ]; then
        return $OCF_NOT_RUNNING
    fi

    case "$OCF_RESKEY_driver" in
        "masakari")
            # Pass the list of failure hostnames to MASAKARI_DRIVER.
            # The attrd_updater output is expected to look like
            #   name="evacuate" host="node1" value="yes"
            # so after tr the hostname is field 4 and the value field 6.
            # Only hosts whose value is "yes" are passed on, one bare
            # hostname per line, which is what the driver reads.
            # NOTE: "2> >(...)" is a bash process substitution.
            attrd_updater -n evacuate -A \
                2> >(grep -v "attribute does not exist" 1>&2) |
                sed 's/ value=""/ value="no"/' |
                tr '="' '  ' |
                awk '$6 == "yes" {print $4}' |
                "$MASAKARI_DRIVER"
            # Exit status of the driver: number of failed notifications.
            retval=$?
            if [ $retval -eq 0 ]; then
                ocf_log info "Succeeded in sending a notification."
            else
                ocf_log err "$retval host failure notification(s) failed."
            fi
            ;;
        *)
            ocf_log err "Driver type '$OCF_RESKEY_driver' is not supported."
            return $OCF_ERR_CONFIGURED
            ;;
    esac
    return $OCF_SUCCESS
}
# Marker file that records that this resource instance has been started.
statefile="${HA_RSCTMP}/${OCF_RESOURCE_INSTANCE}.active"

# Dispatch the requested action; the script exits with its return code.
case "$OP" in
    start)
        nova_host_alerter_start
        RC=$?
        ;;
    stop)
        nova_host_alerter_stop
        RC=$?
        ;;
    monitor|status)
        # "status" is advertised in the metadata, so serve it with the
        # monitor implementation instead of reporting it as unimplemented.
        nova_host_alerter_monitor
        RC=$?
        ;;
    meta-data)
        meta_data
        RC=$?
        ;;
    validate-all)
        RC=$OCF_SUCCESS
        ;;
    *)
        RC=$OCF_ERR_UNIMPLEMENTED
        ;;
esac
exit $RC