[PoC]Call masakari APIs from a resource agent

This is a PoC of a resource agent that calls the masakari APIs when
a failure of an instance's host occurs.
The purposes of this patch are as follows:
- Show how to call the masakari APIs from nova-host-alerter.
- Indicate the implementation of the masakari driver.

This PoC is based on an idea called "Modular architecture".
However, "Modular architecture" is not implemented yet, so this is
just a PoC.

In this patch, we assume that nova-host-alerter has a 'driver' param
in its primitive definition, and that the driver param is set to
either 'masakari' or 'mistral'.
We also plan to place masakari_driver.py and masakari_driver.conf
in the same directory as nova-host-alerter.

reference:
* The idea of modular architecture
  https://aspiers.github.io/openstack-day-israel-2017-compute-ha/#/modular
* Specs of a method to recover all virtual machines
  https://github.com/openstack/openstack-resource-agents-specs/blob/master/specs/newton/approved/newton-instance-ha-host-recovery.rst
  https://review.openstack.org/#/c/406659/

Change-Id: I6768a1822ed5f19bc66f0d6d6887194bbc32abad
Co-Authored-By: Kengo Takahara <takahara-kn@njk.co.jp>
This commit is contained in:
Rikimaru Honjo 2018-03-15 09:02:15 +00:00
parent 42bb0c53e3
commit a9d009d29a
3 changed files with 365 additions and 0 deletions

39
ocf/masakari_driver.conf Normal file
View File

@ -0,0 +1,39 @@
[DEFAULT]
# Name of log file. (string value)
log_file = /var/tmp/masakari_driver.log
[api]
# Authentication URL (string value)
#auth_url = <None>
auth_url = http://192.168.10.20/identity
# Project name to scope to (string value)
# Deprecated group/name - [api]/tenant_name
#project_name = <None>
project_name = service
# Domain ID containing project (string value)
#project_domain_id = <None>
project_domain_id = default
# Username (string value)
# Deprecated group/name - [api]/user_name
#username = <None>
username = masakari
# User's domain id (string value)
#user_domain_id = <None>
user_domain_id = default
# User's password (string value)
#password = <None>
password = masakari
# Maximum number of retries when sending a notification. (integer value)
#api_retry_max = 12
api_retry_max = 3
# Interval between retries when sending a notification fails (in seconds).
# (integer value)
#api_retry_interval = 10
api_retry_interval = 1

176
ocf/masakari_driver.py Normal file
View File

@ -0,0 +1,176 @@
#!/usr/bin/env python
import ConfigParser
import os
import sys
import eventlet
from keystoneauth1.identity.generic import password as ks_password
from keystoneauth1 import session as ks_session
from openstack import connection
from openstack import exceptions
from openstack import service_description
from oslo_config import cfg
from oslo_log import log
from oslo_utils import timeutils
from masakariclient.sdk.ha.v1 import _proxy
# Process-wide oslo.config handle and the logger for this module.
CONF = cfg.CONF
LOG = log.getLogger(__name__)

# Product name handed to oslo.log when logging is set up.
DOMAIN = "masakari_driver"

# NOTE: masakari_driver.conf is expected to sit next to this program
# file, so its path is derived from the location of the script itself.
script_dir = os.path.dirname(os.path.abspath(__file__))
CONFIG_FILE = "/".join((script_dir, "masakari_driver.conf"))
# Fallback values handed to SafeConfigParser for options that are missing
# from the configuration file.
# NOTE: Every non-None value must be a string. SafeConfigParser applies
# string interpolation to the values it returns, so an integer default
# fails as soon as the corresponding option is omitted from the file.
default_config = {
    'log_file': None,
    'auth_url': None,
    'project_name': None,
    'project_domain_id': None,
    'username': None,
    'user_domain_id': None,
    'password': None,
    'api_retry_max': '12',
    'api_retry_interval': '10',
}

# Field values used to build the host-failure notification.
TYPE_COMPUTE_HOST = "COMPUTE_HOST"
EVENT_STOPPED = "STOPPED"
CLUSTER_STATUS_OFFLINE = "OFFLINE"
HOST_STATUS_NORMAL = "NORMAL"
class MasakariDriver(object):
    """Send a masakari host-failure notification for one compute host.

    The API credentials, log file and retry parameters are read from
    CONFIG_FILE when the driver is constructed.
    """

    def __init__(self, failure_host):
        """Load the configuration and set up logging.

        :param failure_host: hostname reported in the notification.
        """
        self.failure_host = failure_host
        self._read_config()
        self._setup_log()

    def _read_config(self):
        """Read configuration file by using ConfigParser.

        Options missing from the file fall back to ``default_config``.
        ConfigParser raises NoSectionError when the file cannot be read
        or has no ``[api]`` section.
        """
        # NOTE: At first I attempted to use oslo.config, but it required
        # either '[--config-dir DIR]' or '[--config-file PATH]' for argument,
        # and the hostname couldn't be passed as an argument.
        # So I use ConfigParser.
        inifile = ConfigParser.SafeConfigParser(default_config)
        inifile.read(CONFIG_FILE)

        self.log_file = inifile.get('DEFAULT', 'log_file')

        self.auth_url = inifile.get('api', 'auth_url')
        self.project_name = inifile.get('api', 'project_name')
        self.project_domain_id = inifile.get('api', 'project_domain_id')
        self.username = inifile.get('api', 'username')
        self.user_domain_id = inifile.get('api', 'user_domain_id')
        self.password = inifile.get('api', 'password')
        self.api_retry_max = inifile.getint('api', 'api_retry_max')
        self.api_retry_interval = inifile.getint('api', 'api_retry_interval')

    def _setup_log(self):
        """Set up oslo.log, writing to ``log_file`` when one is configured."""
        if self.log_file is not None:
            # NOTE(review): this assigns the attribute on CONF directly,
            # before the logging options are registered; confirm that
            # oslo.log picks the value up as intended.
            CONF.log_file = self.log_file
        log.register_options(CONF)
        log.setup(CONF, DOMAIN)

    def _make_event(self):
        """Build the notification event for the failed host.

        :returns: dict with a single 'notification' key holding the type,
                  hostname, generation time and payload of the event.
        """
        current_time = timeutils.utcnow()
        event = {
            'notification': {
                'type': TYPE_COMPUTE_HOST,
                # Set hostname which was passed as argument.
                'hostname': self.failure_host,
                'generated_time': current_time,
                'payload': {
                    'event': EVENT_STOPPED,
                    'cluster_status': CLUSTER_STATUS_OFFLINE,
                    'host_status': HOST_STATUS_NORMAL
                }
            }
        }
        return event

    def _make_client(self):
        """Create the masakari ('ha' service) client used for notifications.

        :returns: proxy object for the 'ha' service, authenticated with
                  the configured keystone credentials.
        """
        # NOTE: This function uses masakari-monitors's code as reference.
        auth = ks_password.Password(
            auth_url=self.auth_url,
            username=self.username,
            password=self.password,
            user_domain_id=self.user_domain_id,
            project_name=self.project_name,
            project_domain_id=self.project_domain_id)
        session = ks_session.Session(auth=auth)

        desc = service_description.ServiceDescription(
            service_type='ha', proxy_class=_proxy.Proxy)
        conn = connection.Connection(
            session=session, extra_services=[desc])
        conn.add_service(desc)

        client = conn.ha.proxy_class(
            session=session, service_type='ha')
        return client

    def send_notification(self):
        """Send the host-failure notification to masakari.

        A failed request is retried up to ``api_retry_max`` times, with
        ``api_retry_interval`` seconds between attempts. An HTTP 409
        response is not retried, because it means the same notification
        has already been sent.

        :returns: True if the notification was accepted or had already
                  been sent (HTTP 409), False if every attempt failed.
        """
        # NOTE: This function uses masakari-monitors's code as reference.
        event = self._make_event()
        LOG.info("Send a notification. %s", event)

        client = self._make_client()
        notification = event['notification']

        retry_count = 0
        while True:
            try:
                response = client.create_notification(
                    type=notification['type'],
                    hostname=notification['hostname'],
                    generated_time=notification['generated_time'],
                    payload=notification['payload'])
            except Exception as e:
                # If http_status is 409, skip the retry processing.
                if (isinstance(e, exceptions.HttpException) and
                        e.status_code == 409):
                    msg = ("Stop retrying to send a notification because "
                           "same notification have been already sent.")
                    LOG.info("%s", msg)
                    return True

                if retry_count >= self.api_retry_max:
                    # Retries are exhausted: report the failure to the
                    # caller instead of silently giving up.
                    LOG.exception("Exception caught: %s", e)
                    return False

                LOG.warning("Retry sending a notification. (%s)", e)
                retry_count = retry_count + 1
                eventlet.greenthread.sleep(self.api_retry_interval)
            else:
                LOG.info("Response: %s", response)
                return True
# Command-line entry point: masakari_driver.py <failure hostname>
if __name__ == '__main__':
    if len(sys.argv) != 2:
        # Fill in the program name; the format string was previously
        # printed with a literal "%s".
        print("Usage: %s <failure hostname>" % sys.argv[0])
        sys.exit(1)

    masakari_driver = MasakariDriver(sys.argv[1])
    # The calling resource agent checks the exit status, so report a
    # failure when send_notification() explicitly returns False. Any other
    # return value (including None) is treated as success.
    if masakari_driver.send_notification() is False:
        sys.exit(1)
    sys.exit(0)

150
ocf/nova-host-alerter Normal file
View File

@ -0,0 +1,150 @@
#!/bin/sh
#
# NOTE: This code is PoC.
# So please note that this is different from the real nova-host-alerter.
#######################################################################
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
# Define 'driver' parameter in metadata.
# Print the OCF resource agent metadata (XML) on stdout.
# Declares the required 'driver' parameter and every action handled by
# this script, including validate-all.
meta_data() {
    cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="nova-host-alerter">
<version>1.0</version>
<longdesc lang="en">
PoC resource agent that notifies a recovery service, selected by the
"driver" parameter, when a failure of a compute host is detected.
</longdesc>
<shortdesc lang="en">Nova host failure alerter (PoC)</shortdesc>
<parameters>
<parameter name="driver" unique="1" required="1">
<longdesc lang="en">
Recovery driver to use. Must be either "masakari" or "mistral".
</longdesc>
<shortdesc lang="en">Driver of recovery.</shortdesc>
<content type="string" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="60" />
<action name="stop" timeout="60" />
<action name="status" timeout="60" />
<action name="monitor" interval="60" timeout="60" />
<action name="meta-data" timeout="5" />
<action name="validate-all" timeout="5" />
</actions>
</resource-agent>
END
    return $OCF_SUCCESS
}
SERVICE=nova-host-alerter
# Requested OCF action (start, stop, status, monitor, ...).
OP=$1
# State file: it is created on start, removed on stop and tested by
# status/monitor.
OCF_RESKEY_state=/var/tmp/nova-host-alerter.tmp
# NOTE: It is assumed that masakari_driver.py exists in the same directory
# as nova-host-alerter.
# Quote the expansions so a path containing whitespace still resolves, and
# only run pwd when the cd succeeded.
SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd)
MASAKARI_DRIVER="${SCRIPT_DIR}/masakari_driver.py"
# Start the resource by creating the state file.
# Returns OCF_SUCCESS when the resource is already running or the state
# file was created, OCF_ERR_GENERIC when the state file cannot be created.
nova_host_alerter_start() {
    nova_host_alerter_status
    if [ $? -eq $OCF_SUCCESS ]
    then
        ocf_log info "$(basename "$0") is already running."
        return $OCF_SUCCESS
    fi

    # Do not report a successful start if the state file is missing,
    # otherwise the next monitor would see the resource as not running.
    if ! touch "$OCF_RESKEY_state"
    then
        ocf_log err "Failed to create state file $OCF_RESKEY_state."
        return $OCF_ERR_GENERIC
    fi
    return $OCF_SUCCESS
}
# Stop the resource by removing the state file.
# Returns OCF_SUCCESS when the resource was not running or the state file
# was removed, OCF_ERR_GENERIC otherwise.
nova_host_alerter_stop() {
    nova_host_alerter_status
    RC=$?
    case "$RC" in
        ${OCF_NOT_RUNNING})
            ocf_log info "$(basename "$0") is not running."
            return $OCF_SUCCESS
            ;;
        ${OCF_SUCCESS})
            ocf_log info "stopping..."
            if ! rm -f "$OCF_RESKEY_state"
            then
                ocf_log err "Failed to remove state file $OCF_RESKEY_state."
                return $OCF_ERR_GENERIC
            fi
            # This used to return the misspelled (and therefore empty)
            # $OCF_SCCESS, which made the result depend on rm's status.
            return $OCF_SUCCESS
            ;;
    esac
    return $OCF_ERR_GENERIC
}
# Report whether the resource is running and, when the PoC trigger file
# exists, hand the failure over to the configured recovery driver.
# Returns OCF_NOT_RUNNING when the state file is absent, OCF_SUCCESS
# otherwise (even if sending the notification failed).
nova_host_alerter_status() {
    [ -f "$OCF_RESKEY_state" ] || return $OCF_NOT_RUNNING

    # This condition is meaningless since this code is PoC.
    # Actually, it should be condition that "if nova-host-alerter detects
    # a failure".
    FLG_FILE=/tmp/nova-host-alerter-flg
    if [ -e "$FLG_FILE" ]; then
        case "$OCF_RESKEY_driver" in
            "masakari")
                # Execute masakari_driver.py. Please note that
                # masakari_driver.py must have executable mode.
                # The failure node is hard-coded since this code is PoC,
                # but actually the failure node should be set by some
                # kind of logic. Here's how to call masakari_driver.py.
                FAILURE_NODE="compute-node1"
                "$MASAKARI_DRIVER" "$FAILURE_NODE"
                retval=$?
                if [ $retval -eq 0 ]; then
                    ocf_log info "Succeeded in sending a notification."
                else
                    ocf_log err "Failed to send a notification."
                fi
                # Remove the trigger so the notification is sent only once.
                rm -rf "$FLG_FILE"
                ;;
            "mistral")
                # Some logics.
                ;;
            *)
                # Make a misconfigured driver visible instead of ignoring
                # the detected failure silently.
                ocf_log warn "Unsupported driver '$OCF_RESKEY_driver'; no notification was sent."
                ;;
        esac
    fi
    return $OCF_SUCCESS
}
# Dispatch the requested OCF action and exit with its return code.
# status and monitor share one handler; validate-all always succeeds;
# any other action is reported as unimplemented.
case "$OP" in
    start)
        nova_host_alerter_start
        exit $?
        ;;
    stop)
        nova_host_alerter_stop
        exit $?
        ;;
    status|monitor)
        nova_host_alerter_status
        exit $?
        ;;
    meta-data)
        meta_data
        exit $?
        ;;
    validate-all)
        exit $OCF_SUCCESS
        ;;
    *)
        exit $OCF_ERR_UNIMPLEMENTED
        ;;
esac