[PoC]Call masakari APIs from a resource agent
This is a PoC of a resource agent that calls the masakari APIs when a failure of an instance's host occurs. The purposes of this patch are as follows: - Show how to call the masakari APIs from nova-host-alerter. - Indicate the implementation of the masakari driver. This PoC is based on an idea called "Modular architecture". However, "Modular architecture" is not implemented yet, so this is just a PoC. In this patch, we assume that nova-host-alerter has a 'driver' param in its primitive definition, and that 'masakari' or 'mistral' is set for the driver param. We also plan to place masakari_driver.py and masakari_driver.conf in the same directory as nova-host-alerter. reference: * The idea of modular architecture https://aspiers.github.io/openstack-day-israel-2017-compute-ha/#/modular * Specs of a method to recover all virtual machines https://github.com/openstack/openstack-resource-agents-specs/blob/master/specs/newton/approved/newton-instance-ha-host-recovery.rst https://review.openstack.org/#/c/406659/ Change-Id: I6768a1822ed5f19bc66f0d6d6887194bbc32abad Co-Authored-By: Kengo Takahara <takahara-kn@njk.co.jp>
This commit is contained in:
parent
42bb0c53e3
commit
a9d009d29a
|
@ -0,0 +1,39 @@
|
|||
[DEFAULT]
|
||||
# Name of log file. (string value)
|
||||
log_file = /var/tmp/masakari_driver.log
|
||||
|
||||
[api]
|
||||
# Authentication URL (string value)
|
||||
#auth_url = <None>
|
||||
auth_url = http://192.168.10.20/identity
|
||||
|
||||
# Project name to scope to (string value)
|
||||
# Deprecated group/name - [api]/tenant_name
|
||||
#project_name = <None>
|
||||
project_name = service
|
||||
|
||||
# Domain ID containing project (string value)
|
||||
#project_domain_id = <None>
|
||||
project_domain_id = default
|
||||
|
||||
# Username (string value)
|
||||
# Deprecated group/name - [api]/user_name
|
||||
#username = <None>
|
||||
username = masakari
|
||||
|
||||
# User's domain id (string value)
|
||||
#user_domain_id = <None>
|
||||
user_domain_id = default
|
||||
|
||||
# User's password (string value)
|
||||
#password = <None>
|
||||
password = masakari
|
||||
|
||||
# Number of retries for sending a notification. (integer value)
|
||||
#api_retry_max = 12
|
||||
api_retry_max = 3
|
||||
|
||||
# Interval between retries when sending a notification fails (in seconds).
|
||||
# (integer value)
|
||||
#api_retry_interval = 10
|
||||
api_retry_interval = 1
|
|
@ -0,0 +1,176 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
import ConfigParser
|
||||
import os
|
||||
import sys
|
||||
|
||||
import eventlet
|
||||
from keystoneauth1.identity.generic import password as ks_password
|
||||
from keystoneauth1 import session as ks_session
|
||||
from openstack import connection
|
||||
from openstack import exceptions
|
||||
from openstack import service_description
|
||||
from oslo_config import cfg
|
||||
from oslo_log import log
|
||||
from oslo_utils import timeutils
|
||||
|
||||
from masakariclient.sdk.ha.v1 import _proxy
|
||||
|
||||
# Global oslo.config object and the module-level logger used by the driver.
CONF = cfg.CONF
LOG = log.getLogger(__name__)

# Product name handed to oslo.log when logging is set up.
DOMAIN = "masakari_driver"

# NOTE: The config file (masakari_driver.conf) is assumed to exist
# in the same directory as this program file.
script_dir = os.path.dirname(os.path.abspath(__file__))
CONFIG_FILE = "%s/masakari_driver.conf" % script_dir

# Fallback values handed to the config parser for options that are
# missing from CONFIG_FILE.
default_config = dict(
    log_file=None,
    auth_url=None,
    project_name=None,
    project_domain_id=None,
    username=None,
    user_domain_id=None,
    password=None,
    api_retry_max=12,
    api_retry_interval=10,
)

# Fixed field values placed in the notification built by
# MasakariDriver._make_event().
TYPE_COMPUTE_HOST = "COMPUTE_HOST"
EVENT_STOPPED = "STOPPED"
CLUSTER_STATUS_OFFLINE = "OFFLINE"
HOST_STATUS_NORMAL = "NORMAL"
|
||||
|
||||
|
||||
class MasakariDriver(object):
    """Send a COMPUTE_HOST failure notification to masakari.

    The connection settings are read from CONFIG_FILE when the object is
    created; call send_notification() to actually send the notification.
    """

    def __init__(self, failure_host):
        """Store the failed host name, then load config and set up logging.

        :param failure_host: hostname reported in the notification.
        """
        self.failure_host = failure_host
        self._read_config()
        self._setup_log()

    def _read_config(self):
        """Read configuration file by using ConfigParser.

        Populates the log/auth/retry attributes of the instance.
        Options missing from the file fall back to ``default_config``.
        A file without an ``[api]`` section makes ConfigParser raise
        ``NoSectionError``.
        """

        # NOTE: At first I attempted to use oslo.config, but it required
        # either '[--config-dir DIR]' or '[--config-file PATH]' for argument,
        # and the hostname couldn't be passed as an argument.
        # So I use ConfigParser.

        # SafeConfigParser interpolates every non-None value as a string,
        # so non-string defaults (the integer retry settings) must be
        # converted up front; otherwise get() fails whenever such an
        # option is absent from the file.
        defaults = {}
        for key, value in default_config.items():
            defaults[key] = value if value is None else str(value)

        inifile = ConfigParser.SafeConfigParser(defaults)
        inifile.read(CONFIG_FILE)

        self.log_file = inifile.get('DEFAULT', 'log_file')
        self.auth_url = inifile.get('api', 'auth_url')
        self.project_name = inifile.get('api', 'project_name')
        self.project_domain_id = inifile.get('api', 'project_domain_id')
        self.username = inifile.get('api', 'username')
        self.user_domain_id = inifile.get('api', 'user_domain_id')
        self.password = inifile.get('api', 'password')
        self.api_retry_max = inifile.getint('api', 'api_retry_max')
        self.api_retry_interval = inifile.getint('api', 'api_retry_interval')

    def _setup_log(self):
        """Setup log"""
        # NOTE(review): log_file is assigned directly on CONF before the
        # logging options are registered; kept as in the original --
        # confirm whether an oslo.config override would be preferable.
        if self.log_file is not None:
            CONF.log_file = self.log_file

        log.register_options(CONF)
        log.setup(CONF, DOMAIN)

    def _make_event(self):
        """Make a notification event.

        :returns: dict with a single 'notification' key holding the type,
                  hostname, generation time (current UTC time) and payload.
        """
        current_time = timeutils.utcnow()
        event = {
            'notification': {
                'type': TYPE_COMPUTE_HOST,
                # Set hostname which was passed as argument.
                'hostname': self.failure_host,
                'generated_time': current_time,
                'payload': {
                    'event': EVENT_STOPPED,
                    'cluster_status': CLUSTER_STATUS_OFFLINE,
                    'host_status': HOST_STATUS_NORMAL
                }
            }
        }

        return event

    def _make_client(self):
        """Make client for a notification.

        Builds a keystone password-authenticated session from the loaded
        settings and returns a masakari 'ha' proxy bound to that session.
        """

        # NOTE: This function uses masakari-monitors's code as reference.

        auth = ks_password.Password(
            auth_url=self.auth_url,
            username=self.username,
            password=self.password,
            user_domain_id=self.user_domain_id,
            project_name=self.project_name,
            project_domain_id=self.project_domain_id)
        session = ks_session.Session(auth=auth)

        desc = service_description.ServiceDescription(
            service_type='ha', proxy_class=_proxy.Proxy)
        conn = connection.Connection(
            session=session, extra_services=[desc])
        conn.add_service(desc)

        client = conn.ha.proxy_class(
            session=session, service_type='ha')

        return client

    def send_notification(self):
        """Send a notification.

        The request is retried up to ``api_retry_max`` times, sleeping
        ``api_retry_interval`` seconds between attempts. An HTTP 409
        response stops the retries immediately because the same
        notification has already been sent.

        :returns: True if the notification was sent or had already been
                  sent (HTTP 409); False if every attempt failed.
        """

        # NOTE: This function uses masakari-monitors's code as reference.

        # Make event.
        event = self._make_event()
        LOG.info("Send a notification. %s", event)
        notification = event['notification']

        # Get client.
        client = self._make_client()

        # Send a notification.
        retry_count = 0
        while True:
            try:
                response = client.create_notification(
                    type=notification['type'],
                    hostname=notification['hostname'],
                    generated_time=notification['generated_time'],
                    payload=notification['payload'])

                LOG.info("Response: %s", response)
                return True

            except Exception as e:
                # If http_status is 409, skip the retry processing.
                if (isinstance(e, exceptions.HttpException) and
                        e.status_code == 409):
                    msg = ("Stop retrying to send a notification because "
                           "same notification have been already sent.")
                    LOG.info("%s", msg)
                    return True

                if retry_count >= self.api_retry_max:
                    # Retries exhausted: report the failure to the caller.
                    LOG.exception("Exception caught: %s", e)
                    return False

                LOG.warning("Retry sending a notification. (%s)", e)
                retry_count = retry_count + 1
                eventlet.greenthread.sleep(self.api_retry_interval)
|
||||
|
||||
|
||||
# Script entry point: expects exactly one argument, the failed hostname.
if __name__ == '__main__':
    if len(sys.argv) != 2:
        # Fill in the program name; the original printed a literal "%s".
        print("Usage: %s <failure hostname>" % sys.argv[0])
        sys.exit(1)

    masakari_driver = MasakariDriver(sys.argv[1])
    sent = masakari_driver.send_notification()

    # Only an explicit False is treated as failure, so a
    # send_notification() that returns None still exits with status 0.
    sys.exit(1 if sent is False else 0)
|
|
@ -0,0 +1,150 @@
|
|||
#!/bin/sh
|
||||
#
|
||||
|
||||
# NOTE: This code is a PoC.
|
||||
# So please note that this is different from the real nova-host-alerter.
|
||||
|
||||
#######################################################################
|
||||
# Initialization:
|
||||
|
||||
# Default the OCF function directory when the caller did not set it, then
# load the shared OCF shell library (presumably the source of ocf_log and
# the OCF_* return codes used below -- they are not defined in this file).
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
# Quoted so that a directory containing whitespace is still sourced correctly.
. "${OCF_FUNCTIONS_DIR}/ocf-shellfuncs"
|
||||
|
||||
# Define 'driver' parameter in metadata.
|
||||
# Print the resource agent metadata (XML) on stdout.
# Declares the required 'driver' parameter and the supported actions.
# Returns: $OCF_SUCCESS
meta_data() {
    cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="nova-host-alerter">
<version>1.0</version>

<longdesc lang="en">
PoC resource agent that notifies a recovery driver ("masakari" or "mistral")
when a failure of a compute host is detected.
</longdesc>
<shortdesc lang="en">Notifies a recovery driver of compute host failures (PoC).</shortdesc>

<parameters>
<parameter name="driver" unique="1" required="1">
<longdesc lang="en">
Specify the recovery driver. The value must be either "masakari" or "mistral".
</longdesc>
<shortdesc lang="en">Driver of recovery.</shortdesc>
<content type="string" />
</parameter>
</parameters>

<actions>
<action name="start" timeout="60" />
<action name="stop" timeout="60" />
<action name="status" timeout="60" />
<action name="monitor" interval="60" timeout="60" />
<action name="meta-data" timeout="5" />
<action name="validate-all" timeout="5" />
</actions>
</resource-agent>
END
    return $OCF_SUCCESS
}
|
||||
|
||||
# Agent name, requested action (first argument) and the state file whose
# existence marks the resource as started.
SERVICE=nova-host-alerter
OP=$1
OCF_RESKEY_state=/var/tmp/nova-host-alerter.tmp

# NOTE: It is assumed that masakari_driver.py exists in same directory with
# nova-host-alerter.
# Quoted so that an install path containing whitespace resolves correctly.
SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd)
MASAKARI_DRIVER=${SCRIPT_DIR}/masakari_driver.py
|
||||
|
||||
# Start action: create the state file unless the resource already runs.
# Returns: $OCF_SUCCESS when running afterwards,
#          $OCF_ERR_GENERIC when the state file cannot be created.
nova_host_alerter_start() {
    nova_host_alerter_status
    if [ $? -eq "$OCF_SUCCESS" ]
    then
        ocf_log info "$(basename "$0") is already running."
        return $OCF_SUCCESS
    fi

    # Do not report success if the state file could not be created;
    # otherwise the next status check would contradict this result.
    if ! touch "$OCF_RESKEY_state"
    then
        ocf_log err "Failed to create state file $OCF_RESKEY_state."
        return $OCF_ERR_GENERIC
    fi
    return $OCF_SUCCESS
}
|
||||
|
||||
# Stop action: remove the state file if the resource is running.
# Returns: $OCF_SUCCESS when stopped (or already stopped),
#          $OCF_ERR_GENERIC when the state file cannot be removed or the
#          status check returned an unexpected code.
nova_host_alerter_stop() {
    nova_host_alerter_status
    RC=$?
    case "$RC" in
        "$OCF_NOT_RUNNING")
            ocf_log info "$(basename "$0") is not running."
            return $OCF_SUCCESS
            ;;
        "$OCF_SUCCESS")
            ocf_log info "stopping..."
            if ! rm -f "$OCF_RESKEY_state"
            then
                ocf_log err "Failed to remove state file $OCF_RESKEY_state."
                return $OCF_ERR_GENERIC
            fi
            # The original returned the misspelled, unset $OCF_SCCESS,
            # i.e. whatever status the preceding command happened to leave.
            return $OCF_SUCCESS
            ;;
    esac
    return $OCF_ERR_GENERIC
}
|
||||
|
||||
# Status/monitor action.
# Returns: $OCF_NOT_RUNNING when the state file does not exist,
#          $OCF_SUCCESS otherwise.
# Side effect (PoC): when the flag file exists and the driver is
# "masakari", masakari_driver.py is executed and the flag file is removed.
nova_host_alerter_status() {
    [ -f "$OCF_RESKEY_state" ] || return $OCF_NOT_RUNNING

    # This condition is meaningless since this code is PoC.
    # Actually, it should be condition that "if nova-host-alerter detects
    # a failure".
    FLG_FILE=/tmp/nova-host-alerter-flg
    if [ -e "$FLG_FILE" ]; then
        case "$OCF_RESKEY_driver" in
            "masakari")
                # Execute masakari_driver.py. Please note that
                # masakari_driver.py must have executable mode.
                # The failure node is hard-coded since this code is PoC,
                # but actually the failure node should be set by some
                # kind of logic. Here's how to call masakari_driver.py.
                FAILURE_NODE="compute-node1"
                "$MASAKARI_DRIVER" "$FAILURE_NODE"
                retval=$?
                if [ $retval -eq 0 ]; then
                    ocf_log info "Succeeded in sending a notification."
                else
                    ocf_log err "Failed to send a notification."
                fi
                rm -rf "$FLG_FILE"
                ;;
            "mistral")
                # Some logics.
                ;;
            *)
                # Some logics.
                ;;
        esac
    fi
    return $OCF_SUCCESS
}
|
||||
|
||||
# Dispatch the requested action and exit with its return code.
# status and monitor share the same handler; unknown actions are reported
# as unimplemented.
case "$OP" in
    start)          nova_host_alerter_start;  RC=$? ;;
    stop)           nova_host_alerter_stop;   RC=$? ;;
    status|monitor) nova_host_alerter_status; RC=$? ;;
    meta-data)      meta_data;                RC=$? ;;
    validate-all)   RC=$OCF_SUCCESS ;;
    *)              RC=$OCF_ERR_UNIMPLEMENTED ;;
esac
exit $RC
|
Loading…
Reference in New Issue