diff --git a/ocf/masakari_driver.conf b/ocf/masakari_driver.conf
new file mode 100644
index 0000000..05c22c8
--- /dev/null
+++ b/ocf/masakari_driver.conf
@@ -0,0 +1,39 @@
+[DEFAULT]
+# Name of log file. (string value)
+log_file = /var/tmp/masakari_driver.log
+
+[api]
+# Authentication URL (string value)
+#auth_url =
+auth_url = http://192.168.10.20/identity
+
+# Project name to scope to (string value)
+# Deprecated group/name - [api]/tenant_name
+#project_name =
+project_name = service
+
+# Domain ID containing project (string value)
+#project_domain_id =
+project_domain_id = default
+
+# Username (string value)
+# Deprecated group/name - [api]/user_name
+#username =
+username = masakari
+
+# User's domain id (string value)
+#user_domain_id =
+user_domain_id = default
+
+# User's password (string value)
+#password =
+password = masakari
+
+# Number of retries for sending a notification. (integer value)
+#api_retry_max = 12
+api_retry_max = 3
+
+# Interval between retries when sending a notification fails (in seconds).
+# (integer value)
+#api_retry_interval = 10
+api_retry_interval = 1
diff --git a/ocf/masakari_driver.py b/ocf/masakari_driver.py
new file mode 100644
index 0000000..926b9b6
--- /dev/null
+++ b/ocf/masakari_driver.py
@@ -0,0 +1,231 @@
+#!/usr/bin/env python
+"""Send a Masakari COMPUTE_HOST failure notification for one host.
+
+Usage: masakari_driver.py <failure_host>
+
+Settings are read from masakari_driver.conf, which is expected to sit in
+the same directory as this script.  The exit status is 0 when the
+notification was accepted (or had already been sent) and 1 otherwise.
+"""
+
+import os
+import sys
+
+try:
+    import configparser
+except ImportError:
+    # Python 2 names the module ConfigParser.
+    import ConfigParser as configparser
+
+import eventlet
+from keystoneauth1.identity.generic import password as ks_password
+from keystoneauth1 import session as ks_session
+from openstack import connection
+from openstack import exceptions
+from openstack import service_description
+from oslo_config import cfg
+from oslo_log import log
+from oslo_utils import timeutils
+
+from masakariclient.sdk.ha.v1 import _proxy
+
+LOG = log.getLogger(__name__)
+CONF = cfg.CONF
+DOMAIN = "masakari_driver"
+
+script_dir = os.path.dirname(os.path.abspath(__file__))
+
+# NOTE: The config file (masakari_driver.conf) is assumed to exist
+# in the same directory as this program file.
+CONFIG_FILE = os.path.join(script_dir, "masakari_driver.conf")
+
+# Fallback values for options that are missing from the config file.
+default_config = {
+    'log_file': None,
+    'auth_url': None,
+    'project_name': None,
+    'project_domain_id': None,
+    'username': None,
+    'user_domain_id': None,
+    'password': None,
+    'api_retry_max': 12,
+    'api_retry_interval': 10,
+}
+
+TYPE_COMPUTE_HOST = "COMPUTE_HOST"
+EVENT_STOPPED = "STOPPED"
+CLUSTER_STATUS_OFFLINE = "OFFLINE"
+HOST_STATUS_NORMAL = "NORMAL"
+
+
+class MasakariDriver(object):
+    """Builds a host-failure notification and sends it to Masakari."""
+
+    def __init__(self, failure_host):
+        """Load the configuration and set up logging.
+
+        :param failure_host: hostname to report in the notification.
+        """
+        self.failure_host = failure_host
+        self._read_config()
+        self._setup_log()
+
+    @staticmethod
+    def _get_option(parser, section, option):
+        """Return an option's value, or its default_config fallback."""
+        try:
+            return parser.get(section, option)
+        except (configparser.NoSectionError, configparser.NoOptionError):
+            return default_config[option]
+
+    def _read_config(self):
+        """Read configuration file by using ConfigParser."""
+
+        # NOTE: At first I attempted to use oslo.config, but it required
+        # either '[--config-dir DIR]' or '[--config-file PATH]' for argument,
+        # and the hostname couldn't be passed as an argument.
+        # So I use ConfigParser.
+        #
+        # default_config is applied by _get_option() instead of being passed
+        # to the parser: parser defaults are expected to be strings, which
+        # the None and integer entries of default_config are not.
+        if sys.version_info[0] >= 3:
+            inifile = configparser.ConfigParser()
+        else:
+            inifile = configparser.SafeConfigParser()
+        inifile.read(CONFIG_FILE)
+
+        get = self._get_option
+        self.log_file = get(inifile, 'DEFAULT', 'log_file')
+        self.auth_url = get(inifile, 'api', 'auth_url')
+        self.project_name = get(inifile, 'api', 'project_name')
+        self.project_domain_id = get(inifile, 'api', 'project_domain_id')
+        self.username = get(inifile, 'api', 'username')
+        self.user_domain_id = get(inifile, 'api', 'user_domain_id')
+        self.password = get(inifile, 'api', 'password')
+        self.api_retry_max = int(get(inifile, 'api', 'api_retry_max'))
+        self.api_retry_interval = int(
+            get(inifile, 'api', 'api_retry_interval'))
+
+    def _setup_log(self):
+        """Set up oslo.log, logging to self.log_file when it is set."""
+        # The logging options must be registered on CONF before log_file
+        # can be overridden.
+        log.register_options(CONF)
+        if self.log_file is not None:
+            CONF.set_override('log_file', self.log_file)
+
+        log.setup(CONF, DOMAIN)
+
+    def _make_event(self):
+        """Make a notification event.
+
+        :returns: dict holding the COMPUTE_HOST notification for
+                  self.failure_host, timestamped with timeutils.utcnow().
+        """
+        current_time = timeutils.utcnow()
+        event = {
+            'notification': {
+                'type': TYPE_COMPUTE_HOST,
+                # Set hostname which was passed as argument.
+                'hostname': self.failure_host,
+                'generated_time': current_time,
+                'payload': {
+                    'event': EVENT_STOPPED,
+                    'cluster_status': CLUSTER_STATUS_OFFLINE,
+                    'host_status': HOST_STATUS_NORMAL
+                }
+            }
+        }
+
+        return event
+
+    def _make_client(self):
+        """Make client for a notification.
+
+        :returns: masakariclient 'ha' proxy bound to a keystone session
+                  built from the configured credentials.
+        """
+
+        # NOTE: This function uses masakari-monitors's code as reference.
+
+        auth = ks_password.Password(
+            auth_url=self.auth_url,
+            username=self.username,
+            password=self.password,
+            user_domain_id=self.user_domain_id,
+            project_name=self.project_name,
+            project_domain_id=self.project_domain_id)
+        session = ks_session.Session(auth=auth)
+
+        desc = service_description.ServiceDescription(
+            service_type='ha', proxy_class=_proxy.Proxy)
+        conn = connection.Connection(
+            session=session, extra_services=[desc])
+        conn.add_service(desc)
+
+        client = conn.ha.proxy_class(
+            session=session, service_type='ha')
+
+        return client
+
+    def send_notification(self):
+        """Send a notification, retrying when the request fails.
+
+        A 409 response means the same notification was already sent and is
+        treated as success.  Any other error is retried up to
+        api_retry_max times, waiting api_retry_interval seconds in between.
+
+        :returns: True if the notification was sent (or already existed),
+                  False if every attempt failed.
+        """
+
+        # NOTE: This function uses masakari-monitors's code as reference.
+
+        # Make event.
+        event = self._make_event()
+        LOG.info("Send a notification. %s", event)
+        notification = event['notification']
+
+        # Get client.
+        client = self._make_client()
+
+        # Send a notification.
+        retry_count = 0
+        while True:
+            try:
+                response = client.create_notification(
+                    type=notification['type'],
+                    hostname=notification['hostname'],
+                    generated_time=notification['generated_time'],
+                    payload=notification['payload'])
+            except Exception as e:
+                # If http_status is 409, skip the retry processing.
+                if (isinstance(e, exceptions.HttpException) and
+                        e.status_code == 409):
+                    msg = ("Stop retrying to send a notification because "
+                           "same notification have been already sent.")
+                    LOG.info("%s", msg)
+                    return True
+
+                if retry_count >= self.api_retry_max:
+                    LOG.exception("Exception caught: %s", e)
+                    return False
+
+                LOG.warning("Retry sending a notification. (%s)", e)
+                retry_count = retry_count + 1
+                eventlet.greenthread.sleep(self.api_retry_interval)
+            else:
+                LOG.info("Response: %s", response)
+                return True
+
+
+if __name__ == '__main__':
+    if len(sys.argv) != 2:
+        print("Usage: %s <failure_host>" % sys.argv[0])
+        sys.exit(1)
+
+    masakari_driver = MasakariDriver(sys.argv[1])
+    # Exit non-zero on failure so that the caller (nova-host-alerter) can
+    # tell whether the notification was actually sent.
+    sys.exit(0 if masakari_driver.send_notification() else 1)
diff --git a/ocf/nova-host-alerter b/ocf/nova-host-alerter
new file mode 100644
index 0000000..5fd09c2
--- /dev/null
+++ b/ocf/nova-host-alerter
@@ -0,0 +1,157 @@
+#!/bin/sh
+#
+
+# NOTE: This code is PoC.
+# So please note that this is different from the real nova-host-alerter.
+
+#######################################################################
+# Initialization:
+
+: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
+. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+
+# Define 'driver' parameter in metadata.
+# NOTE(review): the XML markup below was reconstructed following the
+# standard OCF resource-agent layout; confirm the element attributes
+# (parameter flags, advertised actions and their timeouts).
+meta_data() {
+    cat <<END
+<?xml version="1.0"?>
+<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
+<resource-agent name="nova-host-alerter">
+<version>1.0</version>
+
+<longdesc lang="en">
+hoge
+</longdesc>
+<shortdesc lang="en">hoge</shortdesc>
+
+<parameters>
+<parameter name="driver" unique="0" required="1">
+<longdesc lang="en">
+Specify the driver of recovery. "masakari" or "mistral" is required to specify.
+</longdesc>
+<shortdesc lang="en">Driver of recovery.</shortdesc>
+<content type="string" />
+</parameter>
+</parameters>
+
+<actions>
+<action name="start" timeout="20s" />
+<action name="stop" timeout="20s" />
+<action name="monitor" timeout="20s" interval="10s" />
+<action name="meta-data" timeout="5s" />
+<action name="validate-all" timeout="20s" />
+</actions>
+</resource-agent>
+END
+    return $OCF_SUCCESS
+}
+
+SERVICE=nova-host-alerter
+OP=$1
+OCF_RESKEY_state=/var/tmp/nova-host-alerter.tmp
+
+# NOTE: It is assumed that masakari_driver.py exists in same directory with
+# nova-host-alerter.
+SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd)
+MASAKARI_DRIVER=${SCRIPT_DIR}/masakari_driver.py
+
+# Mark the resource as started by creating the state file.
+nova_host_alerter_start() {
+    nova_host_alerter_status
+    if [ $? -eq $OCF_SUCCESS ]
+    then
+        ocf_log info "`basename $0` is already running."
+        return $OCF_SUCCESS
+    fi
+
+    touch "$OCF_RESKEY_state"
+    return $OCF_SUCCESS
+}
+
+# Mark the resource as stopped by removing the state file.
+nova_host_alerter_stop() {
+    nova_host_alerter_status
+    RC=$?
+    case "$RC" in
+    ${OCF_NOT_RUNNING})
+        ocf_log info "`basename $0` is not running."
+        return $OCF_SUCCESS
+        ;;
+    ${OCF_SUCCESS})
+        ocf_log info "stopping..."
+        rm -f "$OCF_RESKEY_state"
+        return $OCF_SUCCESS
+        ;;
+    esac
+    return $OCF_ERR_GENERIC
+}
+
+# Report whether the state file exists; when it does and the flag file is
+# present, dispatch on OCF_RESKEY_driver (only "masakari" is implemented).
+nova_host_alerter_status() {
+    [ -f "$OCF_RESKEY_state" ] || return $OCF_NOT_RUNNING
+
+    # This condition is meaningless since this code is PoC.
+    # Actually, it should be condition that "if nova-host-alerter detects
+    # a failure".
+    FLG_FILE=/tmp/nova-host-alerter-flg
+    if [ -e "$FLG_FILE" ]; then
+        case "$OCF_RESKEY_driver" in
+        "masakari")
+            # Execute masakari_driver.py. Please note that
+            # masakari_driver.py must have executable mode.
+            # The failure node is hard-coded since this code is PoC,
+            # but actually the failure node should be set by some
+            # kind of logic. Here's how to call masakari_driver.py.
+            FAILURE_NODE="compute-node1"
+            "$MASAKARI_DRIVER" "$FAILURE_NODE"
+            retval=$?
+            if [ $retval -eq 0 ]; then
+                ocf_log info "Succeeded in sending a notification."
+            else
+                ocf_log err "Failed to send a notification."
+            fi
+            rm -rf "$FLG_FILE"
+            ;;
+        "mistral")
+            # Some logics.
+            ;;
+        *)
+            # Some logics.
+            ;;
+        esac
+    fi
+    return $OCF_SUCCESS
+}
+
+case $OP in
+start)
+    nova_host_alerter_start
+    RC=$?
+    ;;
+stop)
+    nova_host_alerter_stop
+    RC=$?
+    ;;
+status)
+    nova_host_alerter_status
+    RC=$?
+    ;;
+monitor)
+    nova_host_alerter_status
+    RC=$?
+    ;;
+meta-data)
+    meta_data
+    RC=$?
+    ;;
+validate-all)
+    RC=$OCF_SUCCESS
+    ;;
+*)
+    RC=$OCF_ERR_UNIMPLEMENTED
+    ;;
+esac
+exit $RC