Adding Notification System and Notification Drivers

Support sending notification emails when a problem occurs

Change-Id: I44fd14afc8003bfdeecb64f4402f386920279a1e
This commit is contained in:
Saad Zaher 2016-01-13 14:03:22 +00:00
parent 144067babf
commit 61d1674186
24 changed files with 864 additions and 320 deletions

View File

@ -156,27 +156,34 @@
# available plugin for the time being (string value)
#auth_plugin = <None>
# Openstack Project Domain id, default is Default (string value)
#project_domain_id = Default
# Openstack user Domain id, default is Default (string value)
#user_domain_id = Default
# Openstack Project Domain name, default is Default (string value)
#project_domain_name = Default
# Openstack user Domain name, default is Default (string value)
#user_domain_name = Default
# Openstack Project Name. (string value)
#project_name = services
# Openstack username (string value)
#username = <None>
# Openstack Password (string value)
#password = <None>
# Openstack Project Name. (string value)
#project_name = <None>
# Openstack domain Name. (string value)
#domain_name = <None>
# Openstack Project Domain id, default is Default (string value)
#project_domain_id = <None>
# Openstack user Domain id, default is Default (string value)
#user_domain_id = <None>
# Openstack Project Domain name, default is Default (string value)
#project_domain_name = <None>
# Openstack user Domain name, default is Default (string value)
#user_domain_name = <None>
# Openstack Authentication arguments you can pass it here as Key:Value,
# Key1:Value1, ... (dict value)
#kwargs =
[monitoring]
@ -199,3 +206,44 @@
# List of kwargs if you want to pass it to initialize the monitoring driver.
# should be provided in key:value format (dict value)
#kwargs =
[notifiers]
#
# From osha
#
# Notification driver to load in order to notify users if something goes wrong
# (string value)
#driver = osha.notifiers.drivers.osha.default_email.OshaEmail
# Endpoint URL for the notification system. If the driver you are using
# does not require a URL, comment this out or use None (string value)
#endpoint = <None>
# Username to authenticate against the notification system. If the driver you
# are using does not require authentication, comment this out or use None
# (string value)
#username = <None>
# Password to authenticate against the notification system. If the driver you
# are using does not require authentication, comment this out or use None
# (string value)
#password = <None>
# Path to Jinja2 templates directory that contains message templates (string
# value)
#templates-dir = /etc/osha/templates
# Key:Value Kwargs to pass it to the notification driver, if you want to pass
# any special arguments for your driver. (dict value)
#options =
# List of email addresses to notify if something went wrong and Osha
# was not able to send an email to the tenant admin (list value)
#notify-list =
# The sender address, it can be email address if we used default email driver,
# or phone number if we use sms gateway for example. (string value)
#notify-from = <None>

62
etc/templates/error.jinja Normal file
View File

@ -0,0 +1,62 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<title>{{ title }}</title>
</head>
<body>
<div id="content">
<p>Dear Administrators, <br />
A compute node went down and Osha DID NOT successfully evacuate it. Please find the following details about the host: <br />
Host: {{ host }} <br />
<p>
Tenants:
{% for tenant in tenants %}
{{ tenant.get('id') }} <br />
{% endfor %}
</p>
<br />
<p>
Instances: <br />
<table>
<tr>
<td>
Instance Name
</td>
<td>
IP
</td>
</tr>
{% for instance in instances %}
<tr>
<td> {{ instance.get('name') }} </td>
<td> {{ instance.get('addresses').get('internal')[0].get('addr') }} </td>
</tr>
{% endfor %}
</table>
</p>
<p>
Host INFO:
<table>
{% for key, value in hypervisor.iteritems() %}
<tr>
<td> {{ key }} </td>
<td> {{ value }} </td>
</tr>
{% endfor %}
</table>
</p>
TimeStamp: {{ evacuation_time }} <br />
</p>
<br />
<p>
Thanks for using <b>Osha !</b>
</p>
</div>
</body>
</html>

View File

@ -0,0 +1,62 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<title>{{ title }}</title>
</head>
<body>
<div id="content">
<p>Dear Administrators, <br />
A compute node went down and Osha successfully evacuated all instances. Please find the following details about the evacuated host: <br />
Host: {{ host }} <br />
<p>
Tenants:
{% for tenant in tenants %}
{{ tenant.get('id') }} <br />
{% endfor %}
</p>
<br />
<p>
Instances: <br />
<table>
<tr>
<td>
Instance Name
</td>
<td>
IP
</td>
</tr>
{% for instance in instances %}
<tr>
<td> {{ instance.get('name') }} </td>
<td> {{ instance.get('addresses').get('internal')[0].get('addr') }} </td>
</tr>
{% endfor %}
</table>
</p>
<p>
Host INFO:
<table>
{% for key, value in hypervisor.iteritems() %}
<tr>
<td> {{ key }} </td>
<td> {{ value }} </td>
</tr>
{% endfor %}
</table>
</p>
TimeStamp: {{ evacuation_time }} <br />
</p>
<br />
<p>
Thanks for using <b>Osha !</b>
</p>
</div>
</body>
</html>

View File

@ -0,0 +1,40 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<title>{{ title }}</title>
</head>
<body>
<div id="content">
<p>Dear {{ name }}, <br />
One of our compute nodes failed due to a technical problem. Your instances (listed below) running in tenant {{ tenant }} failed to evacuate. <br />
Instances: <br />
<table>
<tr>
<td>
Instance Name
</td>
<td>
IP
</td>
</tr>
{% for instance in instances%}
<tr>
<td> {{ instance.get('name') }} </td>
<td> {{ instance.get('addresses').get('internal')[0].get('addr') }} </td>
</tr>
{% endfor %}
</table>
TimeStamp: {{ evacuation_time }} <br />
</p>
<br />
<p>
Thanks for using <b>Osha !</b>
</p>
</div>
</body>
</html>

View File

@ -0,0 +1,40 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<title>{{ title }}</title>
</head>
<body>
<div id="content">
<p>Dear {{ name }}, <br />
One of our compute nodes failed due to a technical problem. Your instances (listed below) running in tenant {{ tenant }} were evacuated to another compute host. <br />
Instances: <br />
<table>
<tr>
<td>
Instance Name
</td>
<td>
IP
</td>
</tr>
{% for instance in instances%}
<tr>
<td> {{ instance.get('name') }} </td>
<td> {{ instance.get('addresses').get('internal')[0].get('addr') }} </td>
</tr>
{% endfor %}
</table>
TimeStamp: {{ evacuation_time }} <br />
</p>
<br />
<p>
Thanks for using <b>Osha !</b>
</p>
</div>
</body>
</html>

View File

@ -77,32 +77,35 @@ _KEYSTONE_AUTH_TOKEN = [
help='Openstack auth plugin i.e. ( password, token, ...) '
'password is the only available plugin for the time being',
dest='auth_plugin'),
cfg.StrOpt('project_domain_id',
default='Default',
help='Openstack Project Domain id, default is Default',
dest='project_domain_id'),
cfg.StrOpt('user_domain_id',
default='Default',
help='Openstack user Domain id, default is Default',
dest='user_domain_id'),
cfg.StrOpt('project_domain_name',
default='Default',
help='Openstack Project Domain name, default is Default',
dest='project_domain_name'),
cfg.StrOpt('user_domain_name',
default='Default',
help='Openstack user Domain name, default is Default',
dest='user_domain_name'),
cfg.StrOpt('project_name',
default='services',
help='Openstack Project Name.',
dest='project_name'),
cfg.StrOpt('username',
help='Openstack username',
dest='username'),
cfg.StrOpt('password',
help='Openstack Password',
dest='password')
dest='password'),
cfg.StrOpt('project_name',
help='Openstack Project Name.',
dest='project_name'),
cfg.StrOpt('domain_name',
help='Openstack domain Name.',
dest='domain_name'),
cfg.StrOpt('project_domain_id',
help='Openstack Project Domain id, default is Default',
dest='project_domain_id'),
cfg.StrOpt('user_domain_id',
help='Openstack user Domain id, default is Default',
dest='user_domain_id'),
cfg.StrOpt('project_domain_name',
help='Openstack Project Domain name, default is Default',
dest='project_domain_name'),
cfg.StrOpt('user_domain_name',
help='Openstack user Domain name, default is Default',
dest='user_domain_name'),
cfg.DictOpt('kwargs',
help='Openstack Authentication arguments you can pass it here '
'as Key:Value, Key1:Value1, ... ',
dest='kwargs',
default={})
]
@ -132,6 +135,54 @@ _EVACUATION = [
dest='options')
]
# Options of the [notifiers] configuration group. They are registered with
# CONF.register_opts(_NOTIFIERS, group='notifiers') and exposed by list_opts().
# NOTE: several dests keep their hyphens ('templates-dir', 'notify-list',
# 'notify-from'); readers of the group fetch them with .get('<hyphenated-name>').
_NOTIFIERS = [
# Import path of the notification driver class.
cfg.StrOpt('driver',
default='osha.notifiers.drivers.osha.default_email.OshaEmail',
dest='driver',
help='Notification driver to load it to notify users '
'if something went wrong'),
# Backend endpoint; optional, defaults to None.
cfg.StrOpt('endpoint',
default=None,
dest='endpoint',
help='Endpoint URL for the notification system. If you the '
'driver you are using doesnot require any URL just comment '
'it or use none'),
# Backend credentials; both optional, default to None.
cfg.StrOpt('username',
default=None,
dest='username',
help='Username to authenticate against the notification system. '
'If the driver you are using doesnot require any '
'authentications comment or use None'),
cfg.StrOpt('password',
default=None,
dest='password',
help='Password to authenticate against the notification system. '
'If the driver you are using doesnot require any '
'authentications comment or use None'),
# Directory holding the Jinja2 message templates.
cfg.StrOpt('templates-dir',
dest='templates-dir',
default='/etc/osha/templates',
help='Path to Jinja2 templates directory that contains '
'message templates'),
# Free-form key:value arguments for the driver.
cfg.DictOpt('options',
default={},
dest='options',
help='Key:Value Kwargs to pass it to the notification driver, '
'if you want to pass any special arguments for your '
'driver. '),
# Fallback recipients.
cfg.ListOpt('notify-list',
default=[],
dest='notify-list',
help='List of emails to sent them notification if something '
'went wrong and Osha wasnot able to send an email to the '
'tenant admin'),
# Sender identity; no default is set.
cfg.StrOpt('notify-from',
dest='notify-from',
help='The sender address, it can be email address if we used '
'default email driver, or phone number if we use sms '
'gateway for example.')
]
def build_os_options():
osclient_opts = [
@ -235,6 +286,15 @@ def configure():
CONF.register_group(evacuators_grp)
CONF.register_opts(_EVACUATION, group='evacuation')
# Notification Section :)
notifiers_grp = cfg.OptGroup('notifiers',
title='Notification Options',
help='Notification Driver/plugin opts to be '
'used to Notify admins/users if failure '
'happens')
CONF.register_group(notifiers_grp)
CONF.register_opts(_NOTIFIERS, group='notifiers')
# Osha Auth
keystone_grp = cfg.OptGroup('keystone_authtoken',
title='Keystone Auth Options',
@ -275,7 +335,8 @@ def list_opts():
'monitoring': _MONITORS,
'keystone_authtoken': _KEYSTONE_AUTH_TOKEN,
'fencer': _FENCER,
'evacuation': _EVACUATION
'evacuation': _EVACUATION,
'notifiers': _NOTIFIERS
}
return _OPTS.items()

View File

@ -16,6 +16,7 @@ from keystoneclient.auth.identity import v3
from keystoneclient import session
from novaclient.v2 import client as novaclient
from neutronclient.v2_0 import client as neutronclient
from keystoneclient.v3 import client as keystoneclient
from oslo_log import log
LOG = log.getLogger(__name__)
@ -33,16 +34,23 @@ class OSClient:
self.authmethod = authmethod
self.authurl = authurl
self.auth_session = None
self.endpoint_type = 'internalURL'
self.interface = 'internal'
if authmethod == 'password':
self.username = kwargs.get('username', None)
self.password = kwargs.get('password')
self.project_name = kwargs.get('project_name', None)
self.project_id = kwargs.get('project_id', None)
self.user_id = kwargs.get('user_id', None)
self.user_domain_id = kwargs.get('user_domain_id', None)
self.user_domain_name = kwargs.get('user_domain_name', None)
self.project_domain_name = kwargs.get('project_domain_name', None)
self.endpoint_type = kwargs.get('endpoint_type', 'internal')
if 'endpoint_type' in kwargs:
self.endpoint_type = kwargs.pop('endpoint_type', 'internalURL')
if 'interface' in kwargs:
self.interface = kwargs.pop('interface', 'internal')
self.kwargs = kwargs
# self.username = kwargs.get('username', None)
# self.password = kwargs.get('password')
# self.project_name = kwargs.get('project_name', None)
# self.project_id = kwargs.get('project_id', None)
# self.user_id = kwargs.get('user_id', None)
# self.user_domain_id = kwargs.get('user_domain_id', None)
# self.user_domain_name = kwargs.get('user_domain_name', None)
# self.project_domain_name = kwargs.get('project_domain_name', None)
# self.endpoint_type = kwargs.get('endpoint_type', 'internalURL')
else:
print "The available authmethod is password for the time being" \
"Please, provide a password credentials :) "
@ -51,12 +59,7 @@ class OSClient:
def auth(self):
auth = v3.Password(auth_url=self.authurl,
username=self.username,
password=self.password,
project_name=self.project_name,
user_domain_id=self.user_domain_id,
user_domain_name=self.user_domain_name,
project_domain_name=self.project_domain_name)
**self.kwargs)
self.auth_session = session.Session(auth=auth)
def novacomputes(self):
@ -205,4 +208,79 @@ class OSClient:
return []
return hypervisors[0].servers
def get_hypervisor(self, node):
    """
    Look up the hypervisor record that matches a node's host name.

    :param node: dict whose 'host' value is used as the search pattern
    :return: first hypervisor returned by the search, or None when the
        search returns nothing
    """
    nova_session = session.Session(auth=self.auth_session.auth)
    compute = novaclient.Client(session=nova_session,
                                endpoint_type=self.endpoint_type)
    matches = compute.hypervisors.search(node.get('host'), True)
    return matches[0] if matches else None
def get_instances_list(self, node):
    """
    List the servers hosted on a node, across all tenants.

    :param node: dict whose 'host' value is used as the 'host' search option
    :return: list with one dict (``server.to_dict()``) per server found
    """
    nova_session = session.Session(auth=self.auth_session.auth)
    compute = novaclient.Client(session=nova_session,
                                endpoint_type=self.endpoint_type)
    filters = {'host': node.get('host'), 'all_tenants': True}
    found = compute.servers.list(detailed=True, search_opts=filters)
    return [server.to_dict() for server in found]
def get_affected_tenants(self, node):
    """
    Return the instances running on a node.

    Despite its name this is a plain alias of ``get_instances_list``: the
    result is the list of instance dicts, not a list of tenants.

    :param node: dict describing the host, forwarded unchanged
    :return: whatever ``get_instances_list(node)`` returns
    """
    instances = self.get_instances_list(node)
    return instances
def list_tenants(self):
    """
    List the projects returned by Keystone for the current credentials.

    :return: list with one dict (``project.to_dict()``) per project
    """
    keystone_session = session.Session(auth=self.auth_session.auth)
    identity = keystoneclient.Client(session=keystone_session,
                                     endpoint_type=self.endpoint_type)
    return [project.to_dict() for project in identity.projects.list()]
def users_on_tenant(self, tenant):
    """
    List the users whose default project is the given tenant.

    This is best effort: when the Keystone query fails, the error is logged
    through the module logger and an empty list is returned.

    :param tenant: value passed to Keystone as ``default_project``
    :return: list with one dict (``user.to_dict()``) per user, possibly empty
    """
    keystone_session = session.Session(auth=self.auth_session.auth)
    identity = keystoneclient.Client(
        session=keystone_session,
        endpoint_type=self.endpoint_type,
        # Honour the interface configured on the client; fall back to the
        # previous hard-coded value when the attribute is not set.
        interface=getattr(self, 'interface', 'internal'))
    try:
        users = identity.users.list(default_project=tenant)
    except Exception:
        # Keep the call non-fatal, but record the failure with its
        # traceback instead of printing it to stdout.
        LOG.exception('Unable to list users on tenant %s', tenant)
        return []
    return [user.to_dict() for user in users]
def get_hypervisors_stats(self):
    """
    Fetch the hypervisor statistics reported by Nova.

    :return: dict form (``to_dict()``) of the statistics object
    """
    nova_session = session.Session(auth=self.auth_session.auth)
    compute = novaclient.Client(session=nova_session,
                                endpoint_type=self.endpoint_type)
    return compute.hypervisor_stats.statistics().to_dict()
def get_hypervisor_details(self, node):
    """
    Return the detailed record of the hypervisor running on a node.

    :param node: dict whose 'host' value is compared with each hypervisor's
        'hypervisor_hostname'
    :return: dict form of the first matching hypervisor, or None when no
        hypervisor matches
    """
    nova_session = session.Session(auth=self.auth_session.auth)
    compute = novaclient.Client(session=nova_session,
                                endpoint_type=self.endpoint_type)
    records = (entry.to_dict()
               for entry in compute.hypervisors.list(detailed=True))
    return next(
        (record for record in records
         if record.get('hypervisor_hostname') == node.get('host')),
        None)

View File

@ -16,6 +16,7 @@ import os
from osha.common.osclient import OSClient
from oslo_config import cfg
from oslo_log import log
import jinja2
CONF = cfg.CONF
LOG = log.getLogger(__name__)
@ -44,7 +45,45 @@ def get_os_client():
user_domain_id=credentials.get('user_domain_id'),
project_domain_id=credentials.get('project_domain_id'),
project_domain_name=credentials.get('project_domain_name'),
user_domain_name=credentials.get('user_domain_name')
user_domain_name=credentials.get('user_domain_name'),
**credentials.get('kwargs')
)
return client
def load_jinja_templates(template_dir, template_name, template_vars):
    """
    Render a Jinja2 template from disk into the final message text, so a
    notification driver can send the result as-is.

    :param template_dir: directory searched for the template file
    :param template_name: file name of the template to load
    :param template_vars: dict of values substituted into the template
    :return: rendered message as a string
    """
    environment = jinja2.Environment(
        loader=jinja2.FileSystemLoader(searchpath=template_dir))
    return environment.get_template(template_name).render(template_vars)
def get_admin_os_client():
    """
    Build an OSClient from the [keystone_authtoken] configuration section,
    scoped by ``domain_name`` rather than by project.

    Any entries of the section's ``kwargs`` option are forwarded to the
    client as extra keyword arguments.

    :return: initialized OSClient instance
    """
    auth_conf = CONF.get('keystone_authtoken')
    extra_args = auth_conf.get('kwargs')
    return OSClient(
        authurl=auth_conf.get('auth_url'),
        username=auth_conf.get('username'),
        password=auth_conf.get('password'),
        domain_name=auth_conf.get('domain_name'),
        user_domain_id=auth_conf.get('user_domain_id'),
        user_domain_name=auth_conf.get('user_domain_name'),
        **extra_args
    )

View File

@ -1,105 +0,0 @@
# (c) Copyright 2014,2015 Hewlett-Packard Development Company, L.P.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from oslo_config import cfg
from oslo_log import log
from osha.common.osclient import OSClient
from osha.fencers.common.manager import FencerManager
CONF = cfg.CONF
LOG = log.getLogger(__name__)
class EvacuationManager(object):
    """
    Drive the evacuation of failed compute nodes.

    The Evacuation procedure is as follow:
    1- Put node in maintenance mode (disable node )
    2- make sure it's in maintenance mode or disabled
    3- try to fence node and shutdown it
    4- make sure node is down
    5- Get a list of instances running on this node
    6- Evacuate :)
    """

    def __init__(self, nodes=None):
        """
        Build the OpenStack client from the [keystone_authtoken] section and
        remember the nodes to work on.

        @todo we cannot get the credentials from monitoring so, we need to get
        it from keystone section and we need to review that for code in other
        parts

        :param nodes: list of node dicts (each with a 'host' key) to evacuate
        :raises Exception: when ``nodes`` is empty or not provided
        """
        credentials = CONF.get('keystone_authtoken')
        self.client = OSClient(
            authurl=credentials.get('auth_url'),
            username=credentials.get('username'),
            password=credentials.get('password'),
            project_name=credentials.get('project_name'),
            user_domain_id=credentials.get('user_domain_id'),
            project_domain_id=credentials.get('project_domain_id'),
            project_domain_name=credentials.get('project_domain_name'),
            user_domain_name=credentials.get('user_domain_name')
        )
        self.nodes = nodes
        if not nodes:
            raise Exception('No nodes to evacuate ...')

    def evacuate(self):
        """
        Run the evacuation process on ``self.nodes``: refresh the
        maintenance status of every node, disable the nodes if at least one
        is still enabled, fence them, then ask the client to evacuate.

        :return: None
        """
        self.check_nodes_maintenance()
        # A falsy 'status' marks a node that is still enabled.
        if any(not node.get('status') for node in self.nodes):
            self._disable_nodes()
        self.fence_nodes()
        self.list_host_instances()

    def _disable_nodes(self):
        """
        Disable every node that the client still reports as 'enabled' and
        store the outcome in ``node['status']``; nodes that are not enabled
        get status True.
        """
        disabled_nodes = []
        for node in self.nodes:
            hostname = node.get('host')
            node_status = self.client.get_node_status(hostname=hostname)
            if node_status.get('status') == 'enabled':
                node['status'] = self.client.disable_node(hostname=hostname)
            else:
                node['status'] = True
            disabled_nodes.append(node)
        self.nodes = disabled_nodes

    def check_nodes_maintenance(self):
        """
        Set ``node['status']`` for every node: False when the client reports
        the node as 'enabled', True otherwise.
        """
        nodes_status = []
        for node in self.nodes:
            status = self.client.get_node_status(hostname=node.get('host'))
            node['status'] = status.get('status') != 'enabled'
            nodes_status.append(node)
        self.nodes = nodes_status

    def fence_nodes(self):
        """Fence ``self.nodes`` through FencerManager and print the result."""
        fencer = FencerManager(self.nodes)
        nodes = fencer.fence()
        # Parenthesised single-argument form prints the same on Python 2 and 3.
        print(nodes)

    def list_host_instances(self):
        """Ask the client to evacuate ``self.nodes`` (name is historical)."""
        self.client.evacuate(self.nodes)

View File

@ -1 +1,13 @@
__author__ = 'saad'
# (c) Copyright 2014,2015 Hewlett-Packard Development Company, L.P.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

View File

@ -15,6 +15,8 @@ from oslo_config import cfg
from oslo_log import log
from oslo_utils import importutils
from osha.fencers.common.manager import FencerManager
from time import sleep
from osha.evacuators.common.utils import get_nodes_details
CONF = cfg.CONF
LOG = log.getLogger(__name__)
@ -63,36 +65,40 @@ class EvacuationManager(object):
if self.enable_fencing:
fencer = FencerManager(nodes)
nodes = fencer.fence()
"""
@todo this code needs to be commented for the time being till we fix
nova bug found in state, which always go up afer enable or disable. We
will use get_node_details for the time being from the main script to
get nodes details before evacuating ...
succeeded_nodes = []
for node in nodes:
node['instances'] = self.driver.get_node_instances(node)
succeeded_nodes.append(node)
nodes = succeeded_nodes
"""
# Start evacuation calls ...
from time import sleep
for i in range(0, 10):
try:
sleep(30)
evacuated_nodes = self.driver.evacuate_nodes(nodes)
print "Try Number: ", i
print evacuated_nodes
except Exception as e:
LOG.error(e)
return evacuated_nodes
def get_nodes_details(self, nodes):
    """
    Enrich a list of nodes by delegating to the module-level
    ``get_nodes_details`` helper imported from the evacuators utils.
    To be re-structured after fixing the nova bug !

    :param nodes: list of nodes
    :return: list of nodes with more details
    """
    detailed_nodes = get_nodes_details(nodes)
    return detailed_nodes
def _disable_node(self, node):
if not self.driver.is_node_disabled(node):
return self.driver.disable_node(node)
else:
True
def reinitialize_driver(self):
    """
    Re-create ``self.driver`` from the [evacuation] configuration group.

    The driver class named by the 'driver' option is instantiated with the
    'wait' and 'retries' options as positional arguments and the entries of
    'options' as keyword arguments.
    """
    conf = CONF.get('evacuation')
    driver_path = conf.get('driver')
    self.driver = importutils.import_object(
        driver_path,
        conf.get('wait'),
        conf.get('retries'),
        **conf.get('options')
    )

View File

@ -0,0 +1,61 @@
# (c) Copyright 2014,2015 Hewlett-Packard Development Company, L.P.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from osha.common.utils import get_os_client, get_admin_os_client
def get_nodes_details(nodes):
    """
    Attach instance, tenant and hypervisor information to each node.

    Every node dict is updated in place with:
      * 'instances': instances returned by the client for that node
      * 'tenants': set of the 'tenant_id' values of those instances
      * 'details': hypervisor details returned by the client
    The list is then passed through ``get_users_on_tenants``.

    :param nodes: list of hypervisors
    :return: List of hypervisors with detailed information
    """
    client = get_os_client()
    collected = []
    for node in nodes:
        instances = client.get_instances_list(node)
        node['instances'] = instances
        node['tenants'] = set(
            instance.get('tenant_id') for instance in instances)
        node['details'] = client.get_hypervisor_details(node)
        collected.append(node)
    return get_users_on_tenants(collected)
def get_users_on_tenants(nodes):
"""
Expand the 'tenants' entry of each node into detailed tenant records.
Each tenant id is replaced by a dict with the keys 'id', 'users' (result of
client.users_on_tenant for that tenant) and 'instances' (the node's
instances whose 'tenant_id' equals that tenant).
REQUIRE ADMIN PRIVILEGES ! (the client comes from get_admin_os_client)
:param nodes: list of hypervisors
:return: List of hypervisors with detailed tenant info
"""
details = []
client = get_admin_os_client()
for node in nodes:
# Nodes without a 'tenants' key are not expanded.
if 'tenants' in node:
tenants = []
for tenant in node.get('tenants'):
users = client.users_on_tenant(tenant)
tenants.append(
{'id': tenant,
'users': users,
# Only the instances of this node that belong to the current tenant.
'instances': [instance for instance in
node.get('instances') if
instance.get('tenant_id') == tenant]})
# Replace the collection of tenant ids with the detailed records.
node['tenants'] = tenants
# NOTE(review): indentation is not visible here; presumably every node is
# appended, including those without 'tenants' - confirm against the file.
details.append(node)
return details

View File

@ -1 +1,13 @@
__author__ = 'saad'
# (c) Copyright 2014,2015 Hewlett-Packard Development Company, L.P.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

View File

@ -16,6 +16,7 @@ from oslo_config import cfg
from oslo_log import log
from osha.monitors.common.manager import MonitorManager
from osha.evacuators.common.manager import EvacuationManager
from osha.notifiers.common.manager import NotificationManager
CONF = cfg.CONF
LOG = log.getLogger(__name__)
@ -37,7 +38,9 @@ def main():
# Load Fence driver
# Shutdown the node
evac = EvacuationManager()
notify_nodes = evac.get_nodes_details(nodes)
evac.evacuate(nodes)
exit()
print "Fenced nodes are", nodes
notifier = NotificationManager()
notifier.notify(notify_nodes, 'success')
else:
print "No nodes reported to be down"

View File

@ -1,102 +0,0 @@
# (c) Copyright 2014,2015 Hewlett-Packard Development Company, L.P.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from time import sleep
class Monitor(object):
    """
    Detect compute hosts that are down and trigger their evacuation.

    A host is considered down only when the nova compute service, the
    hypervisor and the neutron agent all report it as down.
    """

    def __init__(self, client, wait):
        """
        :param client: object providing novacomputes(), novahypervisors(),
            neutronagents() and evacuate(nodes)
        :param wait: seconds to sleep before re-checking failed nodes
        """
        self.client = client
        self.wait = wait

    def get_down_nodes(self):
        """
        :return: hosts reported down by nova compute, the hypervisor list
            and the neutron agents at the same time
        """
        # list all down nova compute
        nova_down = self.is_nova_service_down()
        # list all down hypervisors
        hypervisor_down = self.is_hpyervisor_down()
        # list all down openvswitch agents
        agents_down = self.is_neutron_agents_down()
        return [node for node in nova_down
                if node in hypervisor_down and node in agents_down]

    def monitor(self):
        """
        Run one monitoring pass: find down nodes, confirm them after the
        configured wait, evacuate the confirmed ones and notify.

        :raises Exception: when nodes had to be evacuated but the client
            reported none as evacuated
        """
        nodes_down = self.get_down_nodes()
        if not nodes_down:
            return
        nodes_to_evacuate = self.process_failed_nodes(nodes_down)
        if not nodes_to_evacuate:
            return
        evacuated_nodes = self.evacuate(nodes_to_evacuate)
        if not evacuated_nodes:
            # Raising a plain string is itself a TypeError on Python >= 2.6;
            # raise a real exception that carries the same message.
            raise Exception(
                "Error: node didn't evacuated ! %s" % (nodes_to_evacuate,))
        self.notify(evacuated_nodes)

    # @todo needs to be implemented !
    def notify(self, nodes):
        """
        will be used to notify the admins that there is something went wrong !

        For now it only prints the first evacuated host and the whole list.

        :param nodes: non-empty list of evacuated nodes; the first entry is
            indexed with 'host'
        """
        print("These nodes %s Evacuated" % nodes[0]['host'])
        print(nodes)

    def evacuate(self, nodes):
        """
        Ask the client to evacuate the given nodes.

        :param nodes: nodes to evacuate
        :return: whatever ``client.evacuate(nodes)`` returns
        """
        # @todo add shutdown process
        # maintence mode not working with libvirt
        # self.client.set_in_maintance(nodes)
        return self.client.evacuate(nodes)

    def process_failed_nodes(self, nodes):
        """
        Wait ``self.wait`` seconds, re-check, and keep only the nodes that
        are still down.

        :param nodes: hosts reported down by the first check
        :return: hosts down in the second check that were also in ``nodes``
        """
        sleep(self.wait)
        return [node for node in self.get_down_nodes() if node in nodes]

    def is_hpyervisor_down(self):
        """
        :return: service hosts of the hypervisors whose 'state' is 'down'
        """
        return [hypervisor.get('service').get('host')
                for hypervisor in self.client.novahypervisors()
                if hypervisor.get('state') == 'down']

    def is_nova_service_down(self):
        """
        :return: hosts of compute services that are 'down' while still
            'enabled'
        """
        return [node.get('host')
                for node in self.client.novacomputes()
                if node.get('state') == 'down'
                and node.get('status') == 'enabled']

    def is_neutron_agents_down(self):
        """
        :return: hosts of agents that are administratively up but not alive
        """
        return [agent.get('host')
                for agent in self.client.neutronagents()
                if agent.get('admin_state_up') and not agent.get('alive')]

View File

@ -0,0 +1,13 @@
# (c) Copyright 2014,2015 Hewlett-Packard Development Company, L.P.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

View File

@ -0,0 +1,13 @@
# (c) Copyright 2014,2015 Hewlett-Packard Development Company, L.P.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

View File

@ -0,0 +1,53 @@
# (c) Copyright 2014,2015 Hewlett-Packard Development Company, L.P.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import six
import abc
@six.add_metaclass(abc.ABCMeta)
class NotifierBaseDriver(object):
    """
    Abstract base for notification drivers: the component that tells
    admins/users that an error happened, a process completed, or something
    went wrong.
    """

    def __init__(self, url, username, password, templates_dir, notify_from,
                 admin_list=None, **kwargs):
        """
        Store the backend settings on the instance.

        :param url: Notification system backend
        :param username: Username
        :param password: Password
        :param templates_dir: Path to templates directory to load message
        templates
        :param notify_from: sender identity, stored as ``self.notify_from``
        :param admin_list: optional value stored as ``self.admin_list``
        :param kwargs: Key:Value arguments, stored as ``self.options``
        """
        # Backend connection settings.
        self.url, self.username, self.password = url, username, password
        # Message rendering and addressing.
        self.templates_dir, self.notify_from = templates_dir, notify_from
        self.admin_list = admin_list
        # Driver-specific extras.
        self.options = kwargs

    @abc.abstractmethod
    def notify(self, node, status):
        """
        Driver-specific notification hook; concrete drivers must implement
        it. It may notify about a Tenant, an Instance, or go deeper.

        :param node: Compute Host, Tenant, Instance, ...
        :param status: Error, Success, Info
        :return: True, False
        """
        pass

View File

@ -0,0 +1,53 @@
# (c) Copyright 2014,2015 Hewlett-Packard Development Company, L.P.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from oslo_config import cfg
from oslo_log import log
from oslo_utils import importutils
CONF = cfg.CONF
LOG = log.getLogger(__name__)
class NotificationManager(object):
    """Load the configured notification driver and dispatch nodes to it."""

    def __init__(self):
        """
        Instantiate the driver named in the [notifiers] group, passing the
        remaining options positionally in the order the base driver expects
        and the 'options' dict as keyword arguments.
        """
        conf = CONF.get('notifiers')
        option_order = ('driver', 'endpoint', 'username', 'password',
                        'templates-dir', 'notify-from', 'notify-list')
        positional = [conf.get(option) for option in option_order]
        self.driver = importutils.import_object(*positional,
                                                **conf.get('options'))

    def notify(self, nodes, status):
        """
        Send Notification to users added on tenants that has VMs running on
        the affected host, by calling the driver once per node.

        :param nodes: List of hosts that are affected, contains instances
            running on those hosts, tenants, users added on those tenants.
        :param status: success or error
        :return: None
        """
        for affected_node in nodes:
            self.driver.notify(affected_node, status)

View File

@ -0,0 +1,13 @@
# (c) Copyright 2014,2015 Hewlett-Packard Development Company, L.P.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

View File

@ -0,0 +1,13 @@
# (c) Copyright 2014,2015 Hewlett-Packard Development Company, L.P.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

View File

@ -0,0 +1,109 @@
# (c) Copyright 2014,2015 Hewlett-Packard Development Company, L.P.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from oslo_config import cfg
from oslo_log import log
from osha.notifiers.common.driver import NotifierBaseDriver
from osha.common.utils import load_jinja_templates
from datetime import date
import time
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
# Handle to the global oslo.config configuration object.
CONF = cfg.CONF
# Logger named after this module; used by the e-mail driver below.
LOG = log.getLogger(__name__)
class OshaEmail(NotifierBaseDriver):
    """
    Notification driver that reports evacuation status by e-mail over SMTP.

    The SMTP connection is opened once in the constructor and reused for
    every message. The instance can be used as a context manager; leaving
    the ``with`` block closes the SMTP session.
    """

    # Textual option values that mean "disabled". Options may arrive as
    # strings (e.g. ``tls:False``), and a non-empty string is always truthy.
    _FALSE_STRINGS = ('', '0', 'false', 'no', 'off')

    def __init__(self, url, username, password, templates_dir, notify_from,
                 admin_list=None, **kwargs):
        """
        Connect to the SMTP server and optionally start TLS / log in.

        :param url: SMTP server host
        :param username: SMTP username; login happens only when both
            username and password are provided
        :param password: SMTP password
        :param templates_dir: Directory holding the jinja message templates
        :param notify_from: Sender address of the notifications
        :param admin_list: Optional list of addresses CC'ed on the per-host
            administrator report
        :param kwargs: Extra options; ``port`` (SMTP port) and ``tls``
            (enable STARTTLS) are the ones read here
        """
        super(OshaEmail, self).__init__(url, username, password, templates_dir,
                                        notify_from, admin_list, **kwargs)
        LOG.info('Initializing OshaEmail driver @ {0}'.format(url))
        server = smtplib.SMTP(url, kwargs.get('port'))
        server.ehlo()
        if self._is_enabled(kwargs.get('tls')):
            LOG.info('TLS enabled !')
            server.starttls()
            # The smtplib documentation asks for a fresh EHLO once the
            # channel is encrypted.
            server.ehlo()
        if username and password:
            server.login(username, password)
            LOG.info('Logged in !')
        self.server = server

    @classmethod
    def _is_enabled(cls, value):
        """Interpret an option value as a flag, honouring 'false'/'0'/'no'."""
        lower = getattr(value, 'lower', None)
        if callable(lower):
            return lower().strip() not in cls._FALSE_STRINGS
        return bool(value)

    def notify(self, node, status):
        """
        E-mail every tenant user that has an address, then the admins.

        :param node: dict describing the affected host. Keys read here:
            ``tenants`` (list of dicts with ``id``, ``instances`` and
            ``users``; each user dict may carry ``email`` and ``name``),
            ``host``, ``instances`` and ``details``.
        :param status: 'success' or 'error'; anything else selects the
            generic info template for the user mails
        :return: None
        """
        _template = 'info.jinja'
        if status == 'success':
            _template = 'user_success.jinja'
        elif status == 'error':
            _template = 'error.jinja'

        # One timestamp for the whole notification round keeps the user and
        # administrator mails consistent with each other.
        evacuation_time = date.fromtimestamp(time.time())

        # Missing 'tenants' / 'users' entries are treated as empty lists.
        for tenant in node.get('tenants') or []:
            for user in tenant.get('users') or []:
                if 'email' not in user:
                    continue
                subject = '[' + status + '] Evacuation Status'
                LOG.debug('Notifying %s about instances %s',
                          user.get('email'), tenant.get('instances'))
                template_vars = {
                    'name': user.get('name'),
                    'tenant': tenant.get('id'),
                    'instances': tenant.get('instances'),
                    'evacuation_time': evacuation_time,
                }
                message = load_jinja_templates(self.templates_dir,
                                               _template, template_vars)
                self.send_email(self.notify_from, user.get('email'),
                                subject, html_msg=message)

        # notify administrators
        # NOTE(review): the administrator report always uses
        # 'success.jinja' and a fixed subject, whatever ``status`` is --
        # confirm this is intended.
        subject = 'Host Evacuation status'
        _template = 'success.jinja'
        template_vars = {
            'host': node.get('host'),
            'tenants': node.get('tenants'),
            'instances': node.get('instances'),
            'hypervisor': node.get('details'),
            'evacuation_time': evacuation_time,
        }
        message = load_jinja_templates(self.templates_dir, _template,
                                       template_vars)
        self.send_email(self.notify_from, self.notify_from, subject,
                        message, self.admin_list or None)

    def send_email(self, mail_from, mail_to, subject, html_msg, cc_list=None,
                   plain_msg=None):
        """
        Build and send one message; failures are logged, not raised.

        :param mail_from: Sender address; falls back to ``self.notify_from``
            when empty
        :param mail_to: Primary recipient address (a single string)
        :param subject: Subject line
        :param html_msg: HTML body
        :param cc_list: Optional list of addresses to CC
        :param plain_msg: Optional plain-text part attached after the HTML
        :return: None
        """
        LOG.info('Sending email ....')
        # Use the same sender for the header and the SMTP envelope.
        sender = mail_from or self.notify_from
        message = MIMEMultipart()
        message['Subject'] = subject
        message['to'] = mail_to
        # SMTP delivers to the envelope recipients only; a 'cc' header alone
        # does not make the server send a copy to those addresses.
        recipients = [mail_to]
        if cc_list:
            message['cc'] = ', '.join(cc_list)
            recipients.extend(cc_list)
        message['from'] = sender
        message.attach(MIMEText(html_msg, 'html'))
        if plain_msg:
            message.attach(MIMEText(plain_msg, 'plain'))
        try:
            self.server.sendmail(sender, recipients, message.as_string())
            LOG.info('Email sent successfully !')
        except Exception as e:
            # Best effort: one failed mail must not abort the remaining
            # notifications, but keep the traceback in the log.
            LOG.exception(e)

    def __enter__(self):
        """Return the driver itself so it can be used in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Close the SMTP session when the ``with`` block ends."""
        self.server.quit()

View File

@ -1,41 +0,0 @@
#__author__ = 'saad'
import sys
import logging as log
import time
from osha.common.daemon import Daemon
# Route root-logger output to 'osha.log' (a relative path).
log.basicConfig(filename='osha.log')
class Osha(Daemon):
    """Daemon whose main loop is still a placeholder."""

    def run(self):
        """Idle forever, waking up once per second."""
        # TODO: the real scheduling code belongs here (apscheduler or plain
        # cron were the candidates); the loop below is only a test.
        idle_seconds = 1
        while True:
            time.sleep(idle_seconds)
if __name__ == '__main__':
    # Command-line entry point: ``<script> start|stop|restart|status``.
    # Won't run unless the pid folder is already created and has the
    # correct permissions.
    osha = Osha('/var/run/osha/osha.pid')
    # Every command handled below, in the order shown to the user.
    commands = ('start', 'stop', 'restart', 'status')
    usage = "Usage %s %s" % (sys.argv[0], '|'.join(commands))

    if len(sys.argv) != 2:
        print(usage)
        # A wrong invocation is an error, same as an unknown command.
        sys.exit(2)

    command = sys.argv[1]
    if command not in commands:
        print("Unknown command ")
        print(usage)
        sys.exit(2)

    # Each command name matches the daemon method that implements it.
    getattr(osha, command)()
    sys.exit(0)

View File

@ -6,4 +6,5 @@ PyYAML>=3.1.0
oslo.config>=1.9.3,<1.10.0 # Apache-2.0
oslo.i18n>=1.5.0,<1.6.0 # Apache-2.0
oslo.utils>=1.4.0,!=1.4.1,<1.5.0
libvirt-python>=1.2.5
libvirt-python>=1.2.5
Jinja2>=2.6