Notify if metrics are not defined or not added for hosts
The monitoring driver now receives a notifier instance. If the driver fails to get data for a certain metric, or cannot find the metric added on a certain host, the notifier is used to tell the admins list (notify-list in the config file) that something is wrong with the metrics and needs to be looked at. The base notification driver has changed to support the following methods: 1. notify_status: used to notify when a node has failed and been evacuated. 2. notify: can be used anywhere in the monitoring drivers to notify administrators that something is wrong. Change-Id: I12eddf03d1921a04f5fa9a8101a471bea5f9c507
This commit is contained in:
parent
7ca4980a8c
commit
61558d2b33
|
@ -26,8 +26,10 @@ def main():
|
|||
config.configure()
|
||||
config.setup_logging()
|
||||
LOG.info('Starting Freezer DR ... ')
|
||||
# initialize the notification driver as it will be used in many parts
|
||||
notifier = NotificationManager()
|
||||
# load and initialize the monitoring driver
|
||||
monitor = MonitorManager()
|
||||
monitor = MonitorManager(notifier=notifier.get_driver())
|
||||
# Do the monitoring procedure
|
||||
# Monitor, analyse, nodes down ?, wait, double check ? evacuate ..
|
||||
nodes = monitor.monitor()
|
||||
|
@ -40,7 +42,6 @@ def main():
|
|||
evac = EvacuationManager()
|
||||
notify_nodes = evac.get_nodes_details(nodes)
|
||||
evac.evacuate(nodes)
|
||||
notifier = NotificationManager()
|
||||
notifier.notify(notify_nodes, 'success')
|
||||
else:
|
||||
print "No nodes reported to be down"
|
||||
|
|
|
@ -28,7 +28,7 @@ class MonitorBaseDriver(object):
|
|||
"""
|
||||
_OPTS = []
|
||||
|
||||
def __init__(self, backend_name):
|
||||
def __init__(self, backend_name, notifier):
|
||||
"""
|
||||
Initializing the driver. Any monitoring system requires the following
|
||||
parameters to call it's api. All these parameters can be passed from the
|
||||
|
@ -36,9 +36,15 @@ class MonitorBaseDriver(object):
|
|||
:param backend_name: Name of section in the configuration file that
|
||||
contains your driver initialization details; like username, password,
|
||||
endpoint and so on. Variables in this section depends on your driver
|
||||
|
||||
:param notifier: Notifier instance which can be used to notify the
|
||||
admins in case of error or problem happened during the DR process.
|
||||
You should only call notify method and send it your message to send
|
||||
it to the admins
|
||||
"""
|
||||
CONF.register_opts(self._OPTS, group=backend_name)
|
||||
self.conf = CONF.get(backend_name)
|
||||
self.notifier = notifier
|
||||
|
||||
@abc.abstractmethod
|
||||
def get_data(self):
|
||||
|
|
|
@ -21,12 +21,13 @@ LOG = log.getLogger(__name__)
|
|||
|
||||
class MonitorManager(object):
|
||||
|
||||
def __init__(self):
|
||||
def __init__(self, notifier):
|
||||
monitor = CONF.get('monitoring')
|
||||
backend_name = monitor['backend_name']
|
||||
self.driver = importutils.import_object(
|
||||
monitor.driver,
|
||||
backend_name=backend_name
|
||||
backend_name=backend_name,
|
||||
notifier=notifier
|
||||
)
|
||||
driver_info = self.driver.get_info()
|
||||
LOG.info('Initializing driver %s with version %s found in %s' %
|
||||
|
|
|
@ -42,8 +42,9 @@ class StandardDriver(MonitorBaseDriver):
|
|||
' key:value format'),
|
||||
]
|
||||
|
||||
def __init__(self, backend_name):
|
||||
super(StandardDriver, self).__init__(backend_name=backend_name)
|
||||
def __init__(self, backend_name, notifier):
|
||||
super(StandardDriver, self).__init__(backend_name=backend_name,
|
||||
notifier=notifier)
|
||||
self.endpoint = self.conf.endpoint
|
||||
client = OSClient(
|
||||
authurl=self.conf.endpoint,
|
||||
|
|
|
@ -11,8 +11,13 @@
|
|||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from oslo_config import cfg
|
||||
|
||||
from freezer_dr.monitors.common.driver import MonitorBaseDriver
|
||||
|
||||
CONF = cfg.CONF
|
||||
|
||||
|
||||
class DummyDriver(MonitorBaseDriver):
|
||||
"""A monitoring driver that returns a configured list of nodes as failed.
|
||||
|
@ -22,11 +27,18 @@ class DummyDriver(MonitorBaseDriver):
|
|||
monitoring section of the freezer_dr configuration file as follows:
|
||||
kwargs = nodes_down:hostname1;hostname2
|
||||
"""
|
||||
_OPTS = [
|
||||
cfg.ListOpt('nodes_down',
|
||||
default=[],
|
||||
required=True,
|
||||
help="fake list of failed compute nodes.")
|
||||
]
|
||||
|
||||
def __init__(self, username, password, endpoint, **kwargs):
|
||||
super(DummyDriver, self).__init__(username, password, endpoint, **kwargs)
|
||||
def __init__(self, backend_name, notifier):
|
||||
super(DummyDriver, self).__init__(backend_name=backend_name,
|
||||
notifier=notifier)
|
||||
|
||||
hostnames = kwargs['nodes_down'].split(';')
|
||||
hostnames = self.conf.get('nodes_down', [])
|
||||
self.nodes_down = [{'host': n} for n in hostnames]
|
||||
|
||||
def get_data(self):
|
||||
|
@ -48,5 +60,5 @@ class DummyDriver(MonitorBaseDriver):
|
|||
return {
|
||||
'name': 'Freezer DR Dummy Driver',
|
||||
'version': 1.0,
|
||||
'author': 'Hewlett-Packard Development Company, L.P'
|
||||
'author': 'Hewlett-Packard Enterprise Development, L.P'
|
||||
}
|
||||
|
|
|
@ -82,8 +82,9 @@ class MonascaDriver(driver.MonitorBaseDriver):
|
|||
"Default is all")
|
||||
]
|
||||
|
||||
def __init__(self, backend_name):
|
||||
super(MonascaDriver, self).__init__(backend_name=backend_name)
|
||||
def __init__(self, backend_name, notifier):
|
||||
super(MonascaDriver, self).__init__(backend_name=backend_name,
|
||||
notifier=notifier)
|
||||
self.monasca_client = client.Client(
|
||||
"2_0",
|
||||
self.conf['monasca_url'],
|
||||
|
@ -198,7 +199,9 @@ class MonascaDriver(driver.MonitorBaseDriver):
|
|||
for node, metrics in nodes.iteritems():
|
||||
node_data = {node: []}
|
||||
for metric_name, metric_data in metrics.iteritems():
|
||||
node_data[node].append(self.__process_metric(metric_name, metric_data))
|
||||
node_data[node].append(
|
||||
self.__process_metric(node, metric_name, metric_data)
|
||||
)
|
||||
nodes_data.append(node_data)
|
||||
|
||||
aggregate = self.conf.get('aggregate', 'all')
|
||||
|
@ -221,7 +224,7 @@ class MonascaDriver(driver.MonitorBaseDriver):
|
|||
if True in host.values()
|
||||
]
|
||||
|
||||
def __process_metric(self, metric_name, metric_data):
|
||||
def __process_metric(self, node, metric_name, metric_data):
|
||||
metric_conf = CONF[metric_name]
|
||||
# process UNDETERMINED State and change it to the required state
|
||||
metric_data = [
|
||||
|
@ -229,6 +232,30 @@ class MonascaDriver(driver.MonitorBaseDriver):
|
|||
metric_conf.get('undetermined', 'ALARM').upper()
|
||||
for i in metric_data
|
||||
]
|
||||
if not metric_data:
|
||||
message = """
|
||||
No data found for this metric: {0} <br />
|
||||
Data returned: {1} <br />
|
||||
hostname: {2} <br />
|
||||
Cause might be: <br />
|
||||
<ul>
|
||||
<li>Metric is not defined in Monasca </li>
|
||||
<li>Alarm with this metric name is not set for this host </li>
|
||||
<li>Check your Monasca configuration and Metric configuration
|
||||
defined in freezer-dr.conf </li>
|
||||
</ul>
|
||||
You can try this command to check: <br />
|
||||
$ monasca alarm-list --metric-name {3} --metric-dimensions
|
||||
hostname={2}
|
||||
<br /> <br />
|
||||
Freezer-DR
|
||||
""".format(metric_name, str(metric_data), node,
|
||||
metric_conf['metric_name'])
|
||||
self.notifier.notify(message)
|
||||
LOG.warning("No data found for metric: {0} on host: {1}".format(
|
||||
metric_name, node
|
||||
))
|
||||
exit(1)
|
||||
# build the decision
|
||||
aggregate = metric_conf.get('aggregate')
|
||||
aggregate += "(x=='ALARM' for x in metric_data)"
|
||||
|
|
|
@ -17,15 +17,13 @@ import abc
|
|||
|
||||
@six.add_metaclass(abc.ABCMeta)
|
||||
class NotifierBaseDriver(object):
|
||||
"""
|
||||
Used to notify admins/users at any stage that an error happened or process
|
||||
completed or something went wrong !
|
||||
""" Used to notify admins/users at any stage that an error happened or
|
||||
process completed or something went wrong !
|
||||
"""
|
||||
|
||||
def __init__(self, url, username, password, templates_dir, notify_from,
|
||||
admin_list=None, **kwargs):
|
||||
"""
|
||||
Initialize the notification backend.
|
||||
""" Initialize the notification backend.
|
||||
:param url: Notification system backend
|
||||
:param username: Username
|
||||
:param password: Password
|
||||
|
@ -42,12 +40,20 @@ class NotifierBaseDriver(object):
|
|||
self.options = kwargs
|
||||
|
||||
@abc.abstractmethod
|
||||
def notify(self, node, status):
|
||||
"""
|
||||
Custom notification method. Can be used if you want to send custom
|
||||
def notify_status(self, node, status):
|
||||
""" Custom notification method. Can be used if you want to send custom
|
||||
notification about Tenant, Instance, or go deeper if you want
|
||||
:param node: Compute Host, Tenant, Instance, ...
|
||||
:param status: Error, Success, Info
|
||||
:return: True, False
|
||||
"""
|
||||
pass
|
||||
|
||||
@abc.abstractmethod
|
||||
def notify(self, message):
|
||||
""" This method will be used in different places to notify admins
|
||||
about certain problem
|
||||
:param message: String message name
|
||||
:return:
|
||||
"""
|
||||
pass
|
||||
|
|
|
@ -45,9 +45,10 @@ class NotificationManager(object):
|
|||
:return:
|
||||
"""
|
||||
for node in nodes:
|
||||
self.driver.notify(node, status)
|
||||
|
||||
|
||||
self.driver.notify_status(node, status)
|
||||
|
||||
def get_driver(self):
|
||||
return self.driver
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -44,7 +44,7 @@ class StandardEmail(NotifierBaseDriver):
|
|||
LOG.info('Logged in !')
|
||||
self.server = server
|
||||
|
||||
def notify(self, node, status):
|
||||
def notify_status(self, node, status):
|
||||
_template = 'info.jinja'
|
||||
if status == 'success':
|
||||
_template = 'user_success.jinja'
|
||||
|
@ -102,6 +102,19 @@ class StandardEmail(NotifierBaseDriver):
|
|||
except Exception as e:
|
||||
LOG.error(e)
|
||||
|
||||
def notify(self, message):
|
||||
try:
|
||||
self.send_email(
|
||||
mail_from=self.notify_from,
|
||||
mail_to=self.notify_from,
|
||||
subject="[Freezer-DR] Problem Occurred",
|
||||
html_msg=message,
|
||||
cc_list=self.admin_list or []
|
||||
)
|
||||
return True
|
||||
except:
|
||||
return False
|
||||
|
||||
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||
self.server.quit()
|
||||
|
||||
|
|
Loading…
Reference in New Issue