diff --git a/masakari/engine/drivers/taskflow/driver.py b/masakari/engine/drivers/taskflow/driver.py index d8aad36b..84889686 100644 --- a/masakari/engine/drivers/taskflow/driver.py +++ b/masakari/engine/drivers/taskflow/driver.py @@ -20,6 +20,7 @@ Driver TaskFlowDriver: """ from oslo_log import log as logging +from oslo_utils import excutils from masakari.compute import nova from masakari.engine import driver @@ -39,6 +40,81 @@ class TaskFlowDriver(driver.NotificationDriver): def __init__(self): super(TaskFlowDriver, self).__init__() + def _execute_auto_workflow(self, novaclient, process_what): + flow_engine = host_failure.get_auto_flow(novaclient, process_what) + + # Attaching this listener will capture all of the notifications + # that taskflow sends out and redirect them to a more useful + # log for masakari's debugging (or error reporting) usage. + with base.DynamicLogListener(flow_engine, logger=LOG): + flow_engine.run() + + def _execute_rh_workflow(self, novaclient, process_what, + reserved_host_list): + if not reserved_host_list: + msg = _('No reserved_hosts available for evacuation.') + LOG.info(msg) + raise exception.ReservedHostsUnavailable(message=msg) + + process_what['reserved_host_list'] = reserved_host_list + flow_engine = host_failure.get_rh_flow(novaclient, process_what) + + with base.DynamicLogListener(flow_engine, logger=LOG): + try: + flow_engine.run() + except exception.LockAlreadyAcquired as ex: + raise exception.HostRecoveryFailureException(ex.message) + + def _execute_auto_priority_workflow(self, novaclient, process_what, + reserved_host_list): + try: + self._execute_auto_workflow(novaclient, process_what) + except Exception as ex: + with excutils.save_and_reraise_exception(reraise=False) as ctxt: + if isinstance(ex, exception.SkipHostRecoveryException): + ctxt.reraise = True + return + + # Caught generic Exception to make sure that any failure + # should lead to execute 'reserved_host' recovery workflow. + msg = _LW("Failed to evacuate all instances from " + "failed_host: '%(failed_host)s' using " + "'%(auto)s' workflow, retrying using " + "'%(reserved_host)s' workflow.") + LOG.warning(msg, { + 'failed_host': process_what['host_name'], + 'auto': fields.FailoverSegmentRecoveryMethod.AUTO, + 'reserved_host': + fields.FailoverSegmentRecoveryMethod.RESERVED_HOST + }) + self._execute_rh_workflow(novaclient, process_what, + reserved_host_list) + + def _execute_rh_priority_workflow(self, novaclient, process_what, + reserved_host_list): + try: + self._execute_rh_workflow(novaclient, process_what, + reserved_host_list) + except Exception as ex: + with excutils.save_and_reraise_exception(reraise=False) as ctxt: + if isinstance(ex, exception.SkipHostRecoveryException): + ctxt.reraise = True + return + + # Caught generic Exception to make sure that any failure + # should lead to execute 'auto' recovery workflow. + msg = _LW("Failed to evacuate all instances from " + "failed_host '%(failed_host)s' using " + "'%(reserved_host)s' workflow, retrying using " + "'%(auto)s' workflow") + LOG.warning(msg, { + 'failed_host': process_what['host_name'], + 'reserved_host': + fields.FailoverSegmentRecoveryMethod.RESERVED_HOST, + 'auto': fields.FailoverSegmentRecoveryMethod.AUTO + }) + self._execute_auto_workflow(novaclient, process_what) + def execute_host_failure(self, context, host_name, recovery_method, notification_uuid, reserved_host_list=None): novaclient = nova.API() @@ -50,42 +126,28 @@ class TaskFlowDriver(driver.NotificationDriver): try: if recovery_method == fields.FailoverSegmentRecoveryMethod.AUTO: - flow_engine = host_failure.get_auto_flow(novaclient, - process_what) + self._execute_auto_workflow(novaclient, process_what) elif recovery_method == ( fields.FailoverSegmentRecoveryMethod.RESERVED_HOST): - if not reserved_host_list: - msg = _('No reserved_hosts available for evacuation.') - LOG.info(msg) - raise exception.ReservedHostsUnavailable(message=msg) - - process_what['reserved_host_list'] = reserved_host_list - flow_engine = host_failure.get_rh_flow( - novaclient, process_what) + self._execute_rh_workflow(novaclient, process_what, + reserved_host_list) elif recovery_method == ( fields.FailoverSegmentRecoveryMethod.AUTO_PRIORITY): - raise NotImplementedError(_("Flow not implemented for " - "recovery_method"), - recovery_method) - elif recovery_method == ( - fields.FailoverSegmentRecoveryMethod.RH_PRIORITY): - raise NotImplementedError(_("Flow not implemented for " - "recovery_method"), - recovery_method) - except Exception: - msg = (_('Failed to create host failure flow.'), - notification_uuid) - LOG.exception(msg) - raise exception.MasakariException(msg) - - # Attaching this listener will capture all of the notifications that - # taskflow sends out and redirect them to a more useful log for - # masakari's debugging (or error reporting) usage. - with base.DynamicLogListener(flow_engine, logger=LOG): - try: - flow_engine.run() - except exception.LockAlreadyAcquired as ex: - raise exception.HostRecoveryFailureException(ex.message) + self._execute_auto_priority_workflow(novaclient, process_what, + reserved_host_list) + else: + self._execute_rh_priority_workflow(novaclient, process_what, + reserved_host_list) + except Exception as exc: + with excutils.save_and_reraise_exception(reraise=False) as ctxt: + if isinstance(exc, (exception.SkipHostRecoveryException, + exception.HostRecoveryFailureException, + exception.ReservedHostsUnavailable)): + ctxt.reraise = True + return + msg = _("Failed to execute host failure flow for " + "notification '%s'.") % notification_uuid + raise exception.MasakariException(msg) def execute_instance_failure(self, context, instance_uuid, notification_uuid): diff --git a/masakari/tests/unit/engine/drivers/taskflow/test_taskflow_driver.py b/masakari/tests/unit/engine/drivers/taskflow/test_taskflow_driver.py new file mode 100644 index 00000000..481b6360 --- /dev/null +++ b/masakari/tests/unit/engine/drivers/taskflow/test_taskflow_driver.py @@ -0,0 +1,217 @@ +# Copyright 2017 NTT DATA +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import mock + +from masakari import context +from masakari.engine.drivers.taskflow import base +from masakari.engine.drivers.taskflow import driver +from masakari.engine.drivers.taskflow import host_failure +from masakari import exception +from masakari.objects import fields +from masakari import test +from masakari.tests import uuidsentinel + + +class FakeFlow(object): + """Fake flow class of taskflow.""" + + def run(self): + # run method which actually runs the flow + pass + + +class TaskflowDriverTestCase(test.TestCase): + + def setUp(self): + super(TaskflowDriverTestCase, self).setUp() + self.taskflow_driver = driver.TaskFlowDriver() + self.ctxt = context.get_admin_context() + + @mock.patch.object(base, 'DynamicLogListener') + @mock.patch.object(host_failure, 'get_auto_flow') + @mock.patch.object(host_failure, 'get_rh_flow') + def test_auto_priority_recovery_flow_auto_success( + self, mock_rh_flow, mock_auto_flow, mock_listener): + mock_auto_flow.return_value = FakeFlow + FakeFlow.run = mock.Mock(return_value=None) + self.taskflow_driver.execute_host_failure( + self.ctxt, 'fake_host', + fields.FailoverSegmentRecoveryMethod.AUTO_PRIORITY, + uuidsentinel.fake_notification, reserved_host_list=[ + 'host-1', 'host-2']) + + # Ensures that 'auto' flow executes successfully + self.assertTrue(mock_auto_flow.called) + # Ensures that 'reserved_host' flow will not execute + self.assertFalse(mock_rh_flow.called) + + @mock.patch.object(base, 'DynamicLogListener') + @mock.patch.object(host_failure, 'get_auto_flow') + @mock.patch.object(host_failure, 'get_rh_flow') + def test_auto_priority_recovery_flow_rh_success( + self, mock_rh_flow, mock_auto_flow, mock_listener): + mock_auto_flow.return_value = FakeFlow + FakeFlow.run = mock.Mock( + side_effect=exception.HostRecoveryFailureException) + self.taskflow_driver.execute_host_failure( + self.ctxt, 'fake_host', + fields.FailoverSegmentRecoveryMethod.AUTO_PRIORITY, + uuidsentinel.fake_notification, reserved_host_list=[ + 'host-1', 'host-2']) + + # Ensures that 'auto' flow fails to recover instances + self.assertTrue(mock_auto_flow.called) + # Ensures that 'reserved_host' flow executes as 'auto' flow fails + self.assertTrue(mock_rh_flow.called) + + @mock.patch.object(base, 'DynamicLogListener') + @mock.patch.object(host_failure, 'get_auto_flow') + @mock.patch.object(host_failure, 'get_rh_flow') + def test_rh_priority_recovery_flow_rh_success( + self, mock_rh_flow, mock_auto_flow, mock_listener): + mock_rh_flow.return_value = FakeFlow + FakeFlow.run = mock.Mock(return_value=None) + self.taskflow_driver.execute_host_failure( + self.ctxt, 'fake_host', + fields.FailoverSegmentRecoveryMethod.RH_PRIORITY, + uuidsentinel.fake_notification, reserved_host_list=[ + 'host-1', 'host-2']) + + # Ensures that 'reserved_host' flow executes successfully + self.assertTrue(mock_rh_flow.called) + # Ensures that 'auto' flow will not execute + self.assertFalse(mock_auto_flow.called) + + @mock.patch.object(base, 'DynamicLogListener') + @mock.patch.object(host_failure, 'get_auto_flow') + @mock.patch.object(host_failure, 'get_rh_flow') + def test_rh_priority_recovery_flow_auto_success( + self, mock_rh_flow, mock_auto_flow, mock_listener): + mock_rh_flow.return_value = FakeFlow + FakeFlow.run = mock.Mock( + side_effect=exception.HostRecoveryFailureException) + self.taskflow_driver.execute_host_failure( + self.ctxt, 'fake_host', + fields.FailoverSegmentRecoveryMethod.RH_PRIORITY, + uuidsentinel.fake_notification, reserved_host_list=[ + 'host-1', 'host-2']) + + # Ensures that 'reserved_host' flow fails to recover instances + self.assertTrue(mock_rh_flow.called) + # Ensures that 'auto' flow executes as 'reserved_host' flow fails + self.assertTrue(mock_auto_flow.called) + + @mock.patch.object(base, 'DynamicLogListener') + @mock.patch.object(host_failure, 'get_auto_flow') + @mock.patch.object(host_failure, 'get_rh_flow') + def test_complete_auto_priority_recovery_flow_failure( + self, mock_rh_flow, mock_auto_flow, mock_listener): + + mock_auto_flow.return_value = FakeFlow + mock_rh_flow.return_value = FakeFlow + FakeFlow.run = mock.Mock( + side_effect=exception.HostRecoveryFailureException) + + # Ensures that both 'auto' and 'reserved_host' flow fails to + # evacuate instances + self.assertRaises( + exception.HostRecoveryFailureException, + self.taskflow_driver.execute_host_failure, self.ctxt, 'fake_host', + fields.FailoverSegmentRecoveryMethod.AUTO_PRIORITY, + uuidsentinel.fake_notification, + reserved_host_list=['host-1', 'host-2']) + + @mock.patch.object(base, 'DynamicLogListener') + @mock.patch.object(host_failure, 'get_auto_flow') + @mock.patch.object(host_failure, 'get_rh_flow') + def test_complete_rh_priority_recovery_flow_failure( + self, mock_rh_flow, mock_auto_flow, mock_listener): + + mock_rh_flow.return_value = FakeFlow + mock_auto_flow.return_value = FakeFlow + FakeFlow.run = mock.Mock( + side_effect=exception.HostRecoveryFailureException) + + # Ensures that both 'reserved_host' and 'auto' flow fails to + # evacuate instances + self.assertRaises( + exception.HostRecoveryFailureException, + self.taskflow_driver.execute_host_failure, self.ctxt, 'fake_host', + fields.FailoverSegmentRecoveryMethod.RH_PRIORITY, + uuidsentinel.fake_notification, + reserved_host_list=['host-1', 'host-2']) + + @mock.patch.object(base, 'DynamicLogListener') + @mock.patch.object(host_failure, 'get_auto_flow') + @mock.patch.object(host_failure, 'get_rh_flow') + def test_rh_priority_recovery_flow_skip_recovery( + self, mock_rh_flow, mock_auto_flow, mock_listener): + + mock_rh_flow.return_value = FakeFlow + FakeFlow.run = mock.Mock( + side_effect=exception.SkipHostRecoveryException) + + self.assertRaises( + exception.SkipHostRecoveryException, + self.taskflow_driver.execute_host_failure, self.ctxt, 'fake_host', + fields.FailoverSegmentRecoveryMethod.RH_PRIORITY, + uuidsentinel.fake_notification, + reserved_host_list=['host-1', 'host-2']) + + # Ensures that 'reserved_host' flow executes but skip the host + # recovery + self.assertTrue(mock_rh_flow.called) + # Ensures that 'auto' flow will not execute + self.assertFalse(mock_auto_flow.called) + + @mock.patch.object(base, 'DynamicLogListener') + @mock.patch.object(host_failure, 'get_auto_flow') + @mock.patch.object(host_failure, 'get_rh_flow') + def test_auto_priority_recovery_flow_skip_recovery( + self, mock_rh_flow, mock_auto_flow, mock_listener): + + mock_auto_flow.return_value = FakeFlow + FakeFlow.run = mock.Mock( + side_effect=exception.SkipHostRecoveryException) + + self.assertRaises( + exception.SkipHostRecoveryException, + self.taskflow_driver.execute_host_failure, self.ctxt, 'fake_host', + fields.FailoverSegmentRecoveryMethod.AUTO_PRIORITY, + uuidsentinel.fake_notification, + reserved_host_list=['host-1', 'host-2']) + + # Ensures that 'auto' flow executes but skip the host recovery + self.assertTrue(mock_auto_flow.called) + # Ensures that 'reserved_host' flow will not execute + self.assertFalse(mock_rh_flow.called) + + @mock.patch.object(base, 'DynamicLogListener') + @mock.patch.object(host_failure, 'get_auto_flow') + @mock.patch.object(host_failure, 'get_rh_flow') + def test_rh_priority_recovery_flow_reserved_hosts_not_available( + self, mock_rh_flow, mock_auto_flow, mock_listener): + + self.taskflow_driver.execute_host_failure( + self.ctxt, 'fake_host', + fields.FailoverSegmentRecoveryMethod.RH_PRIORITY, + uuidsentinel.fake_notification) + + # Ensures that if there are no reserved_hosts for recovery + # 'reserved_host' flow will not execute + self.assertFalse(mock_rh_flow.called) + # Ensures that 'auto' flow executes as 'reserved_host' flow fails + self.assertTrue(mock_auto_flow.called) diff --git a/releasenotes/notes/auto_priority_and_rh_priority_recovery_methods-b88cc00041fa2c4d.yaml b/releasenotes/notes/auto_priority_and_rh_priority_recovery_methods-b88cc00041fa2c4d.yaml new file mode 100644 index 00000000..dce3e43a --- /dev/null +++ b/releasenotes/notes/auto_priority_and_rh_priority_recovery_methods-b88cc00041fa2c4d.yaml @@ -0,0 +1,12 @@ +--- +features: + - | + Implemented workflow for 'auto_priority' and 'rh_priority' recovery methods + in case of host failure recovery. Operators can set failover_segment's + recovery_method as 'auto_priority' and 'rh_priority' now. In case of + 'auto_priority' the 'auto' workflow will be executed first to recover the + instances from failed compute host. If 'auto' workflow fails to recover + the instances then 'reserved_host' workflow will be tried. In case of + 'rh_priority' the 'reserved_host' workflow will be executed first to + recover the instances from failed compute host. If 'reserved_host' workflow + fails to recover the instances then 'auto' workflow will be tried. \ No newline at end of file