From 7aef2966f5a8c0f097d9614fcece7657e80f20b4 Mon Sep 17 00:00:00 2001 From: dineshbhor Date: Thu, 6 Apr 2017 19:22:24 +0530 Subject: [PATCH] Ignore instance recovery for 'paused' or 'rescued' instance If masakari receives an instance failure notification, it fails to recover that instance if it is in the 'paused' or 'rescued' state. As a recovery action, masakari-engine calls nova to stop the instance, but nova doesn't allow this and returns 409, which results in an instance recovery failure, and masakari marks that notification's status as "error". This could be solved by maintaining consistency between the vm_state before and after recovery, but that requires starting the instance again to bring its qemu process back alive, which might change the internal state of the instance and result in inconsistency between the instance state before and after recovery. So, as a solution, this patch ignores the instance recovery and logs a warning if the instance is in the 'paused' or 'rescued' state. 
Closes-Bug: #1663513 Change-Id: Id1cce45aad253527bedb58ab32f3d89637e02582 --- .../drivers/taskflow/instance_failure.py | 7 +++++ masakari/engine/manager.py | 2 ++ masakari/exception.py | 4 +++ masakari/tests/unit/engine/test_engine_mgr.py | 30 +++++++++++++++++++ 4 files changed, 43 insertions(+) diff --git a/masakari/engine/drivers/taskflow/instance_failure.py b/masakari/engine/drivers/taskflow/instance_failure.py index e8e866c3..f5012ee6 100644 --- a/masakari/engine/drivers/taskflow/instance_failure.py +++ b/masakari/engine/drivers/taskflow/instance_failure.py @@ -56,6 +56,13 @@ class StopInstanceTask(base.MasakariTask): raise exception.SkipInstanceRecoveryException() vm_state = getattr(instance, 'OS-EXT-STS:vm_state') + if vm_state in ['paused', 'rescued']: + msg = _("Recovery of instance '%(instance_uuid)s' is ignored as" + " it is in '%(vm_state)s' state.") % { + 'instance_uuid': instance_uuid, 'vm_state': vm_state} + LOG.warning(msg) + raise exception.IgnoreInstanceRecoveryException(msg) + if vm_state != 'stopped': if vm_state == 'resized': self.novaclient.reset_instance_state( diff --git a/masakari/engine/manager.py b/masakari/engine/manager.py index 58db9749..27127ec9 100644 --- a/masakari/engine/manager.py +++ b/masakari/engine/manager.py @@ -110,6 +110,8 @@ class MasakariManager(manager.Manager): self.driver.execute_instance_failure( context, notification.payload.get('instance_uuid'), notification.notification_uuid) + except exception.IgnoreInstanceRecoveryException: + notification_status = fields.NotificationStatus.IGNORED except exception.SkipInstanceRecoveryException: notification_status = fields.NotificationStatus.FINISHED except (exception.MasakariException, diff --git a/masakari/exception.py b/masakari/exception.py index eb587e43..d0faa1a1 100644 --- a/masakari/exception.py +++ b/masakari/exception.py @@ -352,3 +352,7 @@ class ReservedHostsUnavailable(MasakariException): class LockAlreadyAcquired(MasakariException): msg_fmt = _('Lock is already 
acquired on %(resource)s.') + + +class IgnoreInstanceRecoveryException(MasakariException): + msg_fmt = _('Instance recovery is ignored.') diff --git a/masakari/tests/unit/engine/test_engine_mgr.py b/masakari/tests/unit/engine/test_engine_mgr.py index f2a595a7..0ba4fb85 100644 --- a/masakari/tests/unit/engine/test_engine_mgr.py +++ b/masakari/tests/unit/engine/test_engine_mgr.py @@ -388,3 +388,33 @@ class EngineManagerUnitTestCase(test.NoDBTestCase): notification=notification) self.assertEqual("ignored", notification.status) self.assertFalse(mock_host_failure.called) + + @mock.patch("masakari.compute.nova.API.stop_server") + @mock.patch.object(notification_obj.Notification, "save") + @mock.patch("masakari.compute.nova.API.get_server") + def test_process_notification_type_vm_ignore_instance_in_paused( + self, mock_get_server, mock_notification_save, mock_stop_server): + notification = self._get_vm_type_notification() + mock_get_server.return_value = fakes.FakeNovaClient.Server( + id=1, uuid=uuidsentinel.fake_ins, host='fake_host', + vm_state='paused', ha_enabled=True) + + self.engine.process_notification(self.context, + notification=notification) + self.assertEqual("ignored", notification.status) + self.assertFalse(mock_stop_server.called) + + @mock.patch("masakari.compute.nova.API.stop_server") + @mock.patch.object(notification_obj.Notification, "save") + @mock.patch("masakari.compute.nova.API.get_server") + def test_process_notification_type_vm_ignore_instance_in_rescued( + self, mock_get_server, mock_notification_save, mock_stop_server): + notification = self._get_vm_type_notification() + mock_get_server.return_value = fakes.FakeNovaClient.Server( + id=1, uuid=uuidsentinel.fake_ins, host='fake_host', + vm_state='rescued', ha_enabled=True) + + self.engine.process_notification(self.context, + notification=notification) + self.assertEqual("ignored", notification.status) + self.assertFalse(mock_stop_server.called)