Ignore instance recovery for 'paused' or 'rescued' instance

If masakari receives an instance failure notification, it fails to
recover that instance if it is in the 'paused' or 'rescued' state.
As a recovery action, masakari-engine calls nova to stop the
instance, but nova does not allow this and returns 409, which
results in an instance recovery failure, and masakari marks that
notification's status as "error".

This could be solved by maintaining consistency between the vm_state
before and after recovery, but that requires starting the instance
again to bring its qemu process back alive, which might change the
internal state of the instance and so result in an inconsistency
between the instance state before and after recovery.
So, as a solution, this patch proposes to ignore the instance recovery
and log a warning if the instance is in the 'paused' or 'rescued' state.

Closes-Bug: #1663513
Change-Id: Id1cce45aad253527bedb58ab32f3d89637e02582
This commit is contained in:
dineshbhor 2017-04-06 19:22:24 +05:30
parent c9f7a3c56d
commit 7aef2966f5
4 changed files with 43 additions and 0 deletions

View File

@ -56,6 +56,13 @@ class StopInstanceTask(base.MasakariTask):
raise exception.SkipInstanceRecoveryException()
vm_state = getattr(instance, 'OS-EXT-STS:vm_state')
if vm_state in ['paused', 'rescued']:
msg = _("Recovery of instance '%(instance_uuid)s' is ignored as"
" it is in '%(vm_state)s' state.") % {
'instance_uuid': instance_uuid, 'vm_state': vm_state}
LOG.warning(msg)
raise exception.IgnoreInstanceRecoveryException(msg)
if vm_state != 'stopped':
if vm_state == 'resized':
self.novaclient.reset_instance_state(

View File

@ -110,6 +110,8 @@ class MasakariManager(manager.Manager):
self.driver.execute_instance_failure(
context, notification.payload.get('instance_uuid'),
notification.notification_uuid)
except exception.IgnoreInstanceRecoveryException:
notification_status = fields.NotificationStatus.IGNORED
except exception.SkipInstanceRecoveryException:
notification_status = fields.NotificationStatus.FINISHED
except (exception.MasakariException,

View File

@ -352,3 +352,7 @@ class ReservedHostsUnavailable(MasakariException):
class LockAlreadyAcquired(MasakariException):
    """Error whose message reports that a lock is already acquired.

    The message template interpolates a ``resource`` value.
    NOTE(review): presumably ``resource`` is supplied as a keyword
    argument by the raiser -- confirm against MasakariException.
    """
    msg_fmt = _('Lock is already acquired on %(resource)s.')
class IgnoreInstanceRecoveryException(MasakariException):
    """Signals that recovery of an instance was deliberately ignored.

    The default message is the generic text below.
    NOTE(review): raisers may pass a more specific message; whether that
    replaces ``msg_fmt`` depends on MasakariException -- confirm there.
    """
    msg_fmt = _('Instance recovery is ignored.')

View File

@ -388,3 +388,33 @@ class EngineManagerUnitTestCase(test.NoDBTestCase):
notification=notification)
self.assertEqual("ignored", notification.status)
self.assertFalse(mock_host_failure.called)
@mock.patch("masakari.compute.nova.API.stop_server")
@mock.patch.object(notification_obj.Notification, "save")
@mock.patch("masakari.compute.nova.API.get_server")
def test_process_notification_type_vm_ignore_instance_in_paused(
        self, mock_get_server, mock_notification_save, mock_stop_server):
    # Scenario: a VM-type notification arrives while the server returned
    # by the mocked get_server reports vm_state 'paused'.
    vm_notification = self._get_vm_type_notification()
    paused_server = fakes.FakeNovaClient.Server(
        id=1,
        uuid=uuidsentinel.fake_ins,
        host='fake_host',
        vm_state='paused',
        ha_enabled=True,
    )
    mock_get_server.return_value = paused_server

    self.engine.process_notification(
        self.context, notification=vm_notification)

    # The notification must end up 'ignored' and stop_server must never
    # have been invoked.
    self.assertEqual("ignored", vm_notification.status)
    self.assertFalse(mock_stop_server.called)
@mock.patch("masakari.compute.nova.API.stop_server")
@mock.patch.object(notification_obj.Notification, "save")
@mock.patch("masakari.compute.nova.API.get_server")
def test_process_notification_type_vm_ignore_instance_in_rescued(
        self, mock_get_server, mock_notification_save, mock_stop_server):
    # Scenario: a VM-type notification arrives while the server returned
    # by the mocked get_server reports vm_state 'rescued'.
    vm_notification = self._get_vm_type_notification()
    rescued_server = fakes.FakeNovaClient.Server(
        id=1,
        uuid=uuidsentinel.fake_ins,
        host='fake_host',
        vm_state='rescued',
        ha_enabled=True,
    )
    mock_get_server.return_value = rescued_server

    self.engine.process_notification(
        self.context, notification=vm_notification)

    # The notification must end up 'ignored' and stop_server must never
    # have been invoked.
    self.assertEqual("ignored", vm_notification.status)
    self.assertFalse(mock_stop_server.called)