From bcae081c4672a13eb1a75d1d3d81a439b5fdb796 Mon Sep 17 00:00:00 2001 From: Eric M Gonzalez Date: Thu, 8 Mar 2018 09:11:25 -0600 Subject: [PATCH] unquiesce instance after quiesce failure If the call to compute_rpcapi.quisece_instance() raises an exception, any uncaught exception will break out of the function snapshot_volume_backed(). This can leave the instance in frozen state. This patch adds a blanket Exception catch to the try block and calls compute_rpcapi.unquiesce_instance() before reraising. This has been seen in the wild with RPC timeouts, but this is not the only possible genesis for an unknown error from quiesce_instance. Change-Id: Idca5998da8bb42b29a8fffdf52b4af3a043c6326 Closes-Bug: #1754360 (cherry picked from commit 1e77faaa412ab9909dd9491cab4a819b5c84d3e8) --- nova/compute/api.py | 10 +++++ nova/tests/unit/compute/test_compute_api.py | 48 ++++++++++++++------- 2 files changed, 42 insertions(+), 16 deletions(-) diff --git a/nova/compute/api.py b/nova/compute/api.py index 5e97fe6e591c..ce6cfdadf560 100644 --- a/nova/compute/api.py +++ b/nova/compute/api.py @@ -2957,6 +2957,16 @@ class API(base.Base): LOG.info('Skipping quiescing instance: %(reason)s.', {'reason': err}, instance=instance) + # NOTE(tasker): discovered that an uncaught exception could occur + # after the instance has been frozen. catch and thaw. + except Exception as ex: + with excutils.save_and_reraise_exception(): + LOG.error("An error occurred during quiesce of instance. " + "Unquiescing to ensure instance is thawed. " + "Error: %s", six.text_type(ex), + instance=instance) + self.compute_rpcapi.unquiesce_instance(context, instance, + mapping=None) @wrap_instance_event(prefix='api') def snapshot_instance(self, context, instance, bdms): diff --git a/nova/tests/unit/compute/test_compute_api.py b/nova/tests/unit/compute/test_compute_api.py index ea35cfa3469e..a3cdecb17e38 100644 --- a/nova/tests/unit/compute/test_compute_api.py +++ b/nova/tests/unit/compute/test_compute_api.py @@ -3047,7 +3047,9 @@ class _ComputeAPIUnitTestMixIn(object): mock_is_volume_backed.assert_called_once_with(self.context, instance) - def _test_snapshot_volume_backed(self, quiesce_required, quiesce_fails, + def _test_snapshot_volume_backed(self, quiesce_required=False, + quiesce_fails=False, + quiesce_unsupported=False, vm_state=vm_states.ACTIVE, snapshot_fails=False, limits=None): fake_sys_meta = {'image_min_ram': '11', @@ -3081,7 +3083,8 @@ class _ComputeAPIUnitTestMixIn(object): expect_meta['properties']['os_require_quiesce'] = 'yes' quiesced = [False, False] - quiesce_expected = not quiesce_fails and vm_state == vm_states.ACTIVE + quiesce_expected = not (quiesce_unsupported or quiesce_fails) \ + and vm_state == vm_states.ACTIVE @classmethod def fake_bdm_list_get_by_instance_uuid(cls, context, instance_uuid): @@ -3100,9 +3103,11 @@ class _ComputeAPIUnitTestMixIn(object): return {'id': '%s-snapshot' % volume_id} def fake_quiesce_instance(context, instance): - if quiesce_fails: + if quiesce_unsupported: raise exception.InstanceQuiesceNotSupported( - instance_id=instance['uuid'], reason='test') + instance_id=instance['uuid'], reason='unsupported') + if quiesce_fails: + raise oslo_exceptions.MessagingTimeout('quiece timeout') quiesced[0] = True def fake_unquiesce_instance(context, instance, mapping=None): @@ -3249,14 +3254,21 @@ class _ComputeAPIUnitTestMixIn(object): instance.uuid) def test_snapshot_volume_backed(self): - self._test_snapshot_volume_backed(False, False) + self._test_snapshot_volume_backed(quiesce_required=False, + quiesce_unsupported=False) - def test_snapshot_volume_backed_with_quiesce(self): - self._test_snapshot_volume_backed(True, False) + def test_snapshot_volume_backed_with_quiesce_unsupported(self): + self._test_snapshot_volume_backed(quiesce_required=True, + quiesce_unsupported=False) + + def test_snaphost_volume_backed_with_quiesce_failure(self): + self.assertRaises(oslo_exceptions.MessagingTimeout, + self._test_snapshot_volume_backed, + quiesce_required=True, + quiesce_fails=True) def test_snapshot_volume_backed_with_quiesce_create_snap_fails(self): self._test_snapshot_volume_backed(quiesce_required=True, - quiesce_fails=False, snapshot_fails=True) def test_snapshot_volume_backed_unlimited_quota(self): @@ -3264,8 +3276,7 @@ class _ComputeAPIUnitTestMixIn(object): don't perform a quota check. """ limits = {'maxTotalSnapshots': -1, 'totalSnapshotsUsed': 0} - self._test_snapshot_volume_backed( - quiesce_required=False, quiesce_fails=False, limits=limits) + self._test_snapshot_volume_backed(limits=limits) def test_snapshot_volume_backed_over_quota_before_snapshot(self): """Tests that the up-front check on quota fails before actually @@ -3274,26 +3285,31 @@ class _ComputeAPIUnitTestMixIn(object): limits = {'maxTotalSnapshots': 1, 'totalSnapshotsUsed': 1} self.assertRaises(exception.OverQuota, self._test_snapshot_volume_backed, - quiesce_required=False, quiesce_fails=False, limits=limits) def test_snapshot_volume_backed_with_quiesce_skipped(self): - self._test_snapshot_volume_backed(False, True) + self._test_snapshot_volume_backed(quiesce_required=False, + quiesce_unsupported=True) def test_snapshot_volume_backed_with_quiesce_exception(self): self.assertRaises(exception.NovaException, - self._test_snapshot_volume_backed, True, True) + self._test_snapshot_volume_backed, + quiesce_required=True, + quiesce_unsupported=True) def test_snapshot_volume_backed_with_quiesce_stopped(self): - self._test_snapshot_volume_backed(True, True, + self._test_snapshot_volume_backed(quiesce_required=True, + quiesce_unsupported=True, vm_state=vm_states.STOPPED) def test_snapshot_volume_backed_with_quiesce_suspended(self): - self._test_snapshot_volume_backed(True, True, + self._test_snapshot_volume_backed(quiesce_required=True, + quiesce_unsupported=True, vm_state=vm_states.SUSPENDED) def test_snapshot_volume_backed_with_suspended(self): - self._test_snapshot_volume_backed(False, True, + self._test_snapshot_volume_backed(quiesce_required=False, + quiesce_unsupported=True, vm_state=vm_states.SUSPENDED) @mock.patch.object(context, 'set_target_cell')