From 17b9b900a249f6f432552fe27a9cdd54c1495b99 Mon Sep 17 00:00:00 2001 From: Eric M Gonzalez Date: Mon, 13 Nov 2017 14:02:27 -0600 Subject: [PATCH] unquiesce instance on volume snapshot failure This patch adds an exception catch to "snapshot_volume_backed()" of compute/api.py that catches (at the moment) _all_ exceptions from the underlying cinderclient. Previously, if the instance is quiesced ( frozen filesystem ) then the exception will break execution of the function, skipping the needed unquiesce, and leave the instance in a frozen state. Now, the exception catch will unquiesce the instance if it was prior to the failure. Got a unit test in place with the help of Matt Riedemann. test_snapshot_volume_backed_with_quiesce_create_snap_fails Conflicts: nova/compute/api.py nova/tests/unit/compute/test_compute_api.py NOTE(mriedem): The conflicts are due to not having change I9ce48e768cc67543f27a6c87c57b47501fff38c2 in Pike. Change-Id: I60de179c72eede6746696f29462ee9d805dace47 Closes-bug: #1731986 (cherry picked from commit bca425a33f52584051348a3ace832be8151299a7) (cherry picked from commit 7ab98b5345f4a023bd209e714cd0aa60b3a31d48) --- nova/compute/api.py | 57 +++++++++++++-------- nova/tests/unit/compute/test_compute_api.py | 28 ++++++++-- 2 files changed, 60 insertions(+), 25 deletions(-) diff --git a/nova/compute/api.py b/nova/compute/api.py index 8078add299ff..b929e9fac9e6 100644 --- a/nova/compute/api.py +++ b/nova/compute/api.py @@ -2821,6 +2821,8 @@ class API(base.Base): quiesced = False if instance.vm_state == vm_states.ACTIVE: try: + LOG.info("Attempting to quiesce instance before volume " + "snapshot.", instance=instance) self.compute_rpcapi.quiesce_instance(context, instance) quiesced = True except (exception.InstanceQuiesceNotSupported, @@ -2838,28 +2840,43 @@ class API(base.Base): context, instance.uuid) mapping = [] - for bdm in bdms: - if bdm.no_device: - continue + try: + for bdm in bdms: + if bdm.no_device: + continue - if bdm.is_volume: - # create snapshot based on volume_id - volume = self.volume_api.get(context, bdm.volume_id) - # NOTE(yamahata): Should we wait for snapshot creation? - # Linux LVM snapshot creation completes in - # short time, it doesn't matter for now. - name = _('snapshot for %s') % image_meta['name'] - LOG.debug('Creating snapshot from volume %s.', volume['id'], - instance=instance) - snapshot = self.volume_api.create_snapshot_force( - context, volume['id'], name, volume['display_description']) - mapping_dict = block_device.snapshot_from_bdm(snapshot['id'], - bdm) - mapping_dict = mapping_dict.get_image_mapping() - else: - mapping_dict = bdm.get_image_mapping() + if bdm.is_volume: + # create snapshot based on volume_id + volume = self.volume_api.get(context, bdm.volume_id) + # NOTE(yamahata): Should we wait for snapshot creation? + # Linux LVM snapshot creation completes in short time, + # it doesn't matter for now. + name = _('snapshot for %s') % image_meta['name'] + LOG.debug('Creating snapshot from volume %s.', + volume['id'], instance=instance) + snapshot = self.volume_api.create_snapshot_force( + context, volume['id'], + name, volume['display_description']) + mapping_dict = block_device.snapshot_from_bdm( + snapshot['id'], bdm) + mapping_dict = mapping_dict.get_image_mapping() + else: + mapping_dict = bdm.get_image_mapping() - mapping.append(mapping_dict) + mapping.append(mapping_dict) + # NOTE(tasker): No error handling is done in the above for loop. + # This means that if the snapshot fails and throws an exception + # the traceback will skip right over the unquiesce needed below. + # Here, catch any exception, unquiesce the instance, and raise the + # error so that the calling function can do what it needs to in + # order to properly treat a failed snap. + except Exception: + with excutils.save_and_reraise_exception(): + if quiesced: + LOG.info("Unquiescing instance after volume snapshot " + "failure.", instance=instance) + self.compute_rpcapi.unquiesce_instance( + context, instance, mapping) if quiesced: self.compute_rpcapi.unquiesce_instance(context, instance, mapping) diff --git a/nova/tests/unit/compute/test_compute_api.py b/nova/tests/unit/compute/test_compute_api.py index f403d082be67..cb8160f096db 100644 --- a/nova/tests/unit/compute/test_compute_api.py +++ b/nova/tests/unit/compute/test_compute_api.py @@ -2624,7 +2624,8 @@ class _ComputeAPIUnitTestMixIn(object): instance) def _test_snapshot_volume_backed(self, quiesce_required, quiesce_fails, - vm_state=vm_states.ACTIVE): + vm_state=vm_states.ACTIVE, + snapshot_fails=False): fake_sys_meta = {'image_min_ram': '11', 'image_min_disk': '22', 'image_container_format': 'ami', @@ -2670,6 +2671,8 @@ class _ComputeAPIUnitTestMixIn(object): return {'id': volume_id, 'display_description': ''} def fake_volume_create_snapshot(context, volume_id, name, description): + if snapshot_fails: + raise exception.OverQuota(overs="snapshots") return {'id': '%s-snapshot' % volume_id} def fake_quiesce_instance(context, instance): @@ -2719,8 +2722,13 @@ class _ComputeAPIUnitTestMixIn(object): 'tag': None}) # All the db_only fields and the volume ones are removed - self.compute_api.snapshot_volume_backed( - self.context, instance, 'test-snapshot') + if snapshot_fails: + self.assertRaises(exception.OverQuota, + self.compute_api.snapshot_volume_backed, + self.context, instance, "test-snapshot") + else: + self.compute_api.snapshot_volume_backed( + self.context, instance, 'test-snapshot') self.assertEqual(quiesce_expected, quiesced[0]) self.assertEqual(quiesce_expected, quiesced[1]) @@ -2758,8 +2766,13 @@ class _ComputeAPIUnitTestMixIn(object): quiesced = [False, False] # Check that the mappings from the image properties are not included - self.compute_api.snapshot_volume_backed( - self.context, instance, 'test-snapshot') + if snapshot_fails: + self.assertRaises(exception.OverQuota, + self.compute_api.snapshot_volume_backed, + self.context, instance, "test-snapshot") + else: + self.compute_api.snapshot_volume_backed( + self.context, instance, 'test-snapshot') self.assertEqual(quiesce_expected, quiesced[0]) self.assertEqual(quiesce_expected, quiesced[1]) @@ -2770,6 +2783,11 @@ class _ComputeAPIUnitTestMixIn(object): def test_snapshot_volume_backed_with_quiesce(self): self._test_snapshot_volume_backed(True, False) + def test_snapshot_volume_backed_with_quiesce_create_snap_fails(self): + self._test_snapshot_volume_backed(quiesce_required=True, + quiesce_fails=False, + snapshot_fails=True) + def test_snapshot_volume_backed_with_quiesce_skipped(self): self._test_snapshot_volume_backed(False, True)