unquiesce instance on volume snapshot failure

This patch adds an exception catch to "snapshot_volume_backed()" of
compute/api.py that catches (at the moment) _all_ exceptions from the
underlying cinderclient. Previously, if the instance is quiesced ( frozen
filesystem ) then the exception will break execution of the function,
skipping the needed unquiesce, and leave the instance in a frozen state.

Now, the exception catch will unquiesce the instance if it was prior to
the failure.

Got a unit test in place with the help of Matt Riedemann.
    test_snapshot_volume_backed_with_quiesce_create_snap_fails

Change-Id: I60de179c72eede6746696f29462ee9d805dace47
Closes-bug: #1731986
(cherry picked from commit bca425a33f)
This commit is contained in:
Eric M Gonzalez 2017-11-13 14:02:27 -06:00 committed by Matt Riedemann
parent e708799c04
commit 7ab98b5345
2 changed files with 61 additions and 29 deletions

View File

@ -2815,6 +2815,8 @@ class API(base.Base):
quiesced = False
if instance.vm_state == vm_states.ACTIVE:
try:
LOG.info("Attempting to quiesce instance before volume "
"snapshot.", instance=instance)
self.compute_rpcapi.quiesce_instance(context, instance)
quiesced = True
except (exception.InstanceQuiesceNotSupported,
@ -2834,32 +2836,44 @@ class API(base.Base):
@wrap_instance_event(prefix='api')
def snapshot_instance(self, context, instance, bdms):
mapping = []
for bdm in bdms:
if bdm.no_device:
continue
try:
for bdm in bdms:
if bdm.no_device:
continue
if bdm.is_volume:
# create snapshot based on volume_id
volume = self.volume_api.get(context, bdm.volume_id)
# NOTE(yamahata): Should we wait for snapshot creation?
# Linux LVM snapshot creation completes in
# short time, it doesn't matter for now.
name = _('snapshot for %s') % image_meta['name']
LOG.debug('Creating snapshot from volume %s.',
volume['id'],
instance=instance)
snapshot = self.volume_api.create_snapshot_force(
context, volume['id'], name,
volume['display_description'])
mapping_dict = block_device.snapshot_from_bdm(
snapshot['id'],
bdm)
mapping_dict = mapping_dict.get_image_mapping()
else:
mapping_dict = bdm.get_image_mapping()
if bdm.is_volume:
# create snapshot based on volume_id
volume = self.volume_api.get(context, bdm.volume_id)
# NOTE(yamahata): Should we wait for snapshot creation?
# Linux LVM snapshot creation completes in short time,
# it doesn't matter for now.
name = _('snapshot for %s') % image_meta['name']
LOG.debug('Creating snapshot from volume %s.',
volume['id'], instance=instance)
snapshot = self.volume_api.create_snapshot_force(
context, volume['id'],
name, volume['display_description'])
mapping_dict = block_device.snapshot_from_bdm(
snapshot['id'], bdm)
mapping_dict = mapping_dict.get_image_mapping()
else:
mapping_dict = bdm.get_image_mapping()
mapping.append(mapping_dict)
return mapping
mapping.append(mapping_dict)
return mapping
# NOTE(tasker): No error handling is done in the above for loop.
# This means that if the snapshot fails and throws an exception
# the traceback will skip right over the unquiesce needed below.
# Here, catch any exception, unquiesce the instance, and raise the
# error so that the calling function can do what it needs to in
# order to properly treat a failed snap.
except Exception:
with excutils.save_and_reraise_exception():
if quiesced:
LOG.info("Unquiescing instance after volume snapshot "
"failure.", instance=instance)
self.compute_rpcapi.unquiesce_instance(
context, instance, mapping)
self._record_action_start(context, instance,
instance_actions.CREATE_IMAGE)

View File

@ -2929,7 +2929,8 @@ class _ComputeAPIUnitTestMixIn(object):
instance)
def _test_snapshot_volume_backed(self, quiesce_required, quiesce_fails,
vm_state=vm_states.ACTIVE):
vm_state=vm_states.ACTIVE,
snapshot_fails=False):
fake_sys_meta = {'image_min_ram': '11',
'image_min_disk': '22',
'image_container_format': 'ami',
@ -2975,6 +2976,8 @@ class _ComputeAPIUnitTestMixIn(object):
return {'id': volume_id, 'display_description': ''}
def fake_volume_create_snapshot(context, volume_id, name, description):
if snapshot_fails:
raise exception.OverQuota(overs="snapshots")
return {'id': '%s-snapshot' % volume_id}
def fake_quiesce_instance(context, instance):
@ -3039,8 +3042,13 @@ class _ComputeAPIUnitTestMixIn(object):
mock.patch.object(compute_utils, 'EventReporter')) as (
mock_record, mock_event):
# All the db_only fields and the volume ones are removed
self.compute_api.snapshot_volume_backed(
self.context, instance, 'test-snapshot')
if snapshot_fails:
self.assertRaises(exception.OverQuota,
self.compute_api.snapshot_volume_backed,
self.context, instance, "test-snapshot")
else:
self.compute_api.snapshot_volume_backed(
self.context, instance, 'test-snapshot')
self.assertEqual(quiesce_expected, quiesced[0])
self.assertEqual(quiesce_expected, quiesced[1])
@ -3090,8 +3098,13 @@ class _ComputeAPIUnitTestMixIn(object):
mock_record, mock_event):
# Check that the mappings from the image properties are not
# included
self.compute_api.snapshot_volume_backed(
self.context, instance, 'test-snapshot')
if snapshot_fails:
self.assertRaises(exception.OverQuota,
self.compute_api.snapshot_volume_backed,
self.context, instance, "test-snapshot")
else:
self.compute_api.snapshot_volume_backed(
self.context, instance, 'test-snapshot')
self.assertEqual(quiesce_expected, quiesced[0])
self.assertEqual(quiesce_expected, quiesced[1])
@ -3109,6 +3122,11 @@ class _ComputeAPIUnitTestMixIn(object):
def test_snapshot_volume_backed_with_quiesce(self):
self._test_snapshot_volume_backed(True, False)
def test_snapshot_volume_backed_with_quiesce_create_snap_fails(self):
self._test_snapshot_volume_backed(quiesce_required=True,
quiesce_fails=False,
snapshot_fails=True)
def test_snapshot_volume_backed_with_quiesce_skipped(self):
self._test_snapshot_volume_backed(False, True)