From b6a23479d92a8fd770fc6ad6bf20cab4618e399e Mon Sep 17 00:00:00 2001 From: Lucian Petrut Date: Wed, 20 Dec 2017 15:09:15 +0200 Subject: [PATCH] Fix assisted volume snapshots race condition While performing assisted volume snapshots, we're setting the instance task state to 'image_snapshot_pending' in order to prevent instance actions that would impact this operation. The issue is that we're clearing the task state only after calling back the Cinder API, telling it that the snapshot was created or deleted. For this reason, a subsequent assisted snapshot request will be rejected by the Nova API if the task state is not cleared in time. This race condition affects quite a few tempest tests that cover this scenario. This change ensures that the instance task state is cleared before calling back the Cinder API. Closes-Bug: #1739423 Change-Id: I1ae57c109ed551ba03d49b2ac7c6318b3939526d --- compute_hyperv/nova/volumeops.py | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/compute_hyperv/nova/volumeops.py b/compute_hyperv/nova/volumeops.py index af22351d..cba02618 100644 --- a/compute_hyperv/nova/volumeops.py +++ b/compute_hyperv/nova/volumeops.py @@ -298,6 +298,7 @@ class VolumeOps(object): "create_info": create_info}) snapshot_id = create_info['snapshot_id'] + snapshot_failed = False try: instance.task_state = task_states.IMAGE_SNAPSHOT_PENDING instance.save(expected_task_state=[None]) @@ -314,22 +315,23 @@ class VolumeOps(object): # The volume driver is expected to # update the connection info. driver_bdm.save() - - self._volume_api.update_snapshot_status( - context, snapshot_id, 'creating') except Exception: with excutils.save_and_reraise_exception(): + snapshot_failed = True + err_msg = ('Error occurred while snapshotting volume. ' 'sending error status to Cinder.') LOG.exception(err_msg, instance=instance) - self._volume_api.update_snapshot_status( - context, snapshot_id, 'error') finally: instance.task_state = None instance.save( expected_task_state=[task_states.IMAGE_SNAPSHOT_PENDING]) + snapshot_status = 'error' if snapshot_failed else 'creating' + self._volume_api.update_snapshot_status( + context, snapshot_id, snapshot_status) + @volume_snapshot_lock def volume_snapshot_delete(self, context, instance, volume_id, snapshot_id, delete_info): @@ -339,6 +341,7 @@ class VolumeOps(object): "instance_name": instance.name, "delete_info": delete_info}) + snapshot_delete_failed = False try: instance.task_state = task_states.IMAGE_SNAPSHOT_PENDING instance.save(expected_task_state=[None]) @@ -355,22 +358,24 @@ class VolumeOps(object): # The volume driver is expected to # update the connection info. driver_bdm.save() - - self._volume_api.update_snapshot_status( - context, snapshot_id, 'deleting') except Exception: with excutils.save_and_reraise_exception(): + snapshot_delete_failed = True + err_msg = ('Error occurred while deleting volume ' 'snapshot. Sending error status to Cinder.') LOG.exception(err_msg, instance=instance) - self._volume_api.update_snapshot_status( - context, snapshot_id, 'error_deleting') finally: instance.task_state = None instance.save( expected_task_state=[task_states.IMAGE_SNAPSHOT_PENDING]) + snapshot_status = ('error_deleting' + if snapshot_delete_failed else 'deleting') + self._volume_api.update_snapshot_status( + context, snapshot_id, snapshot_status) + def get_disk_attachment_info(self, connection_info): volume_driver = self._get_volume_driver(connection_info) return volume_driver.get_disk_attachment_info(connection_info)