Fix assisted volume snapshots race condition

While performing assisted volume snapshots, we're setting the instance task state to 'image_snapshot_pending' in order to prevent instance actions that would impact this operation.

The issue is that we're clearing the task state only after calling back the Cinder API, telling it that the snapshot was created or deleted. For this reason, a subsequent assisted snapshot request will be rejected by the Nova API if the task state is not cleared in time. This race condition affects quite a few Tempest tests that cover this scenario.

This change ensures that the instance task state is cleared before calling back the Cinder API.

Closes-Bug: #1739423
Change-Id: I1ae57c109ed551ba03d49b2ac7c6318b3939526d
2017-12-20 15:09:15 +02:00 · 2017-12-20 15:09:15 +02:00 · b6a23479d9
parent abf513d967
commit b6a23479d9
1 changed files with 15 additions and 10 deletions
--- a/compute_hyperv/nova/volumeops.py
+++ b/compute_hyperv/nova/volumeops.py
@@ -298,6 +298,7 @@ class VolumeOps(object):
                   "create_info": create_info})
        snapshot_id = create_info['snapshot_id']

+        snapshot_failed = False
        try:
            instance.task_state = task_states.IMAGE_SNAPSHOT_PENDING
            instance.save(expected_task_state=[None])
@@ -314,22 +315,23 @@ class VolumeOps(object):
            # The volume driver is expected to
            # update the connection info.
            driver_bdm.save()
-
-            self._volume_api.update_snapshot_status(
-                context, snapshot_id, 'creating')
        except Exception:
            with excutils.save_and_reraise_exception():
+                snapshot_failed = True
+
                err_msg = ('Error occurred while snapshotting volume. '
                           'sending error status to Cinder.')
                LOG.exception(err_msg,
                              instance=instance)
-                self._volume_api.update_snapshot_status(
-                    context, snapshot_id, 'error')
        finally:
            instance.task_state = None
            instance.save(
                expected_task_state=[task_states.IMAGE_SNAPSHOT_PENDING])

+            snapshot_status = 'error' if snapshot_failed else 'creating'
+            self._volume_api.update_snapshot_status(
+                context, snapshot_id, snapshot_status)
+
    @volume_snapshot_lock
    def volume_snapshot_delete(self, context, instance, volume_id,
                               snapshot_id, delete_info):
@@ -339,6 +341,7 @@ class VolumeOps(object):
                   "instance_name": instance.name,
                   "delete_info": delete_info})

+        snapshot_delete_failed = False
        try:
            instance.task_state = task_states.IMAGE_SNAPSHOT_PENDING
            instance.save(expected_task_state=[None])
@@ -355,22 +358,24 @@ class VolumeOps(object):
            # The volume driver is expected to
            # update the connection info.
            driver_bdm.save()
-
-            self._volume_api.update_snapshot_status(
-                context, snapshot_id, 'deleting')
        except Exception:
            with excutils.save_and_reraise_exception():
+                snapshot_delete_failed = True
+
                err_msg = ('Error occurred while deleting volume '
                           'snapshot. Sending error status to Cinder.')
                LOG.exception(err_msg,
                              instance=instance)
-                self._volume_api.update_snapshot_status(
-                    context, snapshot_id, 'error_deleting')
        finally:
            instance.task_state = None
            instance.save(
                expected_task_state=[task_states.IMAGE_SNAPSHOT_PENDING])

+            snapshot_status = ('error_deleting'
+                               if snapshot_delete_failed else 'deleting')
+            self._volume_api.update_snapshot_status(
+                context, snapshot_id, snapshot_status)
+
    def get_disk_attachment_info(self, connection_info):
        """Return the attachment info reported by the matching volume driver.

        The driver is looked up through ``self._get_volume_driver`` using
        ``connection_info``, and the same ``connection_info`` is then handed
        to that driver's ``get_disk_attachment_info``.
        """
        return self._get_volume_driver(
            connection_info).get_disk_attachment_info(connection_info)