Fix assisted volume snapshots race condition

While performing assisted volume snapshots, we're setting the
instance task state to 'image_snapshot_pending' in order to prevent
instance actions that would impact this operation.

The issue is that we're clearing the task state only after calling
back the Cinder API to report that the snapshot was created or
deleted. As a result, a subsequent assisted snapshot request that
reaches the Nova API before the task state has been cleared will
be rejected.

This race condition affects quite a few tempest tests that cover
this scenario.

This change ensures that the instance task state is cleared before
calling back the Cinder API.

Closes-Bug: #1739423

Change-Id: I1ae57c109ed551ba03d49b2ac7c6318b3939526d
This commit is contained in:
Lucian Petrut 2017-12-20 15:09:15 +02:00
parent abf513d967
commit b6a23479d9
1 changed files with 15 additions and 10 deletions

View File

@ -298,6 +298,7 @@ class VolumeOps(object):
"create_info": create_info})
snapshot_id = create_info['snapshot_id']
snapshot_failed = False
try:
instance.task_state = task_states.IMAGE_SNAPSHOT_PENDING
instance.save(expected_task_state=[None])
@ -314,22 +315,23 @@ class VolumeOps(object):
# The volume driver is expected to
# update the connection info.
driver_bdm.save()
self._volume_api.update_snapshot_status(
context, snapshot_id, 'creating')
except Exception:
with excutils.save_and_reraise_exception():
snapshot_failed = True
err_msg = ('Error occurred while snapshotting volume. '
'sending error status to Cinder.')
LOG.exception(err_msg,
instance=instance)
self._volume_api.update_snapshot_status(
context, snapshot_id, 'error')
finally:
instance.task_state = None
instance.save(
expected_task_state=[task_states.IMAGE_SNAPSHOT_PENDING])
snapshot_status = 'error' if snapshot_failed else 'creating'
self._volume_api.update_snapshot_status(
context, snapshot_id, snapshot_status)
@volume_snapshot_lock
def volume_snapshot_delete(self, context, instance, volume_id,
snapshot_id, delete_info):
@ -339,6 +341,7 @@ class VolumeOps(object):
"instance_name": instance.name,
"delete_info": delete_info})
snapshot_delete_failed = False
try:
instance.task_state = task_states.IMAGE_SNAPSHOT_PENDING
instance.save(expected_task_state=[None])
@ -355,22 +358,24 @@ class VolumeOps(object):
# The volume driver is expected to
# update the connection info.
driver_bdm.save()
self._volume_api.update_snapshot_status(
context, snapshot_id, 'deleting')
except Exception:
with excutils.save_and_reraise_exception():
snapshot_delete_failed = True
err_msg = ('Error occurred while deleting volume '
'snapshot. Sending error status to Cinder.')
LOG.exception(err_msg,
instance=instance)
self._volume_api.update_snapshot_status(
context, snapshot_id, 'error_deleting')
finally:
instance.task_state = None
instance.save(
expected_task_state=[task_states.IMAGE_SNAPSHOT_PENDING])
snapshot_status = ('error_deleting'
if snapshot_delete_failed else 'deleting')
self._volume_api.update_snapshot_status(
context, snapshot_id, snapshot_status)
def get_disk_attachment_info(self, connection_info):
volume_driver = self._get_volume_driver(connection_info)
return volume_driver.get_disk_attachment_info(connection_info)