Create BDMs/tags in cell with instance when over-quota

If the server create build request fails the quota check
after the instance record has been created in a cell, we also
need to create the BDMs and tags in that cell so that users
can still see the tags on the server and so the API can
properly cleanup volume attachments when the server is deleted.

This change updates _cleanup_build_artifacts to create BDMs
and tags in the same cell as the instance prior to deleting the
build request and request spec and adjusts the assertions in the
related functional test to show the bug is fixed.

As for instances that get buried in cell0 due to scheduling
failures, the tags are not created there so comments are left
in those code paths to fix that issue as well, but that can be
done separately from this patch.

Change-Id: I1a9bdb596f74605ab4613c9cb2574e976aebbd8c
Closes-Bug: #1806064
(cherry picked from commit 6d0386058b)
(cherry picked from commit 3028d25705)
This commit is contained in:
Matt Riedemann 2018-12-03 15:12:12 -05:00
parent 3f37d0bba4
commit 3dd42c2cd6
2 changed files with 33 additions and 15 deletions

View File

@ -1145,6 +1145,8 @@ class ComputeTaskManager(base.Base):
instance_uuids, return_alternates=True)
except Exception as exc:
LOG.exception('Failed to schedule instances')
# FIXME(mriedem): If the tags are not persisted with the instance
# in cell0 then the API will not show them.
self._bury_in_cell0(context, request_specs[0], exc,
build_requests=build_requests,
block_device_mapping=block_device_mapping)
@ -1170,6 +1172,8 @@ class ComputeTaskManager(base.Base):
LOG.error('No host-to-cell mapping found for selected '
'host %(host)s. Setup is incomplete.',
{'host': host.service_host})
# FIXME(mriedem): If the tags are not persisted with the
# instance in cell0 then the API will not show them.
self._bury_in_cell0(
context, request_spec, exc,
build_requests=[build_request], instances=[instance],
@ -1221,6 +1225,7 @@ class ComputeTaskManager(base.Base):
self._cleanup_build_artifacts(context, exc, instances,
build_requests,
request_specs,
block_device_mapping, tags,
cell_mapping_cache)
zipped = six.moves.zip(build_requests, request_specs, host_lists,
@ -1297,7 +1302,8 @@ class ComputeTaskManager(base.Base):
limits=host.limits, host_list=host_list)
def _cleanup_build_artifacts(self, context, exc, instances, build_requests,
request_specs, cell_mapping_cache):
request_specs, block_device_mappings, tags,
cell_mapping_cache):
for (instance, build_request, request_spec) in six.moves.zip(
instances, build_requests, request_specs):
# Skip placeholders that were buried in cell0 or had their
@ -1318,6 +1324,21 @@ class ComputeTaskManager(base.Base):
inst_mapping.cell_mapping = cell
inst_mapping.save()
# In order to properly clean-up volumes when deleting a server in
# ERROR status with no host, we need to store BDMs in the same
# cell.
if block_device_mappings:
self._create_block_device_mapping(
cell, instance.flavor, instance.uuid,
block_device_mappings)
# Like BDMs, the server tags provided by the user when creating the
# server should be persisted in the same cell so they can be shown
# from the API.
if tags:
with nova_context.target_cell(context, cell) as cctxt:
self._create_tags(cctxt, instance.uuid, tags)
# Be paranoid about artifacts being deleted underneath us.
try:
build_request.destroy()

View File

@ -117,14 +117,16 @@ class BootFromVolumeOverQuotaRaceDeleteTest(
# This would raise InstanceNotFound if the instance isn't in cell1.
instance = objects.Instance.get_by_uuid(cctxt, server['id'])
self.assertIsNone(instance.host, 'instance.host should not be set')
# Make sure the BDMs and tags also exist in cell1.
bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
cctxt, instance.uuid)
self.assertEqual(1, len(bdms), 'BDMs were not created in cell1')
tags = objects.TagList.get_by_resource_id(cctxt, instance.uuid)
self.assertEqual(1, len(tags), 'Tags were not created in cell1')
# Make sure we can still view the tags on the server before it is
# deleted.
# FIXME(mriedem): This is bug 1806064 where the tags are not created
# in cell1 along with the instance when the quota check race failure
# occurs. Uncomment once fixed.
# self.assertEqual(['bfv'], server['tags'])
self.assertEqual([], server['tags'])
self.assertEqual(['bfv'], server['tags'])
# Now delete the server which, since it does not have a host, will be
# deleted "locally" from the API.
@ -132,12 +134,7 @@ class BootFromVolumeOverQuotaRaceDeleteTest(
self._wait_until_deleted(server)
# The volume should have been detached by the API.
# FIXME(mriedem): This is bug 1806064 where the volume is not detached
# because the related BDM record was not created in cell1 along with
# the instance so the API could not "see" it. Uncomment once fixed.
# self.assertIsNone(
# self.cinder_fixture.volume_ids_for_instance(server['id']))
self.assertEqual(volume_id,
# volume_ids_for_instance is a generator so listify
list(self.cinder_fixture.volume_ids_for_instance(
server['id']))[0])
attached_volumes = self.cinder_fixture.volume_ids_for_instance(
server['id'])
# volume_ids_for_instance is a generator so listify
self.assertEqual(0, len(list(attached_volumes)))