Drop source node allocations if finish_resize fails

By the time finish_resize runs on the dest host, the instance
host/node values are already pointing at the dest (they are
set by resize_instance on the source compute before casting to
finish_resize on the dest). If finish_resize fails, the instance
is essentially stuck on the dest host, so rather than reverting
the allocations (which would drop the new flavor allocations
against the dest host where the instance now lives), we should
just drop the old flavor allocations on the source node resource
provider, which is what this change does.

The functional regression recreate test is updated to show this
working.

Change-Id: I52c8d038118c858004e17e71b2fba9e9e2714815
Closes-Bug: #1825537
(cherry picked from commit ea297d6ffb)
This commit is contained in:
Matt Riedemann 2019-04-19 12:28:34 -04:00
parent eaa1fc6159
commit e6c6178d22
3 changed files with 28 additions and 17 deletions

View File

@ -4776,7 +4776,22 @@ class ComputeManager(manager.Manager):
migration)
except Exception:
with excutils.save_and_reraise_exception():
self._revert_allocation(context, instance, migration)
# At this point, resize_instance (which runs on the source) has
# already updated the instance host/node values to point to
# this (the dest) compute, so we need to leave the allocations
# against the dest node resource provider intact and drop the
# allocations against the source node resource provider. If the
# user tries to recover the server by hard rebooting it, it
# will happen on this host so that's where the allocations
# should go. Note that this is the same method called from
# confirm_resize to cleanup the source node allocations held
# by the migration record.
LOG.info('Deleting allocations for old flavor on source node '
'%s after finish_resize failure. You may be able to '
'recover the instance by hard rebooting it.',
migration.source_compute, instance=instance)
self._delete_allocation_after_move(
context, instance, migration)
def _finish_resize_helper(self, context, disk_info, image, instance,
migration):

View File

@ -67,16 +67,9 @@ class FinishResizeErrorAllocationCleanupTestCase(
# allocations should still exist with the new flavor.
source_rp_uuid = self._get_provider_uuid_by_host('host1')
dest_rp_uuid = self._get_provider_uuid_by_host('host2')
# FIXME(mriedem): This is bug 1825537 where the allocations are
# reverted when finish_resize fails so the dest node resource provider
# does not have any allocations and the instance allocations are for
# the old flavor on the source node resource provider even though the
# instance is not running on the source host nor pointed at the source
# host in the DB.
# self.assertFlavorMatchesAllocation(
# self.flavor2, server['id'], dest_rp_uuid)
dest_rp_usages = self._get_provider_usages(dest_rp_uuid)
no_usage = {'VCPU': 0, 'MEMORY_MB': 0, 'DISK_GB': 0}
self.assertEqual(no_usage, dest_rp_usages)
self.assertFlavorMatchesAllocation(
self.flavor1, server['id'], source_rp_uuid)
self.flavor2, server['id'], dest_rp_uuid)
# And the source node provider should not have any usage.
source_rp_usages = self._get_provider_usages(source_rp_uuid)
no_usage = {'VCPU': 0, 'MEMORY_MB': 0, 'DISK_GB': 0}
self.assertEqual(no_usage, source_rp_usages)

View File

@ -3723,10 +3723,13 @@ class ServerMovingTests(integrated_helpers.ProviderUsageBaseTestCase):
# Ensure the allocation records still exist on the host.
source_rp_uuid = self._get_provider_uuid_by_host(hostname)
# FIXME(mriedem): This is wrong for the _finish_resize case.
# The new_flavor should have been subtracted from the doubled
# allocation which just leaves us with the original flavor.
self.assertFlavorMatchesUsage(source_rp_uuid, self.flavor1)
if failing_method == '_finish_resize':
# finish_resize will drop the old flavor allocations.
self.assertFlavorMatchesUsage(source_rp_uuid, self.flavor2)
else:
# The new_flavor should have been subtracted from the doubled
# allocation which just leaves us with the original flavor.
self.assertFlavorMatchesUsage(source_rp_uuid, self.flavor1)
def test_resize_to_same_host_prep_resize_fails(self):
self._test_resize_to_same_host_instance_fails(