Don't persist RequestSpec.retry

During a resize, the RequestSpec.flavor is updated
to the new flavor. If the resize fails on one host
and is rescheduled, the RequestSpec.retry field is
updated with that failed host and mistakenly
persisted, which can affect later move operations,
for example if an admin targets one of those
previously failed hosts for a live migration or
evacuate operation.
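
The following is a simplified, illustrative sketch (not Nova's actual
RetryFilter code) of why the stale persisted entry matters: a
RetryFilter-style check rejects any host that already appears in the
request's retry information, so a leftover entry for host2 blocks a
later admin-targeted move to host2.

    def host_passes(host_name, retry):
        # No retry info means there is nothing to filter on.
        if not retry:
            return True
        attempted = [host for host, node in retry.get('hosts', [])]
        return host_name not in attempted

    # Stale retry info persisted by the failed resize:
    stale_retry = {'num_attempts': 1, 'hosts': [('host2', 'node2')]}
    assert host_passes('host3', stale_retry)
    assert not host_passes('host2', stale_retry)  # move to host2 rejected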

This change fixes the problem by never persisting
the RequestSpec.retry field to the database, since
retries are per-request/operation and are not
something that needs to be persisted.
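
As a minimal sketch of the idea (simplified; not the actual
RequestSpec save code shown in the diff below), per-request fields such
as 'retry' are cleared on a copy of the spec before it is serialized
and written to the database:

    import copy
    import json

    def build_db_spec(spec):
        spec = copy.deepcopy(spec)
        # Retries are per-request/operation, so never persist them.
        spec.pop('retry', None)
        return {'spec': json.dumps(spec)}

    db_updates = build_db_spec(
        {'flavor': 'm1.small',
         'retry': {'num_attempts': 1, 'hosts': [('host2', 'node2')]}})
    assert 'retry' not in json.loads(db_updates['spec'])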

As an alternative, we could reset the retry field
in the RequestSpec.reset_forced_destinations method,
but that would slightly overload the meaning of that
method, and the approach taken in this patch is
arguably cleaner since retries should never be
persisted. It should be noted, however, that one
advantage of resetting the 'retry' field in
RequestSpec.reset_forced_destinations would be that
it also cleans up any existing DB entries that
already have this problem.
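
For comparison, a hedged sketch of that rejected alternative, using a
simplified stand-in object rather than the real Nova method:

    from types import SimpleNamespace

    def reset_forced_destinations(spec):
        spec.force_hosts = None
        spec.force_nodes = None
        # Overloads the method's purpose, but would also clean up 'retry'
        # on existing DB entries the next time they are reset.
        spec.retry = None

    spec = SimpleNamespace(
        force_hosts=['host2'], force_nodes=None,
        retry={'num_attempts': 1, 'hosts': [('host2', 'node2')]})
    reset_forced_destinations(spec)
    assert spec.retry is None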

The related functional regression test is updated
to show the bug is now fixed.

NOTE(mriedem): This backport also includes change
I61f745667f4c003d7e3ca6f2f9a99194930ac892 squashed
into it in order to not re-introduce that bug. On
Ocata it must be adjusted slightly to pass a string
rather than a list to _wait_for_migration_status
since I752617066bb2167b49239ab9d17b0c89754a3e12 is
not in Ocata.
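
A hedged illustration of that Ocata-specific adjustment; the stub below
only mimics the functional test helper's signature to show the
string-vs-list difference between branches and is not the real helper:

    def _wait_for_migration_status(server, expected):
        # The real helper polls os-migrations until the status matches.
        return [expected] if isinstance(expected, str) else expected

    assert _wait_for_migration_status('fake', 'completed') == ['completed']    # Ocata
    assert _wait_for_migration_status('fake', ['completed']) == ['completed']  # newer branches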

NOTE(mriedem): This patch has to pull some changes
from two other patches to make live migration work
in the fake virt driver: ce893e37f and b97c433f7.

Change-Id: Iadbf8ec935565a6d4ccf6f36ef630ab6bf1bea5d
Closes-Bug: #1718512
(cherry picked from commit 6647f11dc1)
(cherry picked from commit 757dbd17cf)
(cherry picked from commit 878e99d1f8)
Author: Matt Riedemann
Date: 2018-04-06 20:28:53 -04:00
Parent: 0d111f17c4
Commit: c61ec9d98b
5 changed files with 36 additions and 16 deletions

@@ -494,6 +494,9 @@ class RequestSpec(base.NovaObject):
         if 'instance_group' in spec and spec.instance_group:
             spec.instance_group.members = None
             spec.instance_group.hosts = None
+        # NOTE(mriedem): Don't persist retries since those are per-request
+        if 'retry' in spec and spec.retry:
+            spec.retry = None
         db_updates = {'spec': jsonutils.dumps(spec.obj_to_primitive())}
         if 'instance_uuid' in updates:

@@ -357,6 +357,9 @@ class TestCase(testtools.TestCase):
                                      cell_mapping=cell)
             hm.create()
             self.host_mappings[hm.host] = hm
+            if host is not None:
+                # Make sure that CONF.host is relevant to the right hostname
+                self.useFixture(nova_fixtures.ConfPatcher(host=host))
         return svc.service

@@ -152,17 +152,13 @@ class TestRequestSpecRetryReschedule(test.TestCase,
         data = {'os-migrateLive': {'host': 'host2', 'block_migration': 'auto'}}
         self.admin_api.post_server_action(server['id'], data)
         server = self._wait_for_state_change(self.admin_api, server, 'ACTIVE')
-        # FIXME(mriedem): This is bug 1718512 where the failed resize left
-        # host2 in the RequestSpec.retry field and it affects the live migrate
-        # to host2 because the scheduler RetryFilter kicks it out.
-        self.assertEqual('host3', server['OS-EXT-SRV-ATTR:host'])
-        migrations = self.admin_api.api_get(
-            'os-migrations?instance_uuid=%s&migration_type=live-migration' %
-            server['id']).body['migrations']
-        self.assertEqual(1, len(migrations))
-        self.assertEqual('error', migrations[0]['status'])
+        self.assertEqual('host2', server['OS-EXT-SRV-ATTR:host'])
+        # NOTE(mriedem): The instance status effectively goes to ACTIVE before
+        # the migration status is changed to "completed" since
+        # post_live_migration_at_destination changes the instance status
+        # and _post_live_migration changes the migration status later. So we
+        # need to poll the migration record until it's complete or we timeout.
+        self._wait_for_migration_status(server, 'completed')
         reqspec = objects.RequestSpec.get_by_instance_uuid(
             nova_context.get_admin_context(), server['id'])
-        self.assertIsNotNone(reqspec.retry)
-        self.assertEqual(1, reqspec.retry.num_attempts)
-        self.assertEqual('host2', reqspec.retry.hosts[0].host)
+        self.assertIsNone(reqspec.retry)

@@ -535,13 +535,14 @@ class _TestRequestSpecObject(object):
         # object fields
         for field in ['image', 'numa_topology', 'pci_requests', 'flavor',
-                      'retry', 'limits']:
+                      'limits']:
             self.assertEqual(
                 getattr(req_obj, field).obj_to_primitive(),
                 getattr(serialized_obj, field).obj_to_primitive())
         self.assertIsNone(serialized_obj.instance_group.members)
         self.assertIsNone(serialized_obj.instance_group.hosts)
+        self.assertIsNone(serialized_obj.retry)
 
     def test_create(self):
         req_obj = fake_request_spec.fake_spec_obj(remove_id=True)

@@ -37,6 +37,7 @@ from nova.console import type as ctype
 from nova import exception
 from nova.i18n import _LW
 from nova.objects import fields as obj_fields
+from nova.objects import migrate_data
 from nova.virt import diagnostics
 from nova.virt import driver
 from nova.virt import hardware
@@ -489,11 +490,27 @@ class FakeDriver(driver.ComputeDriver):
                                            src_compute_info, dst_compute_info,
                                            block_migration=False,
                                            disk_over_commit=False):
-        return {}
+        data = migrate_data.LibvirtLiveMigrateData()
+        data.filename = 'fake'
+        data.image_type = CONF.libvirt.images_type
+        data.graphics_listen_addr_vnc = CONF.vnc.vncserver_listen
+        data.graphics_listen_addr_spice = CONF.spice.server_listen
+        data.serial_listen_addr = None
+        # Notes(eliqiao): block_migration and disk_over_commit are not
+        # nullable, so just don't set them if they are None
+        if block_migration is not None:
+            data.block_migration = block_migration
+        if disk_over_commit is not None:
+            data.disk_over_commit = disk_over_commit
+        data.disk_available_mb = 100000
+        data.is_shared_block_storage = True
+        data.is_shared_instance_path = True
+        return data
 
     def check_can_live_migrate_source(self, context, instance,
                                       dest_check_data, block_device_info=None):
-        return
+        return dest_check_data
 
     def finish_migration(self, context, migration, instance, disk_info,
                          network_info, image_meta, resize_instance,
@@ -505,7 +522,7 @@ class FakeDriver(driver.ComputeDriver):
     def pre_live_migration(self, context, instance, block_device_info,
                            network_info, disk_info, migrate_data):
-        return
+        return migrate_data
 
     def unfilter_instance(self, instance, network_info):
         return