Do not persist RequestSpec.ignore_hosts

Change Ic3968721d257a167f3f946e5387cd227a7eeec6c in Newton
started setting the RequestSpec.ignore_hosts field to the
source instance.host during resize/cold migrate if
allow_resize_to_same_host=False in config, which it is by
default.

Change I8abdf58a6537dd5e15a012ea37a7b48abd726579 also in
Newton persists changes to the RequestSpec in conductor
in order to save the RequestSpec.flavor for the new flavor.
This inadvertently persists the ignore_hosts field as well.

Later if you try to evacuate or unshelve the server it will ignore
the original source host because of the persisted ignore_hosts
value. This is obviously a problem in a small deployment with only
a few compute nodes (like an edge deployment). As a result, an
evacuation can fail if the only available host is the one being
ignored.

This change does two things:

1. In order to deal with existing corrupted RequestSpecs in the DB,
   this change simply makes conductor overwrite RequestSpec.ignore_hosts
   rather than append during evacuate before calling the scheduler so
   the current instance host (which is down) is filtered out.

   This evacuate code dealing with ignore_hosts goes back to Mitaka:

     I7fe694175bb47f53d281bd62ac200f1c8416682b

   The test_rebuild_instance_with_request_spec unit test is updated
   and renamed to actually be doing an evacuate which is what it was
   intended for, i.e. the host would not change during rebuild.

2. This change makes the RequestSpec no longer persist the ignore_hosts
   field like several other per-operation fields in the RequestSpec.
   The only operations that use ignore_hosts are resize (if
   allow_resize_to_same_host=False), evacuate and live migration, and
   the field gets reset in each case to ignore the source instance.host.

The related functional recreate test is also updated to show the
bug is fixed. Note that as part of that, the confirm_migration method
in the fake virt driver needed to be implemented otherwise trying to
evacuate back to the source host fails with an InstanceExists error since
the confirmResize operation did not remove the guest from the source host.

Conflicts:
      nova/objects/request_spec.py
      nova/tests/unit/conductor/test_conductor.py
      nova/tests/unit/objects/test_request_spec.py

NOTE(mriedem): The (test_)request_spec.py conflicts are due to not
having change I2a78f0754c63381c57e7e1c610d0938b6df0f537 in Pike.
The test_conductor.py conflict is due to not having change
I70b11dd489d222be3d70733355bfe7966df556aa in Pike.

NOTE(mriedem): The functional test is adjusted slightly to pass
a string rather than list to _wait_for_migration_status since
change I752617066bb2167b49239ab9d17b0c89754a3e12 is not in Pike.

Change-Id: I3f488be6f3c399f23ccf2b9ee0d76cd000da0e3e
Closes-Bug: #1669054
(cherry picked from commit e4c998e573)
(cherry picked from commit 76dfb9d0b6)
(cherry picked from commit 31c08d0c7d)
(cherry picked from commit 8f1773a7af)
This commit is contained in:
Matt Riedemann 2019-03-25 11:27:57 -04:00
parent fcd718dcdd
commit efab235f88
6 changed files with 71 additions and 30 deletions

View File

@ -874,8 +874,7 @@ class ComputeTaskManager(base.Base):
elif recreate:
# NOTE(sbauza): Augment the RequestSpec object by excluding
# the source host for avoiding the scheduler to pick it
request_spec.ignore_hosts = request_spec.ignore_hosts or []
request_spec.ignore_hosts.append(instance.host)
request_spec.ignore_hosts = [instance.host]
# NOTE(sbauza): Force_hosts/nodes needs to be reset
# if we want to make sure that the next destination
# is not forced to be the original host

View File

@ -446,7 +446,13 @@ class RequestSpec(base.NovaObject):
# though they should match.
if key in ['id', 'instance_uuid']:
setattr(spec, key, db_spec[key])
else:
elif key == 'ignore_hosts':
# NOTE(mriedem): Do not override the 'ignore_hosts'
# field which is not persisted. It is not a lazy-loadable
# field. If it is not set, set None.
if not spec.obj_attr_is_set(key):
setattr(spec, key, None)
elif key in spec_obj:
setattr(spec, key, getattr(spec_obj, key))
spec._context = context
@ -507,9 +513,11 @@ class RequestSpec(base.NovaObject):
if 'instance_group' in spec and spec.instance_group:
spec.instance_group.members = None
spec.instance_group.hosts = None
# NOTE(mriedem): Don't persist retries since those are per-request
if 'retry' in spec and spec.retry:
spec.retry = None
# NOTE(mriedem): Don't persist retries or ignored hosts since
# those are per-request
for excluded in ('retry', 'ignore_hosts'):
if excluded in spec and getattr(spec, excluded):
setattr(spec, excluded, None)
db_updates = {'spec': jsonutils.dumps(spec.obj_to_primitive())}
if 'instance_uuid' in updates:

View File

@ -68,21 +68,17 @@ class ResizeEvacuateTestCase(integrated_helpers._IntegratedTestBase,
# Disable the host on which the server is now running (host2).
host2.stop()
self.api.force_down_service('host2', 'nova-compute', forced_down=True)
# Now try to evacuate the server back to the original source compute.
# FIXME(mriedem): This is bug 1669054 where the evacuate fails with
# NoValidHost because the RequestSpec.ignore_hosts field has the
# original source host in it which is the only other available host to
# which we can evacuate the server.
req = {'evacuate': {'onSharedStorage': False}}
self.api.post_server_action(server['id'], req,
check_response_status=[500])
# There should be fault recorded with the server.
server = self._wait_for_state_change(self.api, server, 'ERROR')
self.assertIn('fault', server)
self.assertIn('No valid host was found', server['fault']['message'])
# Assert the RequestSpec.ignore_hosts is still populated.
self.api.post_server_action(server['id'], req)
server = self._wait_for_state_change(self.api, server, 'ACTIVE')
# The evacuate flow in the compute manager is annoying in that it
# sets the instance status to ACTIVE before updating the host, so we
# have to wait for the migration record to be 'done' to avoid a race.
self._wait_for_migration_status(server, 'done')
self.assertEqual(self.compute.host, server['OS-EXT-SRV-ATTR:host'])
# Assert the RequestSpec.ignore_hosts field is not populated.
reqspec = objects.RequestSpec.get_by_instance_uuid(
context.get_admin_context(), server['id'])
self.assertIsNotNone(reqspec.ignore_hosts)
self.assertIn(self.compute.host, reqspec.ignore_hosts)
self.assertIsNone(reqspec.ignore_hosts)

View File

@ -1357,16 +1357,16 @@ class _BaseTaskTestCase(object):
instance=inst_obj,
**compute_args)
def test_rebuild_instance_with_request_spec(self):
def test_evacuate_instance_with_request_spec(self):
inst_obj = self._create_fake_instance_obj()
inst_obj.host = 'noselect'
expected_host = 'thebesthost'
expected_node = 'thebestnode'
expected_limits = 'fake-limits'
fake_spec = objects.RequestSpec(ignore_hosts=[])
fake_spec = objects.RequestSpec(ignore_hosts=[uuids.ignored_host])
rebuild_args, compute_args = self._prepare_rebuild_args(
{'host': None, 'node': expected_node, 'limits': expected_limits,
'request_spec': fake_spec})
'request_spec': fake_spec, 'recreate': True})
with test.nested(
mock.patch.object(self.conductor_manager.compute_rpcapi,
'rebuild_instance'),
@ -1382,10 +1382,9 @@ class _BaseTaskTestCase(object):
self.conductor_manager.rebuild_instance(context=self.context,
instance=inst_obj,
**rebuild_args)
if rebuild_args['recreate']:
reset_fd.assert_called_once_with()
else:
reset_fd.assert_not_called()
reset_fd.assert_called_once_with()
# The RequestSpec.ignore_hosts field should be overwritten.
self.assertEqual([inst_obj.host], fake_spec.ignore_hosts)
select_dest_mock.assert_called_once_with(self.context,
fake_spec, [inst_obj.uuid])
compute_args['host'] = expected_host

View File

@ -504,7 +504,8 @@ class _TestRequestSpecObject(object):
fake_spec['instance_uuid'])
self.assertEqual(1, req_obj.num_instances)
self.assertEqual(['host2', 'host4'], req_obj.ignore_hosts)
# ignore_hosts is not persisted
self.assertIsNone(req_obj.ignore_hosts)
self.assertEqual('fake', req_obj.project_id)
self.assertEqual({'hint': ['over-there']}, req_obj.scheduler_hints)
self.assertEqual(['host1', 'host3'], req_obj.force_hosts)
@ -527,7 +528,7 @@ class _TestRequestSpecObject(object):
jsonutils.loads(changes['spec']))
# primitive fields
for field in ['instance_uuid', 'num_instances', 'ignore_hosts',
for field in ['instance_uuid', 'num_instances',
'project_id', 'scheduler_hints', 'force_hosts',
'availability_zone', 'force_nodes']:
self.assertEqual(getattr(req_obj, field),
@ -543,6 +544,7 @@ class _TestRequestSpecObject(object):
self.assertIsNone(serialized_obj.instance_group.members)
self.assertIsNone(serialized_obj.instance_group.hosts)
self.assertIsNone(serialized_obj.retry)
self.assertIsNone(serialized_obj.ignore_hosts)
def test_create(self):
req_obj = fake_request_spec.fake_spec_obj(remove_id=True)
@ -563,6 +565,39 @@ class _TestRequestSpecObject(object):
self.assertRaises(exception.ObjectActionError, req_obj.create)
def test_save_does_not_persist_requested_fields(self):
req_obj = fake_request_spec.fake_spec_obj(remove_id=True)
req_obj.create()
# change something to make sure _save_in_db is called
expected_destination = request_spec.Destination(host='sample-host')
req_obj.requested_destination = expected_destination
expected_retry = objects.SchedulerRetries(
num_attempts=2,
hosts=objects.ComputeNodeList(objects=[
objects.ComputeNode(host='host1', hypervisor_hostname='node1'),
objects.ComputeNode(host='host2', hypervisor_hostname='node2'),
]))
req_obj.retry = expected_retry
req_obj.ignore_hosts = [uuids.ignored_host]
orig_save_in_db = request_spec.RequestSpec._save_in_db
with mock.patch.object(request_spec.RequestSpec, '_save_in_db') \
as mock_save_in_db:
mock_save_in_db.side_effect = orig_save_in_db
req_obj.save()
mock_save_in_db.assert_called_once()
updates = mock_save_in_db.mock_calls[0][1][2]
# assert that the following fields are not stored in the db
# 1. ignore_hosts
data = jsonutils.loads(updates['spec'])['nova_object.data']
self.assertIsNone(data['ignore_hosts'])
self.assertIsNotNone(data['instance_uuid'])
# also we expect that the following fields are not reset after save
# 1. ignore_hosts
self.assertIsNotNone(req_obj.ignore_hosts)
self.assertEqual([uuids.ignored_host], req_obj.ignore_hosts)
def test_save(self):
req_obj = fake_request_spec.fake_spec_obj()

View File

@ -539,7 +539,11 @@ class FakeDriver(driver.ComputeDriver):
return
def confirm_migration(self, context, migration, instance, network_info):
return
# Confirm migration cleans up the guest from the source host so just
# destroy the guest to remove it from the list of tracked instances
# unless it is a same-host resize.
if migration.source_compute != migration.dest_compute:
self.destroy(context, instance, network_info)
def pre_live_migration(self, context, instance, block_device_info,
network_info, disk_info, migrate_data):