Merge "Sanity check that new hosts have no instances"

This commit is contained in:
Zuul 2023-10-11 15:48:02 +00:00 committed by Gerrit Code Review
commit 1e75373beb
3 changed files with 44 additions and 8 deletions

View File

@ -1570,6 +1570,18 @@ class ComputeManager(manager.Manager):
LOG.debug('Verified node %s matches my host %s',
node.uuid, self.host)
def _sanity_check_new_host(self):
instances_on_hv = self.driver.list_instance_uuids()
if len(instances_on_hv) > 0:
# This means we have instances on our hypervisor, but we think
# we are a new host (i.e. we created a new service record). That
# likely means we're pointed at an empty database or the wrong
# cell.
raise exception.InvalidConfiguration(
'My hypervisor has existing instances, but I appear to be '
'a new service in this database. Possible database '
'configuration error, refusing to start!')
def init_host(self, service_ref):
"""Initialization for a standalone compute service."""
@ -1578,6 +1590,10 @@ class ComputeManager(manager.Manager):
# to record a locally-persistent node identity because
# we have upgraded from a previous version.
self._ensure_existing_node_identity(service_ref)
else:
# If we are a new service (in the database), make sure we have no
# instances on our hypervisor as we would expect.
self._sanity_check_new_host()
if CONF.pci.device_spec:
# Simply loading the PCI passthrough spec will do a bunch of

View File

@ -120,7 +120,8 @@ class TestServicesAPI(integrated_helpers.ProviderUsageBaseTestCase):
"""Tests a scenario where a server is created on a host, the host
goes down, the server is evacuated to another host, and then the
source host compute service is deleted. After that the deleted
compute service is restarted and starts successfully.
compute service is restarted and refuses to run because it finds its
service record deleted even though it has instances.
"""
# Create our source host that we will evacuate *from* later.
host1 = self._start_compute('host1')
@ -154,12 +155,16 @@ class TestServicesAPI(integrated_helpers.ProviderUsageBaseTestCase):
# Then the resource provider is also deleted.
resp = self.placement.get('/resource_providers/%s' % rp_uuid)
self.assertEqual(404, resp.status)
# Try to restart the host1 compute service to create a new service
# and a new resource provider.
self.restart_compute_service(host1)
# Make sure the compute service record for host1 is recreated.
service = self.admin_api.get_services(
binary='nova-compute', host='host1')[0]
# Try to restart the host1 compute service and make sure it recognizes
# that its service record has been deleted even though it still has
# instances running.
self.assertRaises(exception.InvalidConfiguration,
self.restart_compute_service, host1)
# Make sure the compute service record for host1 is not recreated
# since we aborted startup.
services = self.admin_api.get_services(
binary='nova-compute', host='host1')
self.assertEqual([], services)
def test_migrate_confirm_after_deleted_source_compute(self):
"""Tests a scenario where a server is cold migrated and while in

View File

@ -916,6 +916,8 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
return instance_obj._make_instance_list(
self.context, objects.InstanceList(), db_list, None)
@mock.patch.object(manager.ComputeManager,
'_sanity_check_new_host')
@mock.patch.object(manager.ComputeManager,
'_ensure_existing_node_identity')
@mock.patch.object(manager.ComputeManager, '_get_nodes')
@ -937,7 +939,7 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
mock_destroy, mock_admin_ctxt, mock_host_get,
mock_init_host,
mock_error_interrupted, mock_get_nodes,
mock_existing_node):
mock_existing_node, mock_check_new):
mock_admin_ctxt.return_value = self.context
inst_list = _make_instance_list(startup_instances)
mock_host_get.return_value = inst_list
@ -948,6 +950,7 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
self.compute.init_host(None)
mock_check_new.assert_called_once_with()
mock_existing_node.assert_not_called()
mock_validate_pinning.assert_called_once_with(inst_list)
mock_validate_vtpm.assert_called_once_with(inst_list)
@ -1001,6 +1004,18 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
mock_get_nodes.assert_called_once_with(
test.MatchType(nova.context.RequestContext))
def test_init_host_new_with_instances(self):
    """Check that init_host() aborts for a new service with instances.

    init_host() is called with no service_ref (None), which marks us as
    a new service, while the driver is mocked to report one existing
    instance UUID. That combination suggests we were moved to another
    cell, our database got wiped, etc., so InvalidConfiguration is
    expected.
    """
    reported_uuids = ['foo']
    driver_patch = mock.patch.object(
        self.compute.driver, 'list_instance_uuids',
        return_value=reported_uuids)
    with driver_patch:
        self.assertRaises(exception.InvalidConfiguration,
                          self.compute.init_host, None)
@mock.patch('nova.objects.InstanceList')
@mock.patch('nova.objects.MigrationList.get_by_filters')
@mock.patch('nova.objects.ComputeNodeList.get_all_by_uuids')