Merge "fix cleaning up evacuated instances"

commit e11a8687ae
Zuul, 2017-10-17 18:08:14 +00:00 (committed by Gerrit Code Review)
3 changed files with 157 additions and 42 deletions


@@ -653,6 +653,12 @@ class ComputeManager(manager.Manager):
         local_instances = self._get_instances_on_driver(context)
         evacuated = [inst for inst in local_instances
                      if inst.uuid in evacuations]
+
+        # NOTE(gibi): We are called from init_host and at this point the
+        # compute_nodes of the resource tracker has not been populated yet so
+        # we cannot rely on the resource tracker here.
+        compute_nodes = {}
+
         for instance in evacuated:
             migration = evacuations[instance.uuid]
             LOG.info('Deleting instance as it has been evacuated from '
@@ -676,9 +682,28 @@ class ComputeManager(manager.Manager):
                                 network_info,
                                 bdi, destroy_disks)
 
-            rt = self._get_resource_tracker()
-            rt.delete_allocation_for_evacuated_instance(
-                instance, migration.source_node)
+            # delete the allocation of the evacuated instance from this host
+            if migration.source_node not in compute_nodes:
+                try:
+                    cn_uuid = objects.ComputeNode.get_by_host_and_nodename(
+                        context, self.host, migration.source_node).uuid
+                    compute_nodes[migration.source_node] = cn_uuid
+                except exception.ComputeHostNotFound:
+                    LOG.error("Failed to clean allocation of evacuated "
+                              "instance as the source node %s is not found",
+                              migration.source_node, instance=instance)
+                    continue
+            cn_uuid = compute_nodes[migration.source_node]
+
+            my_resources = scheduler_utils.resources_from_flavor(
+                instance, instance.flavor)
+            res = self.reportclient.remove_provider_from_instance_allocation(
+                instance.uuid, cn_uuid, instance.user_id,
+                instance.project_id, my_resources)
+            if not res:
+                LOG.error("Failed to clean allocation of evacuated instance "
+                          "on the source node %s",
+                          cn_uuid, instance=instance)
 
             migration.status = 'completed'
             migration.save()
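For readers who want the new flow outside of diff context, here is a minimal, self-contained sketch of the cleanup loop added above. It is illustrative only: cleanup_evacuated_allocations, get_node_uuid, and the injected reportclient/log callables are hypothetical stand-ins, not Nova APIs; only remove_provider_from_instance_allocation and its argument order come from the change itself.

class ComputeHostNotFound(Exception):
    """Stand-in for nova.exception.ComputeHostNotFound."""


def cleanup_evacuated_allocations(evacuated, evacuations, get_node_uuid,
                                  resources_from_flavor, reportclient, log):
    # Cache nodename -> compute node UUID lookups; the resource tracker
    # cannot be used here because this runs from init_host before it has
    # been populated.
    compute_nodes = {}
    for instance in evacuated:
        migration = evacuations[instance.uuid]
        if migration.source_node not in compute_nodes:
            try:
                compute_nodes[migration.source_node] = get_node_uuid(
                    migration.source_node)
            except ComputeHostNotFound:
                # The source compute node record is gone; skip this instance.
                log('source node %s not found' % migration.source_node)
                continue
        cn_uuid = compute_nodes[migration.source_node]
        resources = resources_from_flavor(instance, instance.flavor)
        removed = reportclient.remove_provider_from_instance_allocation(
            instance.uuid, cn_uuid, instance.user_id,
            instance.project_id, resources)
        if not removed:
            log('failed to clean allocation on %s' % cn_uuid)
        migration.status = 'completed'
        migration.save()

Note that on ComputeHostNotFound the continue also skips marking that instance's migration as completed, matching the behaviour of the hunk above.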


@@ -1670,44 +1670,24 @@ class ServerMovingTests(ProviderUsageBaseTestCase):
         self.assertFlavorMatchesAllocation(self.flavor1, dest_allocation)
 
         # start up the source compute
-        # NOTE(gibi): This is bug 1721652. The restart should go through
-        # without exception.
-        self.assertRaises(KeyError, self._restart_compute_service,
-                          self.compute1)
+        self._restart_compute_service(self.compute1)
         self.admin_api.put_service(
             source_compute_id, {'forced_down': 'false'})
 
-        # NOTE(gibi): This is bug 1721652. After the source host is started up
-        # the allocation should be cleaned.
         source_usages = self._get_provider_usages(source_rp_uuid)
-        self.assertFlavorMatchesAllocation(self.flavor1, source_usages)
+        self.assertEqual({'VCPU': 0,
+                          'MEMORY_MB': 0,
+                          'DISK_GB': 0},
+                         source_usages)
 
         dest_usages = self._get_provider_usages(dest_rp_uuid)
         self.assertFlavorMatchesAllocation(self.flavor1, dest_usages)
 
         allocations = self._get_allocations_by_server_uuid(server['id'])
-        self.assertEqual(2, len(allocations))
+        self.assertEqual(1, len(allocations))
         dest_allocation = allocations[dest_rp_uuid]['resources']
         self.assertFlavorMatchesAllocation(self.flavor1, dest_allocation)
-        source_allocation = allocations[source_rp_uuid]['resources']
-        self.assertFlavorMatchesAllocation(self.flavor1, source_allocation)
-
-        # NOTE(gibi): Once the bug 1721652 is fixed this should be the expected
-        # behavior
-        # source_usages = self._get_provider_usages(source_rp_uuid)
-        # self.assertEqual({'VCPU': 0,
-        #                   'MEMORY_MB': 0,
-        #                   'DISK_GB': 0},
-        #                  source_usages)
-        #
-        # dest_usages = self._get_provider_usages(dest_rp_uuid)
-        # self.assertFlavorMatchesAllocation(self.flavor1, dest_usages)
-        #
-        # allocations = self._get_allocations_by_server_uuid(server['id'])
-        # self.assertEqual(1, len(allocations))
-        # dest_allocation = allocations[dest_rp_uuid]['resources']
-        # self.assertFlavorMatchesAllocation(self.flavor1, dest_allocation)
 
         self._delete_and_check_allocations(
             server, source_rp_uuid, dest_rp_uuid)
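The functional test above drives everything through Nova's in-tree helpers (_get_provider_usages, _get_allocations_by_server_uuid). Purely as an illustration of what those assertions amount to, the same state can be inspected against the placement HTTP API roughly as sketched below; the endpoint URL, token, and microversion header value are assumptions, not something this change defines.

import requests

PLACEMENT = 'http://placement.example.test'            # assumed endpoint
HEADERS = {'X-Auth-Token': 'ADMIN_TOKEN',              # assumed credentials
           'OpenStack-API-Version': 'placement 1.10'}  # assumed microversion


def provider_usages(rp_uuid):
    # GET /resource_providers/{uuid}/usages returns the summed usage per
    # resource class on that provider.
    url = '%s/resource_providers/%s/usages' % (PLACEMENT, rp_uuid)
    return requests.get(url, headers=HEADERS).json()['usages']


def server_allocations(consumer_uuid):
    # GET /allocations/{consumer_uuid} returns one entry per resource
    # provider the consumer still holds allocations against.
    url = '%s/allocations/%s' % (PLACEMENT, consumer_uuid)
    return requests.get(url, headers=HEADERS).json()['allocations']

With the fix applied, provider_usages(source_rp_uuid) should report zeroes and server_allocations(server_id) should list only the destination provider, which is exactly what the rewritten assertions encode.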


@@ -650,8 +650,8 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase):
         self.compute.init_virt_events()
         self.assertFalse(mock_register.called)
 
-    @mock.patch('nova.compute.resource_tracker.ResourceTracker.'
-                'delete_allocation_for_evacuated_instance')
+    @mock.patch('nova.objects.ComputeNode.get_by_host_and_nodename')
+    @mock.patch('nova.scheduler.utils.resources_from_flavor')
     @mock.patch.object(manager.ComputeManager, '_get_instances_on_driver')
     @mock.patch.object(manager.ComputeManager, 'init_virt_events')
     @mock.patch.object(context, 'get_admin_context')
@@ -663,7 +663,8 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase):
     @mock.patch('nova.objects.Migration.save')
     def test_init_host_with_evacuated_instance(self, mock_save, mock_mig_get,
             mock_temp_mut, mock_init_host, mock_destroy, mock_host_get,
-            mock_admin_ctxt, mock_init_virt, mock_get_inst, mock_delete_alloc):
+            mock_admin_ctxt, mock_init_virt, mock_get_inst, mock_resources,
+            mock_get_node):
         our_host = self.compute.host
         not_our_host = 'not-' + our_host
 
@@ -674,15 +675,29 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase):
         mock_mig_get.return_value = [migration]
         mock_admin_ctxt.return_value = self.context
         mock_host_get.return_value = objects.InstanceList()
 
+        our_node = objects.ComputeNode(host=our_host, uuid=uuids.our_node_uuid)
+        mock_get_node.return_value = our_node
+        mock_resources.return_value = mock.sentinel.my_resources
+
         # simulate failed instance
         mock_get_inst.return_value = [deleted_instance]
-        with mock.patch.object(
+        with test.nested(
+            mock.patch.object(
                 self.compute.network_api, 'get_instance_nw_info',
                 side_effect = exception.InstanceNotFound(
-                    instance_id=deleted_instance['uuid'])) as mock_get_net:
+                    instance_id=deleted_instance['uuid'])),
+            mock.patch.object(
+                self.compute.reportclient,
+                'remove_provider_from_instance_allocation')
+        ) as (mock_get_net, mock_remove_allocation):
             self.compute.init_host()
 
+            mock_remove_allocation.assert_called_once_with(
+                deleted_instance.uuid, uuids.our_node_uuid,
+                deleted_instance.user_id, deleted_instance.project_id,
+                mock.sentinel.my_resources)
+
         mock_init_host.assert_called_once_with(host=our_host)
         mock_host_get.assert_called_once_with(self.context, our_host,
             expected_attrs=['info_cache', 'metadata'])
@@ -696,8 +711,6 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase):
         mock_destroy.assert_called_once_with(self.context, deleted_instance,
                                              mock.ANY, mock.ANY, mock.ANY)
         mock_save.assert_called_once_with()
-        mock_delete_alloc.assert_called_once_with(
-            deleted_instance, migration.source_node)
 
     def test_init_instance_with_binding_failed_vif_type(self):
         # this instance will plug a 'binding_failed' vif
@@ -3334,16 +3347,21 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase):
 
     def test_destroy_evacuated_instances(self):
         our_host = self.compute.host
-        instance_1 = objects.Instance(self.context)
+        flavor = objects.Flavor()
+        instance_1 = objects.Instance(self.context, flavor=flavor)
         instance_1.uuid = uuids.instance_1
         instance_1.task_state = None
         instance_1.vm_state = vm_states.ACTIVE
         instance_1.host = 'not-' + our_host
-        instance_2 = objects.Instance(self.context)
+        instance_1.user_id = uuids.user_id
+        instance_1.project_id = uuids.project_id
+        instance_2 = objects.Instance(self.context, flavor=flavor)
         instance_2.uuid = uuids.instance_2
         instance_2.task_state = None
         instance_2.vm_state = vm_states.ACTIVE
         instance_2.host = 'not-' + our_host
+        instance_2.user_id = uuids.user_id
+        instance_2.project_id = uuids.project_id
 
         # Only instance 2 has a migration record
         migration = objects.Migration(instance_uuid=instance_2.uuid)
@@ -3351,6 +3369,9 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase):
         migration.status = 'done'
         migration.source_node = 'fake-node'
 
+        our_node = objects.ComputeNode(
+            host=our_host, uuid=uuids.our_node_uuid)
+
         with test.nested(
             mock.patch.object(self.compute, '_get_instances_on_driver',
                               return_value=[instance_1,
@@ -3364,19 +3385,108 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase):
             mock.patch.object(self.compute.driver, 'destroy'),
             mock.patch('nova.objects.MigrationList.get_by_filters'),
             mock.patch('nova.objects.Migration.save'),
-            mock.patch('nova.compute.resource_tracker.ResourceTracker.'
-                       'delete_allocation_for_evacuated_instance')
+            mock.patch('nova.objects.ComputeNode.get_by_host_and_nodename'),
+            mock.patch('nova.scheduler.utils.resources_from_flavor'),
+            mock.patch.object(self.compute.reportclient,
+                              'remove_provider_from_instance_allocation')
         ) as (_get_instances_on_driver, get_instance_nw_info,
               _get_instance_block_device_info, _is_instance_storage_shared,
-              destroy, migration_list, migration_save, remove_allocation):
+              destroy, migration_list, migration_save, get_node,
+              get_resources, remove_allocation):
             migration_list.return_value = [migration]
+            get_node.return_value = our_node
+            get_resources.return_value = mock.sentinel.resources
+
             self.compute._destroy_evacuated_instances(self.context)
+
             # Only instance 2 should be deleted. Instance 1 is still running
             # here, but no migration from our host exists, so ignore it
             destroy.assert_called_once_with(self.context, instance_2, None,
                                             {}, True)
+            get_node.assert_called_once_with(
+                self.context, our_host, migration.source_node)
             remove_allocation.assert_called_once_with(
-                instance_2, migration.source_node)
+                instance_2.uuid, uuids.our_node_uuid, uuids.user_id,
+                uuids.project_id, mock.sentinel.resources)
+
+    def test_destroy_evacuated_instances_node_deleted(self):
+        our_host = self.compute.host
+        flavor = objects.Flavor()
+        instance_1 = objects.Instance(self.context, flavor=flavor)
+        instance_1.uuid = uuids.instance_1
+        instance_1.task_state = None
+        instance_1.vm_state = vm_states.ACTIVE
+        instance_1.host = 'not-' + our_host
+        instance_1.user_id = uuids.user_id
+        instance_1.project_id = uuids.project_id
+        instance_2 = objects.Instance(self.context, flavor=flavor)
+        instance_2.uuid = uuids.instance_2
+        instance_2.task_state = None
+        instance_2.vm_state = vm_states.ACTIVE
+        instance_2.host = 'not-' + our_host
+        instance_2.user_id = uuids.user_id
+        instance_2.project_id = uuids.project_id
+
+        migration_1 = objects.Migration(instance_uuid=instance_1.uuid)
+        # Consider the migration successful but the node was deleted while the
+        # compute was down
+        migration_1.status = 'done'
+        migration_1.source_node = 'deleted-node'
+
+        migration_2 = objects.Migration(instance_uuid=instance_2.uuid)
+        # Consider the migration successful
+        migration_2.status = 'done'
+        migration_2.source_node = 'fake-node'
+
+        our_node = objects.ComputeNode(
+            host=our_host, uuid=uuids.our_node_uuid)
+
+        with test.nested(
+            mock.patch.object(self.compute, '_get_instances_on_driver',
+                              return_value=[instance_1,
+                                            instance_2]),
+            mock.patch.object(self.compute.network_api, 'get_instance_nw_info',
+                              return_value=None),
+            mock.patch.object(self.compute, '_get_instance_block_device_info',
+                              return_value={}),
+            mock.patch.object(self.compute, '_is_instance_storage_shared',
+                              return_value=False),
+            mock.patch.object(self.compute.driver, 'destroy'),
+            mock.patch('nova.objects.MigrationList.get_by_filters'),
+            mock.patch('nova.objects.Migration.save'),
+            mock.patch('nova.objects.ComputeNode.get_by_host_and_nodename'),
+            mock.patch('nova.scheduler.utils.resources_from_flavor'),
+            mock.patch.object(self.compute.reportclient,
+                              'remove_provider_from_instance_allocation')
+        ) as (_get_instances_on_driver, get_instance_nw_info,
+              _get_instance_block_device_info, _is_instance_storage_shared,
+              destroy, migration_list, migration_save, get_node,
+              get_resources, remove_allocation):
+            migration_list.return_value = [migration_1, migration_2]
+
+            def fake_get_node(context, host, node):
+                if node == 'fake-node':
+                    return our_node
+                else:
+                    raise exception.ComputeHostNotFound(host=host)
+
+            get_node.side_effect = fake_get_node
+            get_resources.return_value = mock.sentinel.resources
+
+            self.compute._destroy_evacuated_instances(self.context)
+
+            # both instance_1 and instance_2 are destroyed in the driver
+            destroy.assert_has_calls(
+                [mock.call(self.context, instance_1, None, {}, True),
+                 mock.call(self.context, instance_2, None, {}, True)])
+
+            # but only instance_2 is deallocated as the compute node for
+            # instance_1 is already deleted
+            remove_allocation.assert_called_once_with(
+                instance_2.uuid, uuids.our_node_uuid, uuids.user_id,
+                uuids.project_id, mock.sentinel.resources)
+
+        self.assertEqual(2, get_node.call_count)
 
     @mock.patch('nova.compute.manager.ComputeManager.'
                 '_destroy_evacuated_instances')