Merge "remove virt driver requires_allocation_refresh"

Zuul 2018-09-22 04:30:34 +00:00 committed by Gerrit Code Review
commit b4a3cdbe6e
7 changed files with 2 additions and 335 deletions


@@ -1160,7 +1160,7 @@ class ResourceTracker(object):
continue
def _update_usage_from_instance(self, context, instance, nodename,
is_removed=False, require_allocation_refresh=False):
is_removed=False):
"""Update usage for a single instance."""
uuid = instance['uuid']
@@ -1189,10 +1189,6 @@ class ResourceTracker(object):
self.pci_tracker.update_pci_for_instance(context,
instance,
sign=sign)
if require_allocation_refresh:
LOG.debug("Auto-correcting allocations.")
self.reportclient.update_instance_allocation(context, cn,
instance, sign)
# new instance, update compute node resource usage:
self._update_usage(self._get_usage_dict(instance), nodename,
sign=sign)
@@ -1228,78 +1224,10 @@ class ResourceTracker(object):
cn.current_workload = 0
cn.running_vms = 0
# NOTE(jaypipes): In Pike, we need to be tolerant of Ocata compute
# nodes that overwrite placement allocations to look like what the
# resource tracker *thinks* is correct. When an instance is
# migrated from an Ocata compute node to a Pike compute node, the
# Pike scheduler will have created a "doubled-up" allocation that
# contains allocated resources against both the source and
# destination hosts. The Ocata source compute host, during its
# update_available_resource() periodic call will find the instance
# in its list of known instances and will call
# update_instance_allocation() in the report client. That call will
# pull the allocations for the instance UUID which will contain
# both the source and destination host providers in the allocation
# set. Seeing that this is different from what the Ocata source
# host thinks it should be, the Ocata host will overwrite the
# allocation to be against only itself.
#
# Therefore, Pike compute hosts need to
# "correct" the improper healing that the Ocata source host did
# during its periodic interval. When the instance is fully migrated
# to the Pike compute host, the Ocata compute host will find an
# allocation that refers to itself for an instance it no longer
# controls and will *delete* all allocations that refer to that
# instance UUID, assuming that the instance has been deleted. We
# need the destination Pike compute host to recreate that
# allocation to refer to its own resource provider UUID.
#
# For Pike compute nodes that migrate to either a Pike compute host
# or a Queens compute host, we do NOT want the Pike compute host to
# be "healing" allocation information. Instead, we rely on the Pike
# scheduler to properly create allocations during scheduling.
#
# Pike compute hosts may still rework an
# allocation for an instance in a move operation during
# confirm_resize() on the source host, which will remove the
# source resource provider from any allocation for the
# instance.
#
# In Queens and beyond, the scheduler will understand when
# a move operation has been requested and instead of
# creating a doubled-up allocation that contains both the
# source and destination host, the scheduler will take the
# original allocation (against the source host) and change
# the consumer ID of that allocation to be the migration
# UUID and not the instance UUID. The scheduler will
# allocate the resources for the destination host to the
# instance UUID.
# Some drivers (ironic) still need the allocations to be
# fixed up, as they transition the way their inventory is reported.
require_allocation_refresh = self.driver.requires_allocation_refresh
if require_allocation_refresh:
msg_allocation_refresh = (
"Compute driver requires allocation refresh. "
"Will auto-correct allocations to handle "
"Ocata-style assumptions.")
else:
msg_allocation_refresh = (
"Compute driver doesn't require allocation refresh and we're "
"on a compute host in a deployment that only has compute "
"hosts with Nova versions >=16 (Pike). Skipping "
"auto-correction of allocations.")
instance_by_uuid = {}
for instance in instances:
if instance.vm_state not in vm_states.ALLOW_RESOURCE_REMOVAL:
if msg_allocation_refresh:
LOG.debug(msg_allocation_refresh)
msg_allocation_refresh = False
self._update_usage_from_instance(context, instance, nodename,
require_allocation_refresh=require_allocation_refresh)
self._update_usage_from_instance(context, instance, nodename)
instance_by_uuid[instance.uuid] = instance
return instance_by_uuid
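
The long NOTE(jaypipes) comment removed above describes two allocation shapes: the Pike-era "doubled-up" allocation held by the instance during a migration, and the Queens-style split where the source host's usage is consumed by the migration record instead. The sketch below is purely illustrative (it is not part of the patch, and the consumer/provider names and resource amounts are invented); it only spells out those two shapes as Python dicts:

# Illustrative sketch of the allocation shapes described in the removed
# comment; all UUID strings and resource amounts here are made up.
INSTANCE_UUID = 'instance-consumer-uuid'    # hypothetical
MIGRATION_UUID = 'migration-consumer-uuid'  # hypothetical

# Pike-era "doubled-up" allocation: the instance consumes resources on both
# the source and the destination resource provider at the same time.
pike_doubled_up = {
    INSTANCE_UUID: {
        'allocations': {
            'source-rp-uuid': {'resources': {'VCPU': 2, 'MEMORY_MB': 2048}},
            'dest-rp-uuid': {'resources': {'VCPU': 2, 'MEMORY_MB': 2048}},
        },
    },
}

# Queens-and-later shape: the scheduler re-consumes the source-side usage
# with the migration UUID, so each consumer holds a single-provider
# allocation and nothing needs to be healed afterwards.
queens_split = {
    MIGRATION_UUID: {
        'allocations': {
            'source-rp-uuid': {'resources': {'VCPU': 2, 'MEMORY_MB': 2048}},
        },
    },
    INSTANCE_UUID: {
        'allocations': {
            'dest-rp-uuid': {'resources': {'VCPU': 2, 'MEMORY_MB': 2048}},
        },
    },
}

Once every compute node creates allocations the Queens way, there is nothing left for the resource tracker to heal, which is why the flag and the refresh call above can be deleted.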


@@ -186,221 +186,6 @@ class IronicResourceTrackerTest(test_base.SchedulerReportClientTestBase):
instances[instance.uuid] = instance
return instances
def placement_get_inventory(self, rp_uuid):
url = '/resource_providers/%s/inventories' % rp_uuid
resp = self.report_client.get(url)
if 200 <= resp.status_code < 300:
return resp.json()['inventories']
else:
return resp.status_code
def placement_get_allocations(self, consumer_uuid):
url = '/allocations/%s' % consumer_uuid
resp = self.report_client.get(url)
if 200 <= resp.status_code < 300:
return resp.json()['allocations']
else:
return resp.status_code
def placement_get_custom_rcs(self):
url = '/resource_classes'
resp = self.report_client.get(url)
if 200 <= resp.status_code < 300:
all_rcs = resp.json()['resource_classes']
return [rc['name'] for rc in all_rcs
if rc['name'] not in fields.ResourceClass.STANDARD]
@mock.patch('nova.compute.utils.is_volume_backed_instance',
new=mock.Mock(return_value=False))
@mock.patch('nova.objects.compute_node.ComputeNode.save', new=mock.Mock())
def test_ironic_ocata_to_pike(self):
"""Check that when going from an Ocata installation with Ironic having
node's resource class attributes set, we properly "auto-heal" the
inventory and allocation records in the placement API to account for
both the old-style VCPU/MEMORY_MB/DISK_GB resources as well as the new
custom resource class from Ironic's node.resource_class attribute.
"""
with self._interceptor():
# Before the resource tracker is "initialized", we shouldn't have
# any compute nodes in the RT's cache...
self.assertEqual(0, len(self.rt.compute_nodes))
# There should not be any records in the placement API since we
# haven't yet run update_available_resource() in the RT.
for cn in self.COMPUTE_NODE_FIXTURES.values():
self.assertEqual(404, self.placement_get_inventory(cn.uuid))
for inst in self.INSTANCE_FIXTURES.keys():
self.assertEqual({}, self.placement_get_allocations(inst))
# Nor should there be any custom resource classes in the placement
# API, since we haven't had an Ironic node's resource class set yet
self.assertEqual(0, len(self.placement_get_custom_rcs()))
# Now "initialize" the resource tracker as if the compute host is a
# Ocata host, with Ironic virt driver, but the admin has not yet
# added a resource_class attribute to the Ironic baremetal nodes in
# her system.
# NOTE(jaypipes): This is what nova.compute.manager.ComputeManager
# does when "initializing" the service...
for cn in self.COMPUTE_NODE_FIXTURES.values():
nodename = cn.hypervisor_hostname
self.driver_mock.get_available_resource.return_value = {
'hypervisor_hostname': nodename,
'hypervisor_type': 'ironic',
'hypervisor_version': 0,
'vcpus': cn.vcpus,
'vcpus_used': cn.vcpus_used,
'memory_mb': cn.memory_mb,
'memory_mb_used': cn.memory_mb_used,
'local_gb': cn.local_gb,
'local_gb_used': cn.local_gb_used,
'numa_topology': None,
'resource_class': None, # Act like admin hasn't set yet...
}
self.driver_mock.get_inventory.return_value = {
VCPU: {
'total': cn.vcpus,
'reserved': 0,
'min_unit': 1,
'max_unit': cn.vcpus,
'step_size': 1,
'allocation_ratio': 1.0,
},
MEMORY_MB: {
'total': cn.memory_mb,
'reserved': 0,
'min_unit': 1,
'max_unit': cn.memory_mb,
'step_size': 1,
'allocation_ratio': 1.0,
},
DISK_GB: {
'total': cn.local_gb,
'reserved': 0,
'min_unit': 1,
'max_unit': cn.local_gb,
'step_size': 1,
'allocation_ratio': 1.0,
},
}
self.rt.update_available_resource(self.ctx, nodename)
self.assertEqual(3, len(self.rt.compute_nodes))
# A canary to make sure the custom resource class asserted below
# wasn't somehow added already...
crcs = self.placement_get_custom_rcs()
self.assertNotIn('CUSTOM_SMALL_IRON', crcs)
# Verify that the placement API has the "old-style" resources in
# inventory and allocations
for cn in self.COMPUTE_NODE_FIXTURES.values():
inv = self.placement_get_inventory(cn.uuid)
self.assertEqual(3, len(inv))
# Now "spawn" an instance to the first compute node by calling the
# RT's instance_claim().
cn1_obj = self.COMPUTE_NODE_FIXTURES[uuids.cn1]
cn1_nodename = cn1_obj.hypervisor_hostname
inst = self.instances[uuids.instance1]
# Since we're Pike, the scheduler would have created our
# allocation for us, so we can use our old update routine
# here to mimic that before we do the compute RT claim
# and then run the checks below.
self.rt.reportclient.update_instance_allocation(self.ctx,
cn1_obj,
inst,
1)
with self.rt.instance_claim(self.ctx, inst, cn1_nodename):
pass
allocs = self.placement_get_allocations(inst.uuid)
self.assertEqual(1, len(allocs))
self.assertIn(uuids.cn1, allocs)
resources = allocs[uuids.cn1]['resources']
self.assertEqual(3, len(resources))
for rc in (VCPU, MEMORY_MB, DISK_GB):
self.assertIn(rc, resources)
# Now we emulate the operator setting ONE of the Ironic node's
# resource class attribute to the value of a custom resource class
# and re-run update_available_resource(). We will expect to see the
# inventory and allocations reset for the first compute node that
# had an instance on it. The new inventory and allocation records
# will be for VCPU, MEMORY_MB, DISK_GB, and also a new record for
# the custom resource class of the Ironic node.
self.driver_mock.get_available_resource.return_value = {
'hypervisor_hostname': cn1_obj.hypervisor_hostname,
'hypervisor_type': 'ironic',
'hypervisor_version': 0,
'vcpus': cn1_obj.vcpus,
'vcpus_used': cn1_obj.vcpus_used,
'memory_mb': cn1_obj.memory_mb,
'memory_mb_used': cn1_obj.memory_mb_used,
'local_gb': cn1_obj.local_gb,
'local_gb_used': cn1_obj.local_gb_used,
'numa_topology': None,
'resource_class': 'small-iron',
}
self.driver_mock.get_inventory.return_value = {
VCPU: {
'total': cn1_obj.vcpus,
'reserved': 0,
'min_unit': 1,
'max_unit': cn1_obj.vcpus,
'step_size': 1,
'allocation_ratio': 1.0,
},
MEMORY_MB: {
'total': cn1_obj.memory_mb,
'reserved': 0,
'min_unit': 1,
'max_unit': cn1_obj.memory_mb,
'step_size': 1,
'allocation_ratio': 1.0,
},
DISK_GB: {
'total': cn1_obj.local_gb,
'reserved': 0,
'min_unit': 1,
'max_unit': cn1_obj.local_gb,
'step_size': 1,
'allocation_ratio': 1.0,
},
'CUSTOM_SMALL_IRON': {
'total': 1,
'reserved': 0,
'min_unit': 1,
'max_unit': 1,
'step_size': 1,
'allocation_ratio': 1.0,
},
}
self.rt.update_available_resource(self.ctx, cn1_nodename)
# Verify the auto-creation of the custom resource class, normalized
# to what the placement API expects
self.assertIn('CUSTOM_SMALL_IRON', self.placement_get_custom_rcs())
allocs = self.placement_get_allocations(inst.uuid)
self.assertEqual(1, len(allocs))
self.assertIn(uuids.cn1, allocs)
resources = allocs[uuids.cn1]['resources']
self.assertEqual(3, len(resources))
for rc in (VCPU, MEMORY_MB, DISK_GB):
self.assertIn(rc, resources)
# TODO(jaypipes): Check allocations include the CUSTOM_SMALL_IRON
# resource class. At the moment, we do not add an allocation record
# for the Ironic custom resource class. Once the flavor is updated
# to store a resources:$CUSTOM_RESOURCE_CLASS=1 extra_spec key and
# the scheduler is constructing the request_spec to actually
# request a single amount of that custom resource class, we will
# modify the allocation/claim to consume only the custom resource
# class and not the VCPU, MEMORY_MB and DISK_GB.
@mock.patch('nova.compute.utils.is_volume_backed_instance',
new=mock.Mock(return_value=False))
@mock.patch('nova.objects.compute_node.ComputeNode.save', new=mock.Mock())
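
The deleted functional test exercised the Ocata-to-Pike "auto-heal" path: the operator sets node.resource_class = 'small-iron' on the Ironic node, and the next update_available_resource() run creates a CUSTOM_SMALL_IRON resource class in placement. The helper below only illustrates that name normalization, under the assumption that placement custom classes are upper-cased, underscore-separated and CUSTOM_-prefixed; it is not the function nova actually uses:

import re

def normalize_custom_rc(name):
    # Illustrative only: upper-case the Ironic node's resource_class,
    # replace anything that is not A-Z, 0-9 or '_' with '_', and add the
    # CUSTOM_ prefix that placement requires for non-standard classes.
    return 'CUSTOM_' + re.sub(r'[^A-Z0-9_]', '_', name.upper())

assert normalize_custom_rc('small-iron') == 'CUSTOM_SMALL_IRON'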


@@ -3104,40 +3104,6 @@ class TestUpdateUsageFromInstance(BaseTestCase):
self.assertEqual(-1024, cn.free_ram_mb)
self.assertEqual(-1, cn.free_disk_gb)
def test_update_usage_from_instances_refreshes_ironic(self):
self.rt.driver.requires_allocation_refresh = True
@mock.patch.object(self.rt,
'_remove_deleted_instances_allocations')
@mock.patch.object(self.rt, '_update_usage_from_instance')
@mock.patch('nova.objects.Service.get_minimum_version',
return_value=22)
def test(version_mock, uufi, rdia):
self.rt._update_usage_from_instances('ctxt', [self.instance],
_NODENAME)
uufi.assert_called_once_with('ctxt', self.instance, _NODENAME,
require_allocation_refresh=True)
test()
def test_update_usage_from_instances_no_refresh(self):
self.rt.driver.requires_allocation_refresh = False
@mock.patch.object(self.rt,
'_remove_deleted_instances_allocations')
@mock.patch.object(self.rt, '_update_usage_from_instance')
@mock.patch('nova.objects.Service.get_minimum_version',
return_value=22)
def test(version_mock, uufi, rdia):
self.rt._update_usage_from_instances('ctxt', [self.instance],
_NODENAME)
uufi.assert_called_once_with('ctxt', self.instance, _NODENAME,
require_allocation_refresh=False)
test()
@mock.patch('nova.scheduler.utils.resources_from_flavor')
def test_delete_allocation_for_evacuated_instance(
self, mock_resource_from_flavor):
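
With the flag gone, the two removed tests above collapse into a single behaviour to assert: the resource tracker calls _update_usage_from_instance() without any refresh keyword. A sketch of what that one remaining test could look like, assuming the fixtures the surrounding class already provides (self.rt, self.instance, _NODENAME and the mock library); the class name is hypothetical and the code is not taken from the patch:

class TestUpdateUsageFromInstancesSimplified(BaseTestCase):  # hypothetical
    def test_update_usage_from_instances(self):
        @mock.patch.object(self.rt, '_remove_deleted_instances_allocations')
        @mock.patch.object(self.rt, '_update_usage_from_instance')
        def test(uufi, rdia):
            self.rt._update_usage_from_instances('ctxt', [self.instance],
                                                 _NODENAME)
            # No require_allocation_refresh kwarg is expected any more.
            uufi.assert_called_once_with('ctxt', self.instance, _NODENAME)
        test()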


@@ -144,8 +144,6 @@ class IronicDriverTestCase(test.NoDBTestCase):
'supports_migrate_to_same_host'],
'Driver capabilities for '
'\'supports_migrate_to_same_host\' is invalid')
self.assertTrue(self.driver.requires_allocation_refresh,
'Driver requires allocation refresh')
def test__get_hypervisor_type(self):
self.assertEqual('ironic', self.driver._get_hypervisor_type())


@@ -944,8 +944,6 @@ class LibvirtConnTestCase(test.NoDBTestCase,
'Driver capabilities for '
'\'supports_extend_volume\' '
'is invalid')
self.assertFalse(drvr.requires_allocation_refresh,
'Driver does not need allocation refresh')
self.assertTrue(drvr.capabilities['supports_trusted_certs'],
'Driver capabilities for '
'\'supports_trusted_certs\' '


@@ -134,8 +134,6 @@ class ComputeDriver(object):
"supports_trusted_certs": False,
}
requires_allocation_refresh = False
# Indicates if this driver will rebalance nodes among compute service
# hosts. This is really here for ironic and should not be used by any
# other driver.


@@ -142,12 +142,6 @@ class IronicDriver(virt_driver.ComputeDriver):
"supports_trusted_certs": False,
}
# Needed for existing instances to have allocations for custom resource
# class resources
# TODO(johngarbutt) we should remove this once the resource class
# migration has been completed.
requires_allocation_refresh = True
# This driver is capable of rebalancing nodes between computes.
rebalances_nodes = True
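
The last two hunks delete both ends of the same mechanism: the ComputeDriver default and the IronicDriver override of a class-level flag that the resource tracker read as self.driver.requires_allocation_refresh. A stripped-down sketch of that pattern (simplified class names, not nova's real ones):

class ComputeDriverSketch(object):
    # Base default: most drivers never needed the Ocata-era allocation
    # healing.
    requires_allocation_refresh = False

class IronicDriverSketch(ComputeDriverSketch):
    # Ironic opted in while its inventory reporting moved to custom resource
    # classes; with that migration finished, the override and the base flag
    # can both go, which is exactly what this change does.
    requires_allocation_refresh = True

def update_usage(driver):
    # The resource tracker only had to read the class attribute and, when it
    # was True, push refreshed allocations back to placement.
    if driver.requires_allocation_refresh:
        print('refreshing allocations for %s' % type(driver).__name__)

update_usage(IronicDriverSketch())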