Heal allocation for same host resize

Same host resize needs special handling in the allocation healing logic
because both the source and the dest host PCI devices are visible to the
healing code: PciDevice.instance_uuid points to the healed instance in
both cases.

blueprint: pci-device-tracking-in-placement
Change-Id: Id02e445c55fc956965b7d725f0260876d42422f2
Balazs Gibizer 2022-08-26 18:02:59 +02:00
parent 98e9989cad
commit ab439dadb1
6 changed files with 227 additions and 9 deletions

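To make the problem concrete, here is a minimal sketch of the state the
healing logic sees while a same host resize is in flight. The UUIDs,
resource provider names and dict shapes below are invented for
illustration and are not Nova internals: Placement has already moved the
source side allocation to the migration UUID, while every claimed
PciDevice on the host still points at the instance UUID.

instance_uuid = "11111111-1111-1111-1111-111111111111"
migration_uuid = "22222222-2222-2222-2222-222222222222"

# Placement: the source side allocation was moved to the migration UUID,
# the destination side allocation is held by the instance UUID.
placement_allocations = {
    migration_uuid: {
        "allocations": {"compute1_0000:81:00.0": {"CUSTOM_PCI_VF": 1}}},
    instance_uuid: {
        "allocations": {"compute1_0000:81:00.0": {"CUSTOM_PCI_VF": 2}}},
}

# PCI tracker: every device claimed on this host, source and dest side
# alike, has instance_uuid set to the instance, so a naive healer would
# try to allocate all three VFs to the instance UUID and double count
# the source side.
pci_device_claims = [
    {"address": "0000:81:00.1", "instance_uuid": instance_uuid},  # source
    {"address": "0000:81:00.2", "instance_uuid": instance_uuid},  # dest
    {"address": "0000:81:00.3", "instance_uuid": instance_uuid},  # dest
]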

@@ -254,7 +254,8 @@ class PciResourceProvider:
def update_allocations(
self,
allocations: dict,
provider_tree: provider_tree.ProviderTree
provider_tree: provider_tree.ProviderTree,
same_host_instances: ty.List[str],
) -> bool:
updated = False
@@ -293,6 +294,21 @@ class PciResourceProvider:
# heal_allocation CLI instead.
continue
if consumer in same_host_instances:
# This is a nasty special case. This instance is undergoing
# a same host resize. So in Placement the source host
# allocation is held by the migration UUID *but* the
# PciDevice.instance_uuid is set to the instance UUID both
# on the source and on the destination host. As the source and
# dest are the same for this migration we will see PciDevice
# objects assigned to this instance that should not be
# allocated to the instance UUID in placement.
# As noted above we don't want to handle in-progress
# migrations during healing. So we simply ignore this instance.
# If the instance needs healing then it will be healed after
# the migration is confirmed or reverted.
continue
current_allocs = allocations[consumer]['allocations']
current_rp_allocs = current_allocs.get(rp_uuid)
@@ -326,9 +342,14 @@
class PlacementView:
"""The PCI Placement view"""
def __init__(self, hypervisor_hostname: str) -> None:
def __init__(
self,
hypervisor_hostname: str,
instances_under_same_host_resize: ty.List[str],
) -> None:
self.rps: ty.Dict[str, PciResourceProvider] = {}
self.root_rp_name = hypervisor_hostname
self.same_host_instances = instances_under_same_host_resize
def _get_rp_name_for_address(self, addr: str) -> str:
return f"{self.root_rp_name}_{addr.upper()}"
@@ -459,7 +480,11 @@ class PlacementView:
"""
updated = False
for rp in self.rps.values():
updated |= rp.update_allocations(allocations, provider_tree)
updated |= rp.update_allocations(
allocations,
provider_tree,
self.same_host_instances,
)
return updated
@@ -500,6 +525,7 @@ def update_provider_tree_for_pci(
nodename: str,
pci_tracker: pci_manager.PciDevTracker,
allocations: dict,
instances_under_same_host_resize: ty.List[str],
) -> bool:
"""Based on the PciDevice objects in the pci_tracker it calculates what
inventories and allocations needs to exist in placement and create the
@@ -529,6 +555,8 @@ def update_provider_tree_for_pci(
},
...
}
:param instances_under_same_host_resize: A list of instance UUIDs that
are undergoing same host resize on this host.
"""
if not _is_placement_tracking_enabled():
ensure_tracking_was_not_enabled_before(provider_tree)
@@ -541,7 +569,7 @@ def update_provider_tree_for_pci(
'Collecting PCI inventories and allocations to track them in Placement'
)
pv = PlacementView(nodename)
pv = PlacementView(nodename, instances_under_same_host_resize)
for dev in pci_tracker.pci_devs:
# match the PCI device with the [pci]dev_spec config to access
# the configuration metadata tags

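For illustration only, a minimal standalone sketch of the skip that the
update_allocations hunk above adds. The function name and data shapes
here are invented; the real method also updates the provider tree and
reports whether anything changed.

import typing as ty


def heal_pci_consumers(
    device_consumers: ty.Dict[str, str],
    allocations: dict,
    same_host_instances: ty.List[str],
) -> None:
    """Illustrative sketch: device_consumers maps a PCI device address
    to the consumer (instance) UUID recorded by the PCI tracker."""
    for addr, consumer in device_consumers.items():
        if consumer in same_host_instances:
            # The source side allocation already belongs to the migration
            # UUID in Placement, so healing against the instance UUID here
            # would double count the device. Heal after the resize is
            # confirmed or reverted instead.
            continue
        # Everyone else gets (or keeps) an allocation entry keyed by the
        # instance UUID.
        allocations.setdefault(consumer, {"allocations": {}})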

@@ -1260,6 +1260,11 @@ class ResourceTracker(object):
context, nodename, provider_tree=prov_tree)
prov_tree.update_traits(nodename, traits)
instances_under_same_host_resize = [
migration.instance_uuid
for migration in self.tracked_migrations.values()
if migration.is_same_host_resize
]
# NOTE(gibi): Tracking PCI in placement is different from other
# resources.
#
@@ -1278,7 +1283,12 @@
# is enabled. So we need to be ready to heal PCI allocations at
# every call not just at startup.
pci_reshaped = pci_placement_translator.update_provider_tree_for_pci(
prov_tree, nodename, self.pci_tracker, allocs)
prov_tree,
nodename,
self.pci_tracker,
allocs,
instances_under_same_host_resize,
)
self.provider_tree = prov_tree


@@ -215,6 +215,10 @@ class Migration(base.NovaPersistentObject, base.NovaObject):
def is_resize(self):
return self.migration_type == fields.MigrationType.RESIZE
@property
def is_same_host_resize(self):
return self.is_resize and self.source_node == self.dest_node
@base.NovaObjectRegistry.register
class MigrationList(base.ObjectListBase, base.NovaObject):

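A hedged usage sketch of the new is_same_host_resize property combined
with the resource tracker filter from the earlier hunk; it mirrors the
unit test further down and assumes a development environment where nova
is importable. The node names and UUIDs are made up.

from uuid import uuid4

# Importing the module registers the Migration versioned object.
from nova.objects import migration as migration_obj

same_host_uuid = str(uuid4())
cross_host_uuid = str(uuid4())

tracked_migrations = {
    same_host_uuid: migration_obj.Migration(
        migration_type="resize",
        source_node="node-a",
        dest_node="node-a",
        instance_uuid=same_host_uuid,
    ),
    cross_host_uuid: migration_obj.Migration(
        migration_type="resize",
        source_node="node-a",
        dest_node="node-b",
        instance_uuid=cross_host_uuid,
    ),
}

# Same shape as the resource tracker hunk: only the resize that stays on
# the same node is reported to the PCI healing code.
instances_under_same_host_resize = [
    m.instance_uuid
    for m in tracked_migrations.values()
    if m.is_same_host_resize
]
assert instances_under_same_host_resize == [same_host_uuid]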

@@ -1346,3 +1346,113 @@ class PlacementPCIAllocationHealingTests(PlacementPCIReportingTests):
del compute1_expected_placement_view["allocations"][server["id"]]
self.assert_placement_pci_view(
"compute1", **compute1_expected_placement_view)
def test_heal_allocation_during_same_host_resize(self):
self.flags(allow_resize_to_same_host=True)
# The fake libvirt will emulate on the host:
# * one type-PF (slot 0) with 3 type-VFs
compute1_pci_info = fakelibvirt.HostPCIDevicesInfo(
num_pci=0, num_pfs=1, num_vfs=3)
# the config matches just the VFs
compute1_device_spec = self._to_device_spec_conf(
[
{
"vendor_id": fakelibvirt.PCI_VEND_ID,
"product_id": fakelibvirt.VF_PROD_ID,
"address": "0000:81:00.*",
},
]
)
self.flags(group='pci', device_spec=compute1_device_spec)
# Start a compute with PCI tracking in placement
self.mock_pci_report_in_placement.return_value = True
self.start_compute(hostname="compute1", pci_info=compute1_pci_info)
self.assertPCIDeviceCounts("compute1", total=3, free=3)
compute1_expected_placement_view = {
"inventories": {
"0000:81:00.0": {self.VF_RC: 3},
},
"traits": {
"0000:81:00.0": [],
},
"usages": {
"0000:81:00.0": {self.VF_RC: 0},
},
"allocations": {},
}
self.assert_placement_pci_view(
"compute1", **compute1_expected_placement_view)
# Create an instance consuming one VF
extra_spec = {"pci_passthrough:alias": "a-vf:1"}
flavor_id = self._create_flavor(extra_spec=extra_spec)
server = self._create_server(flavor_id=flavor_id, networks=[])
self.assertPCIDeviceCounts("compute1", total=3, free=2)
# As scheduling does not support PCI in placement yet, no allocation
# is created for the PCI consumption by the scheduler. BUT the resource
# tracker in the compute will heal the missing PCI allocation.
compute1_expected_placement_view[
"usages"]["0000:81:00.0"][self.VF_RC] = 1
compute1_expected_placement_view["allocations"][server["id"]] = {
"0000:81:00.0": {self.VF_RC: 1}
}
self.assert_placement_pci_view(
"compute1", **compute1_expected_placement_view)
self._run_periodics()
self.assert_placement_pci_view(
"compute1", **compute1_expected_placement_view)
# resize the server to consume 2 VFs on the same host
extra_spec = {"pci_passthrough:alias": "a-vf:2"}
flavor_id = self._create_flavor(extra_spec=extra_spec)
server = self._resize_server(server, flavor_id)
# during resize both the source and the dest allocations are kept
# and in a same host resize both are consumed from the same host
self.assertPCIDeviceCounts("compute1", total=3, free=0)
# the source side of the allocation is held by the migration
self._move_server_allocation(
compute1_expected_placement_view["allocations"], server['id'])
# NOTE(gibi): we intentionally don't heal the allocation for the instance
# while it is being resized. See the comment in the
# pci_placement_translator about the reasoning.
self.assert_placement_pci_view(
"compute1", **compute1_expected_placement_view)
self._run_periodics()
self.assert_placement_pci_view(
"compute1", **compute1_expected_placement_view)
# revert the resize
self._revert_resize(server)
self.assertPCIDeviceCounts("compute1", total=3, free=2)
# the original allocations are restored
self._move_server_allocation(
compute1_expected_placement_view["allocations"],
server["id"],
revert=True,
)
compute1_expected_placement_view[
"usages"]["0000:81:00.0"][self.VF_RC] = 1
compute1_expected_placement_view["allocations"][server["id"]] = {
"0000:81:00.0": {self.VF_RC: 1}
}
self.assert_placement_pci_view(
"compute1", **compute1_expected_placement_view)
self._run_periodics()
self.assert_placement_pci_view(
"compute1", **compute1_expected_placement_view)
# now resize and then confirm it
self._resize_server(server, flavor_id)
self._confirm_resize(server)
# we expect that the consumption is according to the new flavor
self.assertPCIDeviceCounts("compute1", total=3, free=1)
compute1_expected_placement_view[
"usages"]["0000:81:00.0"][self.VF_RC] = 2
compute1_expected_placement_view["allocations"][server["id"]] = {
"0000:81:00.0": {self.VF_RC: 2}
}
self.assert_placement_pci_view(
"compute1", **compute1_expected_placement_view)
self._run_periodics()
self.assert_placement_pci_view(
"compute1", **compute1_expected_placement_view)


@@ -61,7 +61,7 @@ class TestTranslator(test.NoDBTestCase):
provider_tree = mock.NonCallableMock()
ppt.update_provider_tree_for_pci(
provider_tree, "fake-node", pci_tracker, {})
provider_tree, "fake-node", pci_tracker, {}, [])
self.assertIn(
"Device spec is not found for device 0000:81:00.0 in "
@@ -113,7 +113,8 @@
)
def test_dependent_device_pf_then_vf(self):
pv = ppt.PlacementView("fake-node")
pv = ppt.PlacementView(
"fake-node", instances_under_same_host_resize=[])
pf = pci_device.PciDevice(
address="0000:81:00.0",
dev_type=fields.PciDeviceType.SRIOV_PF
@@ -140,7 +141,8 @@
)
def test_dependent_device_vf_then_pf(self):
pv = ppt.PlacementView("fake-node")
pv = ppt.PlacementView(
"fake-node", instances_under_same_host_resize=[])
pf = pci_device.PciDevice(
address="0000:81:00.0",
dev_type=fields.PciDeviceType.SRIOV_PF
@@ -173,7 +175,8 @@
)
def test_mixed_rc_for_sibling_vfs(self):
pv = ppt.PlacementView("fake-node")
pv = ppt.PlacementView(
"fake-node", instances_under_same_host_resize=[])
vf1, vf2, vf3, vf4 = [
pci_device.PciDevice(
address="0000:81:00.%d" % f,


@@ -1808,6 +1808,7 @@ class TestUpdateComputeNode(BaseTestCase):
compute_obj.hypervisor_hostname,
self.rt.pci_tracker,
mock_get_allocs.return_value,
[],
)
upt = self.rt.reportclient.update_from_provider_tree
upt.assert_called_once_with(mock.sentinel.ctx, ptree, allocations=None)
@@ -1847,6 +1848,7 @@
compute_obj.hypervisor_hostname,
self.rt.pci_tracker,
mock_get_allocs.return_value,
[],
)
upt = self.rt.reportclient.update_from_provider_tree
upt.assert_called_once_with(
@@ -1892,11 +1894,72 @@
compute_obj.hypervisor_hostname,
self.rt.pci_tracker,
mock_get_allocs.return_value,
[],
)
upt = self.rt.reportclient.update_from_provider_tree
upt.assert_called_once_with(
mock.sentinel.ctx, ptree, allocations=mock_get_allocs.return_value)
@mock.patch(
'nova.compute.resource_tracker.ResourceTracker.'
'_sync_compute_service_disabled_trait',
new=mock.Mock()
)
@mock.patch(
'nova.compute.resource_tracker.ResourceTracker._resource_change',
new=mock.Mock(return_value=False)
)
@mock.patch(
'nova.compute.pci_placement_translator.update_provider_tree_for_pci')
def test_update_pci_reporting_same_host_resize(
self, mock_update_provider_tree_for_pci
):
"""Assert that resource tracker calls update_provider_tree_for_pci
and with the list of instances that are being resized to the same
host.
"""
compute_obj = _COMPUTE_NODE_FIXTURES[0].obj_clone()
self._setup_rt()
ptree = self._setup_ptree(compute_obj)
# simulate that pci reporting did not touch allocations
mock_update_provider_tree_for_pci.return_value = False
self.rt.tracked_migrations = {
uuids.inst1: objects.Migration(
migration_type="resize",
source_node="fake-node",
dest_node="fake-node",
instance_uuid=uuids.inst1,
),
uuids.inst2: objects.Migration(
migration_type="evacuation",
source_node="fake-node",
dest_node="fake-node",
instance_uuid=uuids.inst2,
),
uuids.inst3: objects.Migration(
migration_type="resize",
source_node="fake-node1",
dest_node="fake-node2",
instance_uuid=uuids.inst3,
),
}
self.rt._update(mock.sentinel.ctx, compute_obj)
mock_get_allocs = (
self.report_client_mock.get_allocations_for_provider_tree)
mock_get_allocs.assert_called_once_with(
mock.sentinel.ctx, compute_obj.hypervisor_hostname)
mock_update_provider_tree_for_pci.assert_called_once_with(
ptree,
compute_obj.hypervisor_hostname,
self.rt.pci_tracker,
mock_get_allocs.return_value,
[uuids.inst1],
)
upt = self.rt.reportclient.update_from_provider_tree
upt.assert_called_once_with(mock.sentinel.ctx, ptree, allocations=None)
@mock.patch('nova.objects.Service.get_by_compute_host',
return_value=objects.Service(disabled=True))
def test_sync_compute_service_disabled_trait_add(self, mock_get_by_host):