Heal allocation for same host resize

Same host resize needs special handling in the allocation healing logic
because both the source and the dest host PCI devices are visible to the
healing code: PciDevice.instance_uuid points to the healed instance in
both cases.

blueprint: pci-device-tracking-in-placement
Change-Id: Id02e445c55fc956965b7d725f0260876d42422f2
Balazs Gibizer 2022-08-26 18:02:59 +02:00
parent 98e9989cad
commit ab439dadb1
6 changed files with 227 additions and 9 deletions

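To make the problem concrete, here is a minimal sketch of the state the
healing logic sees while a same host resize is in flight. The UUIDs,
resource provider names and dict shapes below are invented for
illustration and are not Nova internals: Placement has already moved the
source side allocation to the migration UUID, while every claimed
PciDevice on the host still points at the instance UUID.

instance_uuid = "11111111-1111-1111-1111-111111111111"
migration_uuid = "22222222-2222-2222-2222-222222222222"

# Placement: the source side allocation was moved to the migration UUID,
# the destination side allocation is held by the instance UUID.
placement_allocations = {
    migration_uuid: {
        "allocations": {"compute1_0000:81:00.0": {"CUSTOM_PCI_VF": 1}}},
    instance_uuid: {
        "allocations": {"compute1_0000:81:00.0": {"CUSTOM_PCI_VF": 2}}},
}

# PCI tracker: every device claimed on this host, source and dest side
# alike, has instance_uuid set to the instance, so a naive healer would
# try to allocate all three VFs to the instance UUID and double count
# the source side.
pci_device_claims = [
    {"address": "0000:81:00.1", "instance_uuid": instance_uuid},  # source
    {"address": "0000:81:00.2", "instance_uuid": instance_uuid},  # dest
    {"address": "0000:81:00.3", "instance_uuid": instance_uuid},  # dest
]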

@@ -254,7 +254,8 @@ class PciResourceProvider:
def update_allocations(
self,
allocations: dict,
provider_tree: provider_tree.ProviderTree
provider_tree: provider_tree.ProviderTree,
same_host_instances: ty.List[str],
) -> bool:
updated = False
@@ -293,6 +294,21 @@ class PciResourceProvider:
# heal_allocation CLI instead.
continue
if consumer in same_host_instances:
# This is a nasty special case. This instance is undergoing
# a same host resize. So in Placement the source host
# allocation is held by the migration UUID *but* the
# PciDevice.instance_uuid is set to the instance UUID both
# on the source and on the destination host. As the source and
# dest are the same for this migration we will see PciDevice
# objects assigned to this instance that should not be
# allocated to the instance UUID in placement.
# As noted above we don't want to handle in-progress
# migrations during healing. So we simply ignore this instance.
# If the instance needs healing then it will be healed after
# the migration is confirmed or reverted.
continue
current_allocs = allocations[consumer]['allocations']
current_rp_allocs = current_allocs.get(rp_uuid)
@@ -326,9 +342,14 @@
class PlacementView:
"""The PCI Placement view"""
def __init__(self, hypervisor_hostname: str) -> None:
def __init__(
self,
hypervisor_hostname: str,
instances_under_same_host_resize: ty.List[str],
) -> None:
self.rps: ty.Dict[str, PciResourceProvider] = {}
self.root_rp_name = hypervisor_hostname
self.same_host_instances = instances_under_same_host_resize
def _get_rp_name_for_address(self, addr: str) -> str:
return f"{self.root_rp_name}_{addr.upper()}"
@@ -459,7 +480,11 @@ class PlacementView:
"""
updated = False
for rp in self.rps.values():
updated |= rp.update_allocations(allocations, provider_tree)
updated |= rp.update_allocations(
allocations,
provider_tree,
self.same_host_instances,
)
return updated
@@ -500,6 +525,7 @@ def update_provider_tree_for_pci(
nodename: str,
pci_tracker: pci_manager.PciDevTracker,
allocations: dict,
instances_under_same_host_resize: ty.List[str],
) -> bool:
"""Based on the PciDevice objects in the pci_tracker it calculates what
inventories and allocations needs to exist in placement and create the
@@ -529,6 +555,8 @@ def update_provider_tree_for_pci(
},
...
}
:param instances_under_same_host_resize: A list of instance UUIDs that
are undergoing same host resize on this host.
"""
if not _is_placement_tracking_enabled():
ensure_tracking_was_not_enabled_before(provider_tree)
@@ -541,7 +569,7 @@ def update_provider_tree_for_pci(
'Collecting PCI inventories and allocations to track them in Placement'
)
pv = PlacementView(nodename)
pv = PlacementView(nodename, instances_under_same_host_resize)
for dev in pci_tracker.pci_devs:
# match the PCI device with the [pci]dev_spec config to access
# the configuration metadata tags

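For illustration only, a minimal standalone sketch of the skip that the
update_allocations hunk above adds. The function name and data shapes
here are invented; the real method also updates the provider tree and
reports whether anything changed.

import typing as ty


def heal_pci_consumers(
    device_consumers: ty.Dict[str, str],
    allocations: dict,
    same_host_instances: ty.List[str],
) -> None:
    """Illustrative sketch: device_consumers maps a PCI device address
    to the consumer (instance) UUID recorded by the PCI tracker."""
    for addr, consumer in device_consumers.items():
        if consumer in same_host_instances:
            # The source side allocation already belongs to the migration
            # UUID in Placement, so healing against the instance UUID here
            # would double count the device. Heal after the resize is
            # confirmed or reverted instead.
            continue
        # Everyone else gets (or keeps) an allocation entry keyed by the
        # instance UUID.
        allocations.setdefault(consumer, {"allocations": {}})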

@@ -1260,6 +1260,11 @@ class ResourceTracker(object):
context, nodename, provider_tree=prov_tree)
prov_tree.update_traits(nodename, traits)
instances_under_same_host_resize = [
migration.instance_uuid
for migration in self.tracked_migrations.values()
if migration.is_same_host_resize
]
# NOTE(gibi): Tracking PCI in placement is different from other
# resources.
#
@@ -1278,7 +1283,12 @@
# is enabled. So we need to be ready to heal PCI allocations at
# every call not just at startup.
pci_reshaped = pci_placement_translator.update_provider_tree_for_pci(
prov_tree, nodename, self.pci_tracker, allocs)
prov_tree,
nodename,
self.pci_tracker,
allocs,
instances_under_same_host_resize,
)
self.provider_tree = prov_tree


@@ -215,6 +215,10 @@ class Migration(base.NovaPersistentObject, base.NovaObject):
def is_resize(self):
return self.migration_type == fields.MigrationType.RESIZE
@property
def is_same_host_resize(self):
return self.is_resize and self.source_node == self.dest_node
@base.NovaObjectRegistry.register
class MigrationList(base.ObjectListBase, base.NovaObject):

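A hedged usage sketch of the new is_same_host_resize property combined
with the resource tracker filter from the earlier hunk; it mirrors the
unit test further down and assumes a development environment where nova
is importable. The node names and UUIDs are made up.

from uuid import uuid4

# Importing the module registers the Migration versioned object.
from nova.objects import migration as migration_obj

same_host_uuid = str(uuid4())
cross_host_uuid = str(uuid4())

tracked_migrations = {
    same_host_uuid: migration_obj.Migration(
        migration_type="resize",
        source_node="node-a",
        dest_node="node-a",
        instance_uuid=same_host_uuid,
    ),
    cross_host_uuid: migration_obj.Migration(
        migration_type="resize",
        source_node="node-a",
        dest_node="node-b",
        instance_uuid=cross_host_uuid,
    ),
}

# Same shape as the resource tracker hunk: only the resize that stays on
# the same node is reported to the PCI healing code.
instances_under_same_host_resize = [
    m.instance_uuid
    for m in tracked_migrations.values()
    if m.is_same_host_resize
]
assert instances_under_same_host_resize == [same_host_uuid]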

@@ -1346,3 +1346,113 @@ class PlacementPCIAllocationHealingTests(PlacementPCIReportingTests):
del compute1_expected_placement_view["allocations"][server["id"]]
self.assert_placement_pci_view(
"compute1", **compute1_expected_placement_view)
def test_heal_allocation_during_same_host_resize(self):
self.flags(allow_resize_to_same_host=True)
# The fake libvirt will emulate on the host:
# * one type-PF (slot 0) with 3 type-VFs
compute1_pci_info = fakelibvirt.HostPCIDevicesInfo(
num_pci=0, num_pfs=1, num_vfs=3)
# the config matches just the VFs
compute1_device_spec = self._to_device_spec_conf(
[
{
"vendor_id": fakelibvirt.PCI_VEND_ID,
"product_id": fakelibvirt.VF_PROD_ID,
"address": "0000:81:00.*",
},
]
)
self.flags(group='pci', device_spec=compute1_device_spec)
# Start a compute with PCI tracking in placement
self.mock_pci_report_in_placement.return_value = True
self.start_compute(hostname="compute1", pci_info=compute1_pci_info)
self.assertPCIDeviceCounts("compute1", total=3, free=3)
compute1_expected_placement_view = {
"inventories": {
"0000:81:00.0": {self.VF_RC: 3},
},
"traits": {
"0000:81:00.0": [],
},
"usages": {
"0000:81:00.0": {self.VF_RC: 0},
},
"allocations": {},
}
self.assert_placement_pci_view(
"compute1", **compute1_expected_placement_view)
# Create an instance consuming one VF
extra_spec = {"pci_passthrough:alias": "a-vf:1"}
flavor_id = self._create_flavor(extra_spec=extra_spec)
server = self._create_server(flavor_id=flavor_id, networks=[])
self.assertPCIDeviceCounts("compute1", total=3, free=2)
# As scheduling does not support PCI in placement yet, no allocation
# is created for the PCI consumption by the scheduler. BUT the resource
# tracker in the compute will heal the missing PCI allocation.
compute1_expected_placement_view[
"usages"]["0000:81:00.0"][self.VF_RC] = 1
compute1_expected_placement_view["allocations"][server["id"]] = {
"0000:81:00.0": {self.VF_RC: 1}
}
self.assert_placement_pci_view(
"compute1", **compute1_expected_placement_view)
self._run_periodics()
self.assert_placement_pci_view(
"compute1", **compute1_expected_placement_view)
# resize the server to consume 2 VFs on the same host
extra_spec = {"pci_passthrough:alias": "a-vf:2"}
flavor_id = self._create_flavor(extra_spec=extra_spec)
server = self._resize_server(server, flavor_id)
# during resize both the source and the dest allocations are kept
# and in a same host resize both are consumed from the same host
self.assertPCIDeviceCounts("compute1", total=3, free=0)
# the source side of the allocation is held by the migration
self._move_server_allocation(
compute1_expected_placement_view["allocations"], server['id'])
# NOTE(gibi): we intentionally don't heal the allocation for the instance
# while it is being resized. See the comment in the
# pci_placement_translator about the reasoning.
self.assert_placement_pci_view(
"compute1", **compute1_expected_placement_view)
self._run_periodics()
self.assert_placement_pci_view(
"compute1", **compute1_expected_placement_view)
# revert the resize
self._revert_resize(server)
self.assertPCIDeviceCounts("compute1", total=3, free=2)
# the original allocations are restored
self._move_server_allocation(
compute1_expected_placement_view["allocations"],
server["id"],
revert=True,
)
compute1_expected_placement_view[
"usages"]["0000:81:00.0"][self.VF_RC] = 1
compute1_expected_placement_view["allocations"][server["id"]] = {
"0000:81:00.0": {self.VF_RC: 1}
}
self.assert_placement_pci_view(
"compute1", **compute1_expected_placement_view)
self._run_periodics()
self.assert_placement_pci_view(
"compute1", **compute1_expected_placement_view)
# now resize and then confirm it
self._resize_server(server, flavor_id)
self._confirm_resize(server)
# we expect that the consumption is according to the new flavor
self.assertPCIDeviceCounts("compute1", total=3, free=1)
compute1_expected_placement_view[
"usages"]["0000:81:00.0"][self.VF_RC] = 2
compute1_expected_placement_view["allocations"][server["id"]] = {
"0000:81:00.0": {self.VF_RC: 2}
}
self.assert_placement_pci_view(
"compute1", **compute1_expected_placement_view)
self._run_periodics()
self.assert_placement_pci_view(
"compute1", **compute1_expected_placement_view)


@@ -61,7 +61,7 @@ class TestTranslator(test.NoDBTestCase):
provider_tree = mock.NonCallableMock()
ppt.update_provider_tree_for_pci(
provider_tree, "fake-node", pci_tracker, {})
provider_tree, "fake-node", pci_tracker, {}, [])
self.assertIn(
"Device spec is not found for device 0000:81:00.0 in "
@@ -113,7 +113,8 @@
)
def test_dependent_device_pf_then_vf(self):
pv = ppt.PlacementView("fake-node")
pv = ppt.PlacementView(
"fake-node", instances_under_same_host_resize=[])
pf = pci_device.PciDevice(
address="0000:81:00.0",
dev_type=fields.PciDeviceType.SRIOV_PF
@@ -140,7 +141,8 @@
)
def test_dependent_device_vf_then_pf(self):
pv = ppt.PlacementView("fake-node")
pv = ppt.PlacementView(
"fake-node", instances_under_same_host_resize=[])
pf = pci_device.PciDevice(
address="0000:81:00.0",
dev_type=fields.PciDeviceType.SRIOV_PF
@@ -173,7 +175,8 @@
)
def test_mixed_rc_for_sibling_vfs(self):
pv = ppt.PlacementView("fake-node")
pv = ppt.PlacementView(
"fake-node", instances_under_same_host_resize=[])
vf1, vf2, vf3, vf4 = [
pci_device.PciDevice(
address="0000:81:00.%d" % f,


@@ -1808,6 +1808,7 @@ class TestUpdateComputeNode(BaseTestCase):
compute_obj.hypervisor_hostname,
self.rt.pci_tracker,
mock_get_allocs.return_value,
[],
)
upt = self.rt.reportclient.update_from_provider_tree
upt.assert_called_once_with(mock.sentinel.ctx, ptree, allocations=None)
@@ -1847,6 +1848,7 @@
compute_obj.hypervisor_hostname,
self.rt.pci_tracker,
mock_get_allocs.return_value,
[],
)
upt = self.rt.reportclient.update_from_provider_tree
upt.assert_called_once_with(
@@ -1892,11 +1894,72 @@
compute_obj.hypervisor_hostname,
self.rt.pci_tracker,
mock_get_allocs.return_value,
[],
)
upt = self.rt.reportclient.update_from_provider_tree
upt.assert_called_once_with(
mock.sentinel.ctx, ptree, allocations=mock_get_allocs.return_value)
@mock.patch(
'nova.compute.resource_tracker.ResourceTracker.'
'_sync_compute_service_disabled_trait',
new=mock.Mock()
)
@mock.patch(
'nova.compute.resource_tracker.ResourceTracker._resource_change',
new=mock.Mock(return_value=False)
)
@mock.patch(
'nova.compute.pci_placement_translator.update_provider_tree_for_pci')
def test_update_pci_reporting_same_host_resize(
self, mock_update_provider_tree_for_pci
):
"""Assert that resource tracker calls update_provider_tree_for_pci
and with the list of instances that are being resized to the same
host.
"""
compute_obj = _COMPUTE_NODE_FIXTURES[0].obj_clone()
self._setup_rt()
ptree = self._setup_ptree(compute_obj)
# simulate that pci reporting did not touch allocations
mock_update_provider_tree_for_pci.return_value = False
self.rt.tracked_migrations = {
uuids.inst1: objects.Migration(
migration_type="resize",
source_node="fake-node",
dest_node="fake-node",
instance_uuid=uuids.inst1,
),
uuids.inst2: objects.Migration(
migration_type="evacuation",
source_node="fake-node",
dest_node="fake-node",
instance_uuid=uuids.inst2,
),
uuids.inst3: objects.Migration(
migration_type="resize",
source_node="fake-node1",
dest_node="fake-node2",
instance_uuid=uuids.inst3,
),
}
self.rt._update(mock.sentinel.ctx, compute_obj)
mock_get_allocs = (
self.report_client_mock.get_allocations_for_provider_tree)
mock_get_allocs.assert_called_once_with(
mock.sentinel.ctx, compute_obj.hypervisor_hostname)
mock_update_provider_tree_for_pci.assert_called_once_with(
ptree,
compute_obj.hypervisor_hostname,
self.rt.pci_tracker,
mock_get_allocs.return_value,
[uuids.inst1],
)
upt = self.rt.reportclient.update_from_provider_tree
upt.assert_called_once_with(mock.sentinel.ctx, ptree, allocations=None)
@mock.patch('nova.objects.Service.get_by_compute_host',
return_value=objects.Service(disabled=True))
def test_sync_compute_service_disabled_trait_add(self, mock_get_by_host):