diff --git a/doc/source/admin/virtual-gpu.rst b/doc/source/admin/virtual-gpu.rst index c7f295dda09f..a26c3be65123 100644 --- a/doc/source/admin/virtual-gpu.rst +++ b/doc/source/admin/virtual-gpu.rst @@ -293,6 +293,36 @@ Caveats This information is correct as of the 17.0.0 Queens release. Where improvements have been made or issues fixed, they are noted per item. +* When live-migrating an instance using vGPUs, the libvirt guest domain XML + isn't updated with the new mediated device UUID to use for the target. + + .. versionchanged:: 29.0.0 + + In the 2024.1 Caracal release, Nova now `supports vGPU live-migrations`_. In + order to do this, both the source and target compute services need to have + minimum versions of libvirt-8.6.0, QEMU-8.1.0 and Linux kernel 5.18.0. You + need to ensure that you use only a single common vGPU type between the two + computes. Where multiple mdev types are configured on the source and + destination host, custom traits or custom resource classes must be + configured, reported by the host and requested by the instance to make sure + that the Placement API correctly returns the supported GPU using the right + vGPU type for a migration. Last but not least, if you want to live-migrate + NVIDIA mediated devices, you need to update + :oslo.config:option:`libvirt.live_migration_downtime`, + :oslo.config:option:`libvirt.live_migration_downtime_steps` and + :oslo.config:option:`libvirt.live_migration_downtime_delay`: + + .. code-block:: ini + + live_migration_downtime = 500000 + live_migration_downtime_steps = 3 + live_migration_downtime_delay = 3 + + You can see an example of a working live-migration `here`__. + + .. __: http://sbauza.github.io/vgpu/vgpu_live_migration.html + + * Suspending a guest that has vGPUs doesn't yet work because of a libvirt limitation (it can't hot-unplug mediated devices from a guest). Workarounds using other instance actions (like snapshotting the instance or shelving it) @@ -355,6 +385,7 @@ For nested vGPUs: .. 
_bug 1778563: https://bugs.launchpad.net/nova/+bug/1778563 .. _bug 1762688: https://bugs.launchpad.net/nova/+bug/1762688 .. _bug 1948705: https://bugs.launchpad.net/nova/+bug/1948705 +.. _supports vGPU live-migrations: https://specs.openstack.org/openstack/nova-specs/specs/2024.1/approved/libvirt-mdev-live-migrate.html .. Links .. _Intel GVT-g: https://01.org/igvt-g diff --git a/nova/tests/functional/libvirt/test_vgpu.py b/nova/tests/functional/libvirt/test_vgpu.py index 7def9bc6d875..8f108d216b89 100644 --- a/nova/tests/functional/libvirt/test_vgpu.py +++ b/nova/tests/functional/libvirt/test_vgpu.py @@ -576,10 +576,9 @@ class VGPULiveMigrationTests(base.LibvirtMigrationMixin, VGPUTestBase): mdevs = self.src.driver._get_all_assigned_mediated_devices(inst) self.assertEqual(1, len(mdevs)) self._live_migrate(self.server, 'completed') - # FIXME(sbauza): The domain is fully copied to the destination so the - # XML contains the original mdev but given the 'devices' attribute on - # the fixture doesn't have it, that's why we have a KeyError. - self.assertRaises(KeyError, self.assert_mdev_usage, self.dest, 0) + # Now the destination XML is updated, so the destination mdev is + # correctly used. + self.assert_mdev_usage(self.dest, 1) class VGPULiveMigrationTestsLMFailed(VGPULiveMigrationTests): diff --git a/nova/tests/unit/virt/libvirt/test_migration.py b/nova/tests/unit/virt/libvirt/test_migration.py index 155c25998687..ca4fb02a1267 100644 --- a/nova/tests/unit/virt/libvirt/test_migration.py +++ b/nova/tests/unit/virt/libvirt/test_migration.py @@ -190,6 +190,40 @@ class UtilityMigrationTestCase(test.NoDBTestCase): new_xml = new_xml.replace("/dev/dax0.2", "/dev/dax2.0") self.assertXmlEqual(res, new_xml) + def test_update_mdev_xml(self): + xml_pattern = """ + + + +
+ + + +""" + data = objects.LibvirtLiveMigrateData( + target_mdevs={uuids.src_mdev: uuids.dst_mdev}) + doc = etree.fromstring(xml_pattern % uuids.src_mdev) + res = migration._update_mdev_xml(doc, data.target_mdevs) + self.assertEqual(xml_pattern % uuids.dst_mdev, + etree.tostring(res, encoding='unicode')) + + def test_update_mdev_xml_fails_on_notfound_mdev(self): + xml_pattern = """ + + + +
def _update_mdev_xml(xml_doc, target_mdevs):
    """Rewrite mediated-device UUIDs in the guest XML for the destination.

    Every ``<hostdev type='mdev'>`` element under ``./devices`` that carries
    a ``source/address`` child with a ``uuid`` attribute gets that attribute
    replaced by the matching value from ``target_mdevs``. Elements without
    an address child or without a ``uuid`` attribute are left untouched.

    :param xml_doc: parsed guest domain XML; it is modified in place.
    :param target_mdevs: mapping of source mdev UUID to the mdev UUID to
        use on the destination.
    :returns: the same ``xml_doc`` object, after the update.
    :raises exception.NovaException: if a source mdev UUID found in the XML
        has no destination UUID in ``target_mdevs``.
    """
    for hostdev in xml_doc.findall('./devices/hostdev'):
        # Only mediated devices are remapped; other hostdevs are skipped.
        if hostdev.get('type') != 'mdev':
            continue

        address = hostdev.find('source/address')
        if address is None:
            continue

        source_uuid = address.get('uuid')
        if source_uuid is None:
            continue

        target_uuid = target_mdevs.get(source_uuid)
        if target_uuid is None:
            # Without a known destination mdev the guest cannot be started
            # on the target, so abort the live-migration rather than guess.
            raise exception.NovaException(
                'Unable to find the destination mediated device UUID '
                'to use for this source mdev UUID : %s' % source_uuid)

        address.set('uuid', target_uuid)

    rendered = etree.tostring(xml_doc, encoding='unicode', pretty_print=True)
    LOG.debug('_update_mdev_xml output xml=%s', rendered)
    return xml_doc