Merge "Modify the mdevs in the migrate XML"

This commit is contained in:
Zuul 2024-02-29 06:58:40 +00:00 committed by Gerrit Code Review
commit 060445aa2f
5 changed files with 105 additions and 4 deletions


@@ -293,6 +293,36 @@ Caveats
This information is correct as of the 17.0.0 Queens release. Where
improvements have been made or issues fixed, they are noted per item.
* When live-migrating an instance using vGPUs, the libvirt guest domain XML
  isn't updated with the new mediated device UUID to use for the target.

  .. versionchanged:: 29.0.0

     In the 2024.1 Caracal release, Nova now `supports vGPU live-migrations`_.
     In order to do this, both the source and target compute services need to
     run at least libvirt-8.6.0, QEMU-8.1.0 and Linux kernel 5.18.0. You also
     need to ensure that you either use only a single common vGPU type between
     the two computes, or, where multiple mdev types are configured on the
     source and destination hosts, that custom traits or custom resource
     classes are configured, reported by the hosts and requested by the
     instance, so that the Placement API correctly returns the supported GPU
     using the right vGPU type for the migration (a configuration sketch is
     shown below). Last but not least, if you want to live-migrate NVIDIA
     mediated devices, you need to update
     :oslo.config:option:`libvirt.live_migration_downtime`,
     :oslo.config:option:`libvirt.live_migration_downtime_steps` and
     :oslo.config:option:`libvirt.live_migration_downtime_delay`:

     .. code-block:: ini

        live_migration_downtime = 500000
        live_migration_downtime_steps = 3
        live_migration_downtime_delay = 3

     You can see an example of a working live-migration `here`__.

     .. __: http://sbauza.github.io/vgpu/vgpu_live_migration.html
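
     For example, a minimal sketch of mapping each mdev type to its own
     custom resource class so that a flavor can pin a type with
     ``resources:CUSTOM_NVIDIA_35=1`` (the mdev type names, PCI addresses and
     custom resource class names below are placeholders, not values from this
     change; adjust them to your deployment):

     .. code-block:: ini

        # hypothetical values; use the mdev types and PCI addresses
        # of your own hosts
        [devices]
        enabled_mdev_types = nvidia-35, nvidia-36

        [mdev_nvidia-35]
        device_addresses = 0000:84:00.0
        mdev_class = CUSTOM_NVIDIA_35

        [mdev_nvidia-36]
        device_addresses = 0000:85:00.0
        mdev_class = CUSTOM_NVIDIA_36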

* Suspending a guest that has vGPUs doesn't yet work because of a libvirt
  limitation (it can't hot-unplug mediated devices from a guest). Workarounds
  using other instance actions (like snapshotting the instance or shelving it)
@@ -355,6 +385,7 @@ For nested vGPUs:
.. _bug 1778563: https://bugs.launchpad.net/nova/+bug/1778563
.. _bug 1762688: https://bugs.launchpad.net/nova/+bug/1762688
.. _bug 1948705: https://bugs.launchpad.net/nova/+bug/1948705
.. _supports vGPU live-migrations: https://specs.openstack.org/openstack/nova-specs/specs/2024.1/approved/libvirt-mdev-live-migrate.html
.. Links
.. _Intel GVT-g: https://01.org/igvt-g


@@ -576,10 +576,9 @@ class VGPULiveMigrationTests(base.LibvirtMigrationMixin, VGPUTestBase):

        mdevs = self.src.driver._get_all_assigned_mediated_devices(inst)
        self.assertEqual(1, len(mdevs))
        self._live_migrate(self.server, 'completed')
        # FIXME(sbauza): The domain is fully copied to the destination so the
        # XML contains the original mdev but given the 'devices' attribute on
        # the fixture doesn't have it, that's why we have a KeyError.
        self.assertRaises(KeyError, self.assert_mdev_usage, self.dest, 0)
        # Now the destination XML is updated, so the destination mdev is
        # correctly used.
        self.assert_mdev_usage(self.dest, 1)


class VGPULiveMigrationTestsLMFailed(VGPULiveMigrationTests):


@@ -190,6 +190,40 @@ class UtilityMigrationTestCase(test.NoDBTestCase):

        new_xml = new_xml.replace("/dev/dax0.2", "/dev/dax2.0")
        self.assertXmlEqual(res, new_xml)

    def test_update_mdev_xml(self):
        xml_pattern = """<domain>
  <devices>
    <hostdev mode="subsystem" type="mdev" model="vfio-pci">
      <source>
        <address uuid="%s"/>
      </source>
    </hostdev>
  </devices>
</domain>"""
        data = objects.LibvirtLiveMigrateData(
            target_mdevs={uuids.src_mdev: uuids.dst_mdev})
        doc = etree.fromstring(xml_pattern % uuids.src_mdev)
        res = migration._update_mdev_xml(doc, data.target_mdevs)
        self.assertEqual(xml_pattern % uuids.dst_mdev,
                         etree.tostring(res, encoding='unicode'))

    def test_update_mdev_xml_fails_on_notfound_mdev(self):
        xml_pattern = """<domain>
  <devices>
    <hostdev mode="subsystem" type="mdev" model="vfio-pci">
      <source>
        <address uuid="%s"/>
      </source>
    </hostdev>
  </devices>
</domain>"""
        data = objects.LibvirtLiveMigrateData(
            target_mdevs={uuids.other_mdev: uuids.dst_mdev})
        doc = etree.fromstring(xml_pattern % uuids.src_mdev)
        # src_mdev UUID doesn't exist in the target_mdevs dict
        self.assertRaises(exception.NovaException,
                          migration._update_mdev_xml, doc, data.target_mdevs)

    def test_update_numa_xml(self):
        doc = etree.fromstring("""
<domain>


@@ -67,6 +67,8 @@ def get_updated_guest_xml(instance, guest, migrate_data, get_volume_config,

        xml_doc = _update_vif_xml(xml_doc, migrate_data, get_vif_config)
    if 'dst_numa_info' in migrate_data:
        xml_doc = _update_numa_xml(xml_doc, migrate_data)
    if 'target_mdevs' in migrate_data:
        xml_doc = _update_mdev_xml(xml_doc, migrate_data.target_mdevs)
    if new_resources:
        xml_doc = _update_device_resources_xml(xml_doc, new_resources)
    return etree.tostring(xml_doc, encoding='unicode')
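
A minimal sketch (not part of the diff; it assumes a nova tree containing this
change on PYTHONPATH) of why the 'target_mdevs' in migrate_data guard works:
'in' on an oslo.versionedobjects instance reports whether the field has been
set, so the mdev rewrite only runs when the source compute was handed a
source-to-destination mdev UUID mapping:

    # illustrative only: exercises the guard shown above
    from oslo_utils.fixture import uuidsentinel as uuids

    from nova import objects

    objects.register_all()  # attach the object classes to nova.objects

    migrate_data = objects.LibvirtLiveMigrateData(
        target_mdevs={uuids.src_mdev: uuids.dst_mdev})
    assert 'target_mdevs' in migrate_data       # field set: rewrite runs
    assert 'dst_numa_info' not in migrate_data  # unset fields are skipped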
@@ -106,6 +108,28 @@ def _update_vpmems_xml(xml_doc, vpmems):

    return xml_doc


def _update_mdev_xml(xml_doc, target_mdevs):
    for dev in xml_doc.findall('./devices/hostdev'):
        if dev.get('type') == 'mdev':
            address_tag = dev.find('source/address')
            if address_tag is None:
                continue
            src_mdev = address_tag.get('uuid')
            if src_mdev is not None:
                dst_mdev = target_mdevs.get(src_mdev)
                if dst_mdev is None:
                    # For some reason, we don't know which mdev to use
                    # so we prefer to abort the live-migration.
                    raise exception.NovaException(
                        'Unable to find the destination mediated device UUID '
                        'to use for this source mdev UUID: %s' % src_mdev)
                else:
                    address_tag.set('uuid', dst_mdev)
    LOG.debug('_update_mdev_xml output xml=%s',
              etree.tostring(xml_doc, encoding='unicode', pretty_print=True))
    return xml_doc


def _update_numa_xml(xml_doc, migrate_data):
    LOG.debug('_update_numa_xml input xml=%s',
              etree.tostring(xml_doc, encoding='unicode', pretty_print=True))
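
A standalone round-trip sketch (lxml only; the UUIDs are placeholders, not
values from this change) of the rewrite _update_mdev_xml applies to the
migratable domain XML:

    from lxml import etree

    SRC = '11111111-1111-1111-1111-111111111111'
    DST = '22222222-2222-2222-2222-222222222222'
    xml = """<domain>
      <devices>
        <hostdev mode="subsystem" type="mdev" model="vfio-pci">
          <source><address uuid="%s"/></source>
        </hostdev>
      </devices>
    </domain>""" % SRC

    doc = etree.fromstring(xml)
    for dev in doc.findall('./devices/hostdev'):
        if dev.get('type') == 'mdev':
            address = dev.find('source/address')
            # swap the source mdev UUID for the one reserved on the target
            address.set('uuid', {SRC: DST}[address.get('uuid')])
    assert ('uuid="%s"' % DST) in etree.tostring(doc, encoding='unicode')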


@@ -0,0 +1,13 @@
---
features:
  - |
    Instances using vGPUs can now be correctly live-migrated by the libvirt
    driver between compute nodes supporting the same mediated device types
    used by the instance. To do this, the compute hosts need to run at least
    libvirt-8.6.0, QEMU-8.1.0 and Linux kernel 5.18.0. Operators using
    multiple vGPU types per compute need to make sure they already use
    custom traits or custom resource classes for the GPU resource providers,
    and that the instance was created with a flavor either using a custom
    resource class or asking for a custom trait, so that the Placement API
    provides the right target GPU using the same mdev type for the instance.