Merge "Modify the mdevs in the migrate XML"

This commit is contained in:
Zuul 2024-02-29 06:58:40 +00:00 committed by Gerrit Code Review
commit 060445aa2f
5 changed files with 105 additions and 4 deletions


@@ -293,6 +293,36 @@ Caveats
This information is correct as of the 17.0.0 Queens release. Where
improvements have been made or issues fixed, they are noted per item.
* When live-migrating an instance using vGPUs, the libvirt guest domain XML
  isn't updated with the new mediated device UUID to use for the target.

  .. versionchanged:: 29.0.0

     In the 2024.1 Caracal release, Nova now `supports vGPU live-migrations`_.
     In order to do this, both the source and target compute services need to
     run at least libvirt-8.6.0, QEMU-8.1.0 and Linux kernel 5.18.0. You also
     need to ensure that you either use only a single common vGPU type between
     the two computes, or, where multiple mdev types are configured on the
     source and destination hosts, that custom traits or custom resource
     classes are configured, reported by the hosts and requested by the
     instance, so that the Placement API correctly returns the supported GPU
     using the right vGPU type for the migration (a configuration sketch is
     shown below). Last but not least, if you want to live-migrate NVIDIA
     mediated devices, you need to update
     :oslo.config:option:`libvirt.live_migration_downtime`,
     :oslo.config:option:`libvirt.live_migration_downtime_steps` and
     :oslo.config:option:`libvirt.live_migration_downtime_delay`:

     .. code-block:: ini

        live_migration_downtime = 500000
        live_migration_downtime_steps = 3
        live_migration_downtime_delay = 3

     You can see an example of a working live-migration `here`__.

     .. __: http://sbauza.github.io/vgpu/vgpu_live_migration.html
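
     For example, a minimal sketch of mapping each mdev type to its own
     custom resource class so that a flavor can pin a type with
     ``resources:CUSTOM_NVIDIA_35=1`` (the mdev type names, PCI addresses and
     custom resource class names below are placeholders, not values from this
     change; adjust them to your deployment):

     .. code-block:: ini

        # hypothetical values; use the mdev types and PCI addresses
        # of your own hosts
        [devices]
        enabled_mdev_types = nvidia-35, nvidia-36

        [mdev_nvidia-35]
        device_addresses = 0000:84:00.0
        mdev_class = CUSTOM_NVIDIA_35

        [mdev_nvidia-36]
        device_addresses = 0000:85:00.0
        mdev_class = CUSTOM_NVIDIA_36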

* Suspending a guest that has vGPUs doesn't yet work because of a libvirt
  limitation (it can't hot-unplug mediated devices from a guest). Workarounds
  using other instance actions (like snapshotting the instance or shelving it)
@@ -355,6 +385,7 @@ For nested vGPUs:
.. _bug 1778563: https://bugs.launchpad.net/nova/+bug/1778563
.. _bug 1762688: https://bugs.launchpad.net/nova/+bug/1762688
.. _bug 1948705: https://bugs.launchpad.net/nova/+bug/1948705
.. _supports vGPU live-migrations: https://specs.openstack.org/openstack/nova-specs/specs/2024.1/approved/libvirt-mdev-live-migrate.html
.. Links
.. _Intel GVT-g: https://01.org/igvt-g


@@ -576,10 +576,9 @@ class VGPULiveMigrationTests(base.LibvirtMigrationMixin, VGPUTestBase):

        mdevs = self.src.driver._get_all_assigned_mediated_devices(inst)
        self.assertEqual(1, len(mdevs))
        self._live_migrate(self.server, 'completed')
        # FIXME(sbauza): The domain is fully copied to the destination so the
        # XML contains the original mdev but given the 'devices' attribute on
        # the fixture doesn't have it, that's why we have a KeyError.
        self.assertRaises(KeyError, self.assert_mdev_usage, self.dest, 0)
        # Now the destination XML is updated, so the destination mdev is
        # correctly used.
        self.assert_mdev_usage(self.dest, 1)


class VGPULiveMigrationTestsLMFailed(VGPULiveMigrationTests):


@@ -190,6 +190,40 @@ class UtilityMigrationTestCase(test.NoDBTestCase):

        new_xml = new_xml.replace("/dev/dax0.2", "/dev/dax2.0")
        self.assertXmlEqual(res, new_xml)

    def test_update_mdev_xml(self):
        xml_pattern = """<domain>
  <devices>
    <hostdev mode="subsystem" type="mdev" model="vfio-pci">
      <source>
        <address uuid="%s"/>
      </source>
    </hostdev>
  </devices>
</domain>"""
        data = objects.LibvirtLiveMigrateData(
            target_mdevs={uuids.src_mdev: uuids.dst_mdev})
        doc = etree.fromstring(xml_pattern % uuids.src_mdev)
        res = migration._update_mdev_xml(doc, data.target_mdevs)
        self.assertEqual(xml_pattern % uuids.dst_mdev,
                         etree.tostring(res, encoding='unicode'))

    def test_update_mdev_xml_fails_on_notfound_mdev(self):
        xml_pattern = """<domain>
  <devices>
    <hostdev mode="subsystem" type="mdev" model="vfio-pci">
      <source>
        <address uuid="%s"/>
      </source>
    </hostdev>
  </devices>
</domain>"""
        data = objects.LibvirtLiveMigrateData(
            target_mdevs={uuids.other_mdev: uuids.dst_mdev})
        doc = etree.fromstring(xml_pattern % uuids.src_mdev)
        # src_mdev UUID doesn't exist in the target_mdevs dict
        self.assertRaises(exception.NovaException,
                          migration._update_mdev_xml, doc, data.target_mdevs)

    def test_update_numa_xml(self):
        doc = etree.fromstring("""
<domain>


@@ -67,6 +67,8 @@ def get_updated_guest_xml(instance, guest, migrate_data, get_volume_config,

        xml_doc = _update_vif_xml(xml_doc, migrate_data, get_vif_config)
    if 'dst_numa_info' in migrate_data:
        xml_doc = _update_numa_xml(xml_doc, migrate_data)
    if 'target_mdevs' in migrate_data:
        xml_doc = _update_mdev_xml(xml_doc, migrate_data.target_mdevs)
    if new_resources:
        xml_doc = _update_device_resources_xml(xml_doc, new_resources)
    return etree.tostring(xml_doc, encoding='unicode')
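
A minimal sketch (not part of the diff; it assumes a nova tree containing this
change on PYTHONPATH) of why the 'target_mdevs' in migrate_data guard works:
'in' on an oslo.versionedobjects instance reports whether the field has been
set, so the mdev rewrite only runs when the source compute was handed a
source-to-destination mdev UUID mapping:

    # illustrative only: exercises the guard shown above
    from oslo_utils.fixture import uuidsentinel as uuids

    from nova import objects

    objects.register_all()  # attach the object classes to nova.objects

    migrate_data = objects.LibvirtLiveMigrateData(
        target_mdevs={uuids.src_mdev: uuids.dst_mdev})
    assert 'target_mdevs' in migrate_data       # field set: rewrite runs
    assert 'dst_numa_info' not in migrate_data  # unset fields are skipped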
@@ -106,6 +108,28 @@ def _update_vpmems_xml(xml_doc, vpmems):

    return xml_doc


def _update_mdev_xml(xml_doc, target_mdevs):
    for dev in xml_doc.findall('./devices/hostdev'):
        if dev.get('type') == 'mdev':
            address_tag = dev.find('source/address')
            if address_tag is None:
                continue
            src_mdev = address_tag.get('uuid')
            if src_mdev is not None:
                dst_mdev = target_mdevs.get(src_mdev)
                if dst_mdev is None:
                    # For some reason, we don't know which mdev to use
                    # so we prefer to abort the live-migration.
                    raise exception.NovaException(
                        'Unable to find the destination mediated device UUID '
                        'to use for this source mdev UUID: %s' % src_mdev)
                else:
                    address_tag.set('uuid', dst_mdev)
    LOG.debug('_update_mdev_xml output xml=%s',
              etree.tostring(xml_doc, encoding='unicode', pretty_print=True))
    return xml_doc


def _update_numa_xml(xml_doc, migrate_data):
    LOG.debug('_update_numa_xml input xml=%s',
              etree.tostring(xml_doc, encoding='unicode', pretty_print=True))
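
A standalone round-trip sketch (lxml only; the UUIDs are placeholders, not
values from this change) of the rewrite _update_mdev_xml applies to the
migratable domain XML:

    from lxml import etree

    SRC = '11111111-1111-1111-1111-111111111111'
    DST = '22222222-2222-2222-2222-222222222222'
    xml = """<domain>
      <devices>
        <hostdev mode="subsystem" type="mdev" model="vfio-pci">
          <source><address uuid="%s"/></source>
        </hostdev>
      </devices>
    </domain>""" % SRC

    doc = etree.fromstring(xml)
    for dev in doc.findall('./devices/hostdev'):
        if dev.get('type') == 'mdev':
            address = dev.find('source/address')
            # swap the source mdev UUID for the one reserved on the target
            address.set('uuid', {SRC: DST}[address.get('uuid')])
    assert ('uuid="%s"' % DST) in etree.tostring(doc, encoding='unicode')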


@@ -0,0 +1,13 @@
---
features:
  - |
    Instances using vGPUs can now be correctly live-migrated by the libvirt
    driver between compute nodes supporting the same mediated device types
    used by the instance. To do this, the compute hosts need to run at least
    libvirt-8.6.0, QEMU-8.1.0 and Linux kernel 5.18.0. Operators using
    multiple vGPU types per compute need to make sure they already use
    custom traits or custom resource classes for the GPU resource providers,
    and that the instance was created with a flavor either using a custom
    resource class or asking for a custom trait, so that the Placement API
    provides the right target GPU using the same mdev type for the instance.