From 1c59397e09de5506bccba513ef31ffb8585fcdc3 Mon Sep 17 00:00:00 2001 From: Sylvain Bauza Date: Fri, 22 Jun 2018 14:53:57 +0200 Subject: [PATCH] libvirt: Fix the rescue race for vGPU instances When rescuing an instance having a vGPU, we were not using the vGPU. There would then be a race condition during the rescue where the vGPU could be passed to another instance. Instead, we should just make sure the vGPU would also be in the rescued instance. Change-Id: I7150e15694bb149ae67da37b5e43b6ea7507fe82 Closes-bug: #1762688 --- doc/source/admin/virtual-gpu.rst | 6 +++++- nova/tests/unit/virt/libvirt/test_driver.py | 10 ++++++++++ nova/virt/libvirt/driver.py | 7 ++++++- 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/doc/source/admin/virtual-gpu.rst b/doc/source/admin/virtual-gpu.rst index ab8e2c9e4f80..f7b76a67b2e7 100644 --- a/doc/source/admin/virtual-gpu.rst +++ b/doc/source/admin/virtual-gpu.rst @@ -170,7 +170,8 @@ Caveats .. note:: - All the caveats are related to the Queens release + This information is correct as of the 17.0.0 Queens release. Where + improvements have been made or issues fixed, they are noted per item. For libvirt: @@ -197,6 +198,8 @@ For libvirt: instance immediately after rescue. However, rebuilding the rescued instance only helps if there are other free vGPUs on the host. + .. note:: This has been resolved in the Rocky release [#]_. + For XenServer: * Suspend and live migration with vGPUs attached depends on support from the @@ -217,6 +220,7 @@ For XenServer: resize. If you want to migrate an instance, make sure to rebuild it after the migration. +.. [#] https://bugs.launchpad.net/nova/+bug/1762688 .. Links .. _Intel GVT-g: https://01.org/igvt-g diff --git a/nova/tests/unit/virt/libvirt/test_driver.py b/nova/tests/unit/virt/libvirt/test_driver.py index 6e84a66b7f95..b77355f3b6ee 100644 --- a/nova/tests/unit/virt/libvirt/test_driver.py +++ b/nova/tests/unit/virt/libvirt/test_driver.py @@ -18996,6 +18996,8 @@ class LibvirtDriverTestCase(test.NoDBTestCase): self.drvr.detach_interface(self.context, instance, network_info[0]) self.mox.VerifyAll() + @mock.patch('nova.virt.libvirt.LibvirtDriver.' + '_get_all_assigned_mediated_devices') @mock.patch('nova.virt.libvirt.utils.write_to_file') # NOTE(mdbooth): The following 4 mocks are required to execute # get_guest_xml(). @@ -19007,11 +19009,14 @@ class LibvirtDriverTestCase(test.NoDBTestCase): mock_instance_metadata, mock_supports_direct_io, mock_build_device_metadata, mock_set_host_enabled, mock_write_to_file, + mock_get_mdev, exists=None): self.flags(instances_path=self.useFixture(fixtures.TempDir()).path) mock_build_device_metadata.return_value = None mock_supports_direct_io.return_value = True + mock_get_mdev.return_value = {uuids.mdev1: uuids.inst1} + backend = self.useFixture( fake_imagebackend.ImageBackendFixture(exists=exists)) @@ -19075,6 +19080,11 @@ class LibvirtDriverTestCase(test.NoDBTestCase): self.assertEqual(expected_kernel_ramdisk_paths, kernel_ramdisk_paths) + # The generated domain XML should also contain any existing mdev + self.assertEqual( + [uuids.mdev1], + doc.xpath("devices/*[@type='mdev']/source/address/@uuid")) + @mock.patch('nova.virt.configdrive.ConfigDriveBuilder._make_iso9660') def test_rescue_config_drive(self, mock_mkisofs): instance = self._create_instance({'config_drive': str(True)}) diff --git a/nova/virt/libvirt/driver.py b/nova/virt/libvirt/driver.py index 91e205025e9c..e4a07f9064fe 100644 --- a/nova/virt/libvirt/driver.py +++ b/nova/virt/libvirt/driver.py @@ -2959,11 +2959,16 @@ class LibvirtDriver(driver.ComputeDriver): gen_confdrive = functools.partial(self._create_configdrive, context, instance, injection_info, rescue=True) + # NOTE(sbauza): Since rescue recreates the guest XML, we need to + # remember the existing mdevs for reusing them. + mdevs = self._get_all_assigned_mediated_devices(instance) + mdevs = list(mdevs.keys()) self._create_image(context, instance, disk_info['mapping'], injection_info=injection_info, suffix='.rescue', disk_images=rescue_images) xml = self._get_guest_xml(context, instance, network_info, disk_info, - image_meta, rescue=rescue_images) + image_meta, rescue=rescue_images, + mdevs=mdevs) self._destroy(instance) self._create_domain(xml, post_xml_callback=gen_confdrive)