check both source and dest compute hypervisor versions for mdev live-migration

Since mdev live-migration is only supported by QEMU 8.1 and
libvirt 8.6.0 or newer, we need to verify the hypervisor versions
on both the source and the destination.

If either of them is older, the conductor raises an exception that
eventually causes the API to return an HTTP 500.

Change-Id: I17f170143c58401b8b0a5a93e83355b1f7178ab5
Partially-Implements: blueprint libvirt-mdev-live-migrate
Sylvain Bauza 2023-12-20 12:00:50 +01:00
parent fbdd68d4de
commit baa78326dd
3 changed files with 377 additions and 8 deletions
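For context, the gate introduced by this patch boils down to comparing the host's libvirt and QEMU versions against the new minimums. Below is a minimal, illustrative sketch of that comparison; the two constants and the versionutils call appear in the diff, while the standalone helper name and its integer-version arguments are hypothetical:

# Illustrative sketch only -- the authoritative check is
# LibvirtDriver._host_can_support_mdev_live_migration() in the diff below.
from oslo_utils import versionutils

MIN_MDEV_LIVEMIG_LIBVIRT_VERSION = (8, 6, 0)
MIN_MDEV_LIVEMIG_QEMU_VERSION = (8, 1, 0)


def host_supports_mdev_live_migration(libvirt_version, qemu_version):
    """Return True if both hypervisor versions meet the minimums.

    The arguments are the integers returned by libvirt's getLibVersion()
    and getVersion(), e.g. 8006000 for libvirt 8.6.0.
    """
    return (libvirt_version >= versionutils.convert_version_to_int(
                MIN_MDEV_LIVEMIG_LIBVIRT_VERSION) and
            qemu_version >= versionutils.convert_version_to_int(
                MIN_MDEV_LIVEMIG_QEMU_VERSION))

On the source, this check is only enforced when the instance actually has assigned mdevs, and the source additionally requires the dst_supports_mdev_live_migration flag that the destination sets during its own pre-check; otherwise a MigrationPreCheckError is raised and surfaced by the API as an HTTP 500.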


@@ -25,6 +25,7 @@ import nova.conf
from nova import context
from nova import objects
from nova.tests.fixtures import libvirt as fakelibvirt
from nova.tests.functional.api import client
from nova.tests.functional.libvirt import base
from nova.virt.libvirt import driver as libvirt_driver
from nova.virt.libvirt import utils as libvirt_utils
@@ -40,8 +41,8 @@ class VGPUTestBase(base.ServersTestBase):
microversion = 'latest'
ADMIN_API = True
FAKE_LIBVIRT_VERSION = 5000000
FAKE_QEMU_VERSION = 3001000
FAKE_LIBVIRT_VERSION = 7000000
FAKE_QEMU_VERSION = 5002000
# Since we run all computes in a single process, we need to identify
# which compute service we are currently using.
@@ -113,12 +114,16 @@ class VGPUTestBase(base.ServersTestBase):
parent=libvirt_parent)})
return uuid
def start_compute_with_vgpu(self, hostname):
hostname = self.start_compute(
pci_info=fakelibvirt.HostPCIDevicesInfo(
def start_compute_with_vgpu(self, hostname, pci_info=None):
if not pci_info:
pci_info = fakelibvirt.HostPCIDevicesInfo(
num_pci=0, num_pfs=0, num_vfs=0, num_mdevcap=2,
),
)
hostname = self.start_compute(
pci_info=pci_info,
hostname=hostname,
libvirt_version=self.FAKE_LIBVIRT_VERSION,
qemu_version=self.FAKE_QEMU_VERSION
)
compute = self.computes[hostname]
rp_uuid = self.compute_rp_uuids[hostname]
@@ -127,7 +132,8 @@ class VGPUTestBase(base.ServersTestBase):
inventory = self._get_provider_inventory(rp)
if orc.VGPU in inventory:
usage = self._get_provider_usages(rp)
self.assertEqual(16, inventory[orc.VGPU]['total'])
# if multiple types are enabled, the inventories are different
self.assertIn(inventory[orc.VGPU]['total'], [8, 16])
self.assertEqual(0, usage[orc.VGPU])
# Since we haven't created any mdevs yet, we shouldn't find them
self.assertEqual([], compute.driver._get_mediated_devices())
@@ -423,6 +429,131 @@ class VGPUMultipleTypesTests(VGPUTestBase):
self.assertEqual(expected[trait], mdev_info['parent'])
class VGPULiveMigrationTests(base.LibvirtMigrationMixin, VGPUTestBase):
# Use the right minimum versions for live-migration
FAKE_LIBVIRT_VERSION = 8006000
FAKE_QEMU_VERSION = 8001000
def setUp(self):
# Prepare two computes (src and dest), each of them having two pGPUs
# (81:00.0 and 81:01.0) supporting two types, but where the operator
# only wants nvidia-11 on 81:00.0 and nvidia-12 on 81:01.0
super(VGPULiveMigrationTests, self).setUp()
# Let's set the configuration correctly.
self.flags(
enabled_mdev_types=[fakelibvirt.NVIDIA_11_VGPU_TYPE,
fakelibvirt.NVIDIA_12_VGPU_TYPE],
group='devices')
# we need to call the below again to ensure the updated
# 'device_addresses' value is read and the new groups created
nova.conf.devices.register_dynamic_opts(CONF)
MDEVCAP_DEV1_PCI_ADDR = self.libvirt2pci_address(
fakelibvirt.MDEVCAP_DEV1_PCI_ADDR)
MDEVCAP_DEV2_PCI_ADDR = self.libvirt2pci_address(
fakelibvirt.MDEVCAP_DEV2_PCI_ADDR)
self.flags(device_addresses=[MDEVCAP_DEV1_PCI_ADDR],
group='mdev_nvidia-11')
self.flags(device_addresses=[MDEVCAP_DEV2_PCI_ADDR],
group='mdev_nvidia-12')
pci_info = fakelibvirt.HostPCIDevicesInfo(
num_pci=0, num_pfs=0, num_vfs=0, num_mdevcap=2,
multiple_gpu_types=True)
self.src = self.start_compute_with_vgpu('src', pci_info=pci_info)
self.dest = self.start_compute_with_vgpu('dest', pci_info=pci_info)
# Add the custom traits to the 4 resource providers (two per host as
# we have two pGPUs)
self._create_trait('CUSTOM_NVIDIA_11')
self._create_trait('CUSTOM_NVIDIA_12')
for host in [self.src.host, self.dest.host]:
nvidia11_rp_uuid = self._get_provider_uuid_by_name(
host + '_' + fakelibvirt.MDEVCAP_DEV1_PCI_ADDR)
nvidia12_rp_uuid = self._get_provider_uuid_by_name(
host + '_' + fakelibvirt.MDEVCAP_DEV2_PCI_ADDR)
self._set_provider_traits(nvidia11_rp_uuid, ['CUSTOM_NVIDIA_11'])
self._set_provider_traits(nvidia12_rp_uuid, ['CUSTOM_NVIDIA_12'])
# We will test live-migrating an instance that uses the nvidia-11 type.
extra_spec = {"resources:VGPU": "1",
"trait:CUSTOM_NVIDIA_11": "required"}
self.flavor = self._create_flavor(extra_spec=extra_spec)
def test_live_migration_fails_on_old_source(self):
pci_info = fakelibvirt.HostPCIDevicesInfo(
num_pci=0, num_pfs=0, num_vfs=0, num_mdevcap=2,
multiple_gpu_types=True)
self.src = self.restart_compute_service(
self.src.host,
pci_info=pci_info,
keep_hypervisor_state=False,
qemu_version=8000000,
libvirt_version=8005000)
server = self._create_server(
image_uuid='155d900f-4e14-4e4c-a73d-069cbf4541e6',
flavor_id=self.flavor, networks='auto', host=self.src.host)
# now live migrate that server
ex = self.assertRaises(
client.OpenStackApiException,
self._live_migrate,
server, 'completed')
self.assertEqual(500, ex.response.status_code)
self.assertIn('NoValidHost', str(ex))
log_out = self.stdlog.logger.output
self.assertIn('Migration pre-check error: Unable to migrate %s: '
'Either libvirt or QEMU version for compute service '
'source is older than the supported ones'
% server['id'], log_out)
def test_live_migration_fails_on_old_destination(self):
# To make sure we actually exercise the check against the dest object,
# we need to skip the generic verification of whether the destination
# HV version is older
self.flags(skip_hypervisor_version_check_on_lm=True,
group='workarounds')
pci_info = fakelibvirt.HostPCIDevicesInfo(
num_pci=0, num_pfs=0, num_vfs=0, num_mdevcap=2,
multiple_gpu_types=True)
self.dest = self.restart_compute_service(
self.dest.host,
pci_info=pci_info,
keep_hypervisor_state=False,
qemu_version=8000000,
libvirt_version=8005000)
server = self._create_server(
image_uuid='155d900f-4e14-4e4c-a73d-069cbf4541e6',
flavor_id=self.flavor, networks='auto', host=self.src.host)
# now live migrate that server
ex = self.assertRaises(
client.OpenStackApiException,
self._live_migrate,
server, 'completed')
self.assertEqual(500, ex.response.status_code)
self.assertIn('NoValidHost', str(ex))
log_out = self.stdlog.logger.output
self.assertIn('Migration pre-check error: Unable to migrate %s: '
'Either libvirt or QEMU version for compute service '
'target is older than the supported ones'
% server['id'],
log_out)
def test_live_migrate_server(self):
self.server = self._create_server(
image_uuid='155d900f-4e14-4e4c-a73d-069cbf4541e6',
flavor_id=self.flavor, networks='auto', host=self.src.host)
inst = objects.Instance.get_by_uuid(self.context, self.server['id'])
mdevs = self.src.driver._get_all_assigned_mediated_devices(inst)
self.assertEqual(1, len(mdevs))
self._live_migrate(self.server, 'completed')
# FIXME(sbauza): The domain is fully copied to the destination, so the
# XML contains the original mdev, but since the 'devices' attribute on
# the fixture doesn't have it, we get a KeyError.
self.assertRaises(KeyError, self.assert_mdev_usage, self.dest, 0)
class DifferentMdevClassesTests(VGPUTestBase):
def setUp(self):


@@ -11547,6 +11547,50 @@ class LibvirtConnTestCase(test.NoDBTestCase,
self.context, instance_ref, compute_info, compute_info)
self.assertNotIn('dst_supports_numa_live_migration', result)
@mock.patch(
'nova.network.neutron.API.has_port_binding_extension',
new=mock.Mock(return_value=False))
@mock.patch.object(libvirt_driver.LibvirtDriver,
'_host_can_support_mdev_live_migration')
@mock.patch.object(libvirt_driver.LibvirtDriver,
'_create_shared_storage_test_file',
return_value='fake')
@mock.patch.object(libvirt_driver.LibvirtDriver, '_compare_cpu')
def test_check_can_live_migrate_dest_mdev_lm(
self, mock_cpu, mock_test_file, mock_can_sup_mdev_lm,
):
mock_can_sup_mdev_lm.return_value = True
instance_ref = objects.Instance(**self.test_instance)
instance_ref.numa_topology = objects.InstanceNUMATopology(
cells=[objects.InstanceNUMACell()])
drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
compute_info = {'cpu_info': 'asdf', 'disk_available_least': 1}
result = drvr.check_can_live_migrate_destination(
self.context, instance_ref, compute_info, compute_info)
self.assertTrue(result.dst_supports_mdev_live_migration)
@mock.patch(
'nova.network.neutron.API.has_port_binding_extension',
new=mock.Mock(return_value=False))
@mock.patch.object(libvirt_driver.LibvirtDriver,
'_host_can_support_mdev_live_migration')
@mock.patch.object(libvirt_driver.LibvirtDriver,
'_create_shared_storage_test_file',
return_value='fake')
@mock.patch.object(libvirt_driver.LibvirtDriver, '_compare_cpu')
def test_check_can_live_migrate_dest_mdev_lm_no_host_support(
self, mock_cpu, mock_test_file, mock_can_sup_mdev_lm,
):
mock_can_sup_mdev_lm.return_value = False
instance_ref = objects.Instance(**self.test_instance)
instance_ref.numa_topology = objects.InstanceNUMATopology(
cells=[objects.InstanceNUMACell()])
drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
compute_info = {'cpu_info': 'asdf', 'disk_available_least': 1}
result = drvr.check_can_live_migrate_destination(
self.context, instance_ref, compute_info, compute_info)
self.assertNotIn('dst_supports_mdev_live_migration', result)
@mock.patch(
'nova.network.neutron.API.has_port_binding_extension',
new=mock.Mock(return_value=False))
@@ -11655,6 +11699,41 @@ class LibvirtConnTestCase(test.NoDBTestCase,
for vif in result.vifs:
self.assertTrue(vif.supports_os_vif_delegation)
@mock.patch.object(fakelibvirt.Connection, 'getLibVersion')
@mock.patch.object(fakelibvirt.Connection, 'getVersion')
def _test_host_can_support_mdev_lm(self, mock_getversion,
mock_getlibversion,
old_libvirt, old_qemu, expected):
min_libvirt_ver = versionutils.convert_version_to_int(
libvirt_driver.MIN_MDEV_LIVEMIG_LIBVIRT_VERSION)
min_qemu_ver = versionutils.convert_version_to_int(
libvirt_driver.MIN_MDEV_LIVEMIG_QEMU_VERSION)
mock_getversion.return_value = (min_qemu_ver - 1 if old_qemu
else min_qemu_ver)
mock_getlibversion.return_value = (min_libvirt_ver - 1 if old_libvirt
else min_libvirt_ver)
driver = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
self.assertEqual(expected,
driver._host_can_support_mdev_live_migration())
def test_host_can_support_mdev_lm(self):
self._test_host_can_support_mdev_lm(old_libvirt=False, old_qemu=False,
expected=True)
def test_host_can_support_mdev_lm_old_libvirt(self):
self._test_host_can_support_mdev_lm(old_libvirt=True, old_qemu=False,
expected=False)
def test_host_can_support_mdev_lm_old_qemu(self):
self._test_host_can_support_mdev_lm(old_libvirt=False, old_qemu=True,
expected=False)
def test_host_can_support_mdev_lm_both_old(self):
self._test_host_can_support_mdev_lm(old_libvirt=True, old_qemu=True,
expected=False)
@mock.patch.object(host.Host, 'compare_hypervisor_cpu')
@mock.patch.object(nova.virt.libvirt, 'config')
def test_compare_cpu_compatible_host_cpu(self, mock_vconfig, mock_compare):
@@ -11786,17 +11865,28 @@ class LibvirtConnTestCase(test.NoDBTestCase,
mock_utime.assert_called_once_with(CONF.instances_path, None)
mock_path_exists.assert_called_once_with(tmpfile_path)
@mock.patch.object(libvirt_driver.LibvirtDriver,
'_get_mdev_types_from_uuids')
@mock.patch.object(libvirt_driver.LibvirtDriver,
'_assert_source_can_live_migrate_mdevs')
@mock.patch.object(libvirt_driver.LibvirtDriver,
'_get_all_assigned_mediated_devices')
@mock.patch.object(libvirt_driver.LibvirtDriver,
'_check_shared_storage_test_file')
@mock.patch.object(libvirt_driver.LibvirtDriver,
'_is_shared_block_storage')
def _test_can_live_migrate_source(self, mock_is_shared, mock_check_shared,
mock_get_all_assigned_mdevs,
mock_assert_src_can_lv_mdevs,
mock_get_types,
block_migration=False,
is_shared_block_storage=False,
is_shared_instance_path=False,
disk_available_mb=1024,
exception=None,
numa_lm=True):
numa_lm=True,
instance_mdevs=False,
assert_can_lv_mdevs=True):
instance = objects.Instance(**self.test_instance)
if numa_lm:
instance.numa_topology = objects.InstanceNUMATopology(cells=[
@@ -11812,6 +11902,15 @@ class LibvirtConnTestCase(test.NoDBTestCase,
mock_is_shared.return_value = is_shared_block_storage
mock_check_shared.return_value = is_shared_instance_path
if not instance_mdevs:
mock_get_all_assigned_mdevs.return_value = {}
else:
mock_get_all_assigned_mdevs.return_value = instance_mdevs
mock_get_types.return_value = {uuids.mdev1: 'fake_type'}
if not assert_can_lv_mdevs:
mock_assert_src_can_lv_mdevs.side_effect = exception(
reason='kaboom')
if exception:
self.assertRaises(exception, drvr.check_can_live_migrate_source,
self.context, instance, dest_check_data)
@@ -11842,6 +11941,12 @@ class LibvirtConnTestCase(test.NoDBTestCase,
if is_shared_instance_path:
self.assertTrue(ret.is_shared_instance_path)
if instance_mdevs:
mock_get_types.assert_called_once_with(instance_mdevs.keys())
self.assertEqual({uuids.mdev1: 'fake_type'}, ret.source_mdev_types)
if assert_can_lv_mdevs:
mock_assert_src_can_lv_mdevs.assert_called_once_with(
instance, dest_check_data)
return (instance, dest_check_data)
@mock.patch.object(libvirt_driver.LibvirtDriver,
@@ -11894,6 +11999,53 @@ class LibvirtConnTestCase(test.NoDBTestCase,
exception=exception.MigrationError)
mock_get_bdi.assert_called_once_with(instance, None)
def test_check_can_live_migrate_source_mdev_lm(self):
instance_mdevs = {uuids.mdev1: uuids.inst1}
self._test_can_live_migrate_source(is_shared_block_storage=True,
instance_mdevs=instance_mdevs)
def test_check_can_live_migrate_source_mdev_lm_fails(self):
instance_mdevs = {uuids.mdev1: uuids.inst1}
self._test_can_live_migrate_source(
is_shared_block_storage=True,
instance_mdevs=instance_mdevs,
assert_can_lv_mdevs=False,
exception=exception.MigrationPreCheckError)
@mock.patch.object(libvirt_driver.LibvirtDriver,
'_host_can_support_mdev_live_migration')
def test_assert_source_can_lv_mdevs_fails_due_to_src(self, mock_host):
mock_host.return_value = False
instance = objects.Instance(**self.test_instance)
drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
dest_check_data = objects.LibvirtLiveMigrateData(
dst_supports_mdev_live_migration=True)
self.assertRaises(exception.MigrationPreCheckError,
drvr._assert_source_can_live_migrate_mdevs,
instance, dest_check_data)
@mock.patch.object(libvirt_driver.LibvirtDriver,
'_host_can_support_mdev_live_migration')
def test_assert_source_can_lv_mdevs_fails_due_to_dest(self, mock_host):
mock_host.return_value = True
instance = objects.Instance(**self.test_instance)
drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
dest_check_data = objects.LibvirtLiveMigrateData()
self.assertRaises(exception.MigrationPreCheckError,
drvr._assert_source_can_live_migrate_mdevs,
instance, dest_check_data)
@mock.patch.object(libvirt_driver.LibvirtDriver,
'_host_can_support_mdev_live_migration')
def test_assert_source_can_lv_mdevs_works(self, mock_host):
mock_host.return_value = True
instance = objects.Instance(**self.test_instance)
drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
dest_check_data = objects.LibvirtLiveMigrateData(
dst_supports_mdev_live_migration=True)
drvr._assert_source_can_live_migrate_mdevs(instance, dest_check_data)
mock_host.assert_called_once_with()
@mock.patch.object(host.Host, 'has_min_version', return_value=True)
@mock.patch('nova.virt.libvirt.driver.LibvirtDriver.'
'_assert_dest_node_has_enough_disk')
@@ -26802,6 +26954,22 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin):
# we don't get results.
self.assertEqual([], drvr._get_mediated_devices(types=['nvidia-12']))
@mock.patch.object(libvirt_driver.LibvirtDriver, '_get_mediated_devices')
def test_get_mdev_types_from_uuids(self, mock_get_mdevs):
mock_get_mdevs.return_value = [
{"uuid": uuids.mdev1, "type": "nvidia-11"}]
drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
self.assertEqual({uuids.mdev1: "nvidia-11"},
drvr._get_mdev_types_from_uuids([uuids.mdev1,
uuids.mdev3]))
self.assertEqual({}, drvr._get_mdev_types_from_uuids([uuids.mdev2]))
@mock.patch.object(libvirt_driver.LibvirtDriver, '_get_mediated_devices')
def test_get_mdev_types_from_uuids_missing(self, mock_get_mdevs):
mock_get_mdevs.return_value = []
drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
self.assertEqual({}, drvr._get_mdev_types_from_uuids([uuids.mdev1]))
@mock.patch.object(host.Host, 'list_guests')
def test_get_all_assigned_mediated_devices(self, list_guests):
dom_with_vgpu = """


@@ -239,6 +239,10 @@ ALLOWED_QEMU_SERIAL_PORTS = QEMU_MAX_SERIAL_PORTS - 1
VGPU_RESOURCE_SEMAPHORE = 'vgpu_resources'
# Minimum versions supporting mdev live-migration.
MIN_MDEV_LIVEMIG_LIBVIRT_VERSION = (8, 6, 0)
MIN_MDEV_LIVEMIG_QEMU_VERSION = (8, 1, 0)
LIBVIRT_PERF_EVENT_PREFIX = 'VIR_PERF_PARAM_'
# Maxphysaddr minimal support version.
@@ -8396,6 +8400,19 @@ class LibvirtDriver(driver.ComputeDriver):
mediated_devices.append(device)
return mediated_devices
def _get_mdev_types_from_uuids(self, mdev_uuids):
"""Returns a dict of mdevs and their type from a list of mediated
device UUIDs. If no mdevs are actually using those UUIDs, it returns an
empty dict.
:param mdev_uuids: List of existing mediated device UUIDs.
:returns: dict where key is the mdev UUID and the value is its type.
"""
host_mdevs = self._get_mediated_devices()
inst_dev_infos = filter(lambda dev: dev['uuid'] in mdev_uuids,
host_mdevs)
return {mdev['uuid']: mdev['type'] for mdev in inst_dev_infos}
def _get_all_assigned_mediated_devices(self, instance=None):
"""Lookup all instances from the host and return all the mediated
devices that are assigned to a guest.
@@ -9747,6 +9764,11 @@ class LibvirtDriver(driver.ComputeDriver):
for vif in data.vifs:
vif.supports_os_vif_delegation = True
# Just flag the fact that we can live-migrate mdevs, even if we don't
# use them, so the source will know it can pick this compute.
if self._host_can_support_mdev_live_migration():
data.dst_supports_mdev_live_migration = True
return data
def post_claim_migrate_data(self, context, instance, migrate_data, claim):
@@ -9926,8 +9948,56 @@ class LibvirtDriver(driver.ComputeDriver):
if instance.numa_topology:
dest_check_data.src_supports_numa_live_migration = True
# If we have mediated devices to live-migrate, just verify we can
# support them.
instance_mdevs = self._get_all_assigned_mediated_devices(instance)
if instance_mdevs:
# This can raise a MigrationPreCheckError if the target is too old
# or if the QEMU or libvirt versions on this compute are too old
# (we only check this when the instance uses mdevs)
self._assert_source_can_live_migrate_mdevs(instance,
dest_check_data)
mdev_types = self._get_mdev_types_from_uuids(instance_mdevs.keys())
dest_check_data.source_mdev_types = mdev_types
return dest_check_data
def _host_can_support_mdev_live_migration(self):
return self._host.has_min_version(
lv_ver=MIN_MDEV_LIVEMIG_LIBVIRT_VERSION,
hv_ver=MIN_MDEV_LIVEMIG_QEMU_VERSION,
hv_type=host.HV_DRIVER_QEMU,
)
def _assert_source_can_live_migrate_mdevs(self, instance, dest_check_data):
"""Check if the source can live migrate the instance by looking at the
QEMU and libvirt versions but also at the destination object.
:param instance: nova.objects.instance.Instance object
:param migrate_data: nova.objects.LibvirtLiveMigrateData object
:raises: MigrationPreCheckError if the versions are too old or if the
dst_supports_mdev_live_migration sentinel is not True.
"""
failed = ''
if not self._host_can_support_mdev_live_migration():
failed = 'source'
elif not ('dst_supports_mdev_live_migration' in dest_check_data and
dest_check_data.dst_supports_mdev_live_migration):
failed = 'target'
if failed:
reason = (_('Unable to migrate %(instance_uuid)s: '
'Either libvirt or QEMU version for compute service '
'%(host)s is older than the supported ones: '
'(QEMU: %(qemu_v)s, libvirt: %(libv_v)s)') %
{'instance_uuid': instance.uuid,
'host': failed,
'qemu_v': libvirt_utils.version_to_string(
MIN_MDEV_LIVEMIG_QEMU_VERSION),
'libv_v': libvirt_utils.version_to_string(
MIN_MDEV_LIVEMIG_LIBVIRT_VERSION)})
raise exception.MigrationPreCheckError(reason=reason)
def _is_shared_block_storage(self, instance, dest_check_data,
block_device_info=None):
"""Check if all block storage of an instance can be shared