libvirt: Cap GPU types with max_instances

We want to cap the maximum number of mdevs that can be created per type.
If a type already has enough capacity, the other GPUs won't be used and
their existing ResourceProviders will be deleted.
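
For example, to cap nvidia-11 at two mediated devices while leaving
nvidia-12 uncapped, an operator could set (a sketch of the new option;
addresses and type names are illustrative):

  [devices]
  enabled_mdev_types = nvidia-11,nvidia-12

  [mdev_nvidia-11]
  device_addresses = 0000:81:00.0
  max_instances = 2

  [mdev_nvidia-12]
  device_addresses = 0000:81:01.0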

Closes-Bug: #2041519
Change-Id: I069879a333152bb849c248b3dcb56357a11d0324
Sylvain Bauza 2023-10-30 18:11:46 +01:00
parent 2f6418d1a7
commit 60851e4464
4 changed files with 170 additions and 6 deletions

nova/conf/devices.py

@@ -89,6 +89,15 @@ def register_dynamic_opts(conf):
'CUSTOM_ if it is not VGPU.')
conf.register_opt(class_opt, group='mdev_%s' % mdev_type)
# Register the '[mdev_$(MDEV_TYPE)]/max_instances' opts
max_inst_opt = cfg.IntOpt(
'max_instances',
default=None, min=1,
help='Maximum number of mediated devices that this type can create. '
'If not set, it implies that we use the maximum allowed by '
'the type.')
conf.register_opt(max_inst_opt, group='mdev_%s' % mdev_type)
def list_opts():
return {devices_group: mdev_opts}
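
A minimal sketch (not part of this change) of how the dynamically
registered group is read once register_dynamic_opts() has run; the
'nvidia-11' type name is illustrative and assumed to be listed in
[devices]/enabled_mdev_types:

  import nova.conf

  CONF = nova.conf.CONF
  # Creates the [mdev_nvidia-11] group (and its max_instances opt).
  nova.conf.devices.register_dynamic_opts(CONF)
  group = getattr(CONF, 'mdev_nvidia-11')
  # None means "use the maximum the type reports"; an integer caps it.
  limit = group.max_instances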

nova/tests/functional/libvirt/test_vgpu.py

@@ -429,6 +429,84 @@ class VGPUMultipleTypesTests(VGPUTestBase):
self.assertEqual(expected[trait], mdev_info['parent'])
class VGPULimitMultipleTypesTests(VGPUTestBase):
def setUp(self):
super(VGPULimitMultipleTypesTests, self).setUp()
extra_spec = {"resources:VGPU": "1"}
self.flavor = self._create_flavor(extra_spec=extra_spec)
self.flags(
enabled_mdev_types=[fakelibvirt.NVIDIA_11_VGPU_TYPE,
fakelibvirt.NVIDIA_12_VGPU_TYPE],
group='devices')
# we need to call the below again to ensure the updated
# 'device_addresses' value is read and the new groups created
nova.conf.devices.register_dynamic_opts(CONF)
# host1 will have 2 physical GPUs :
# - 0000:81:00.0 will only support nvidia-11
# - 0000:81:01.0 will only support nvidia-12
MDEVCAP_DEV1_PCI_ADDR = self.libvirt2pci_address(
fakelibvirt.MDEVCAP_DEV1_PCI_ADDR)
MDEVCAP_DEV2_PCI_ADDR = self.libvirt2pci_address(
fakelibvirt.MDEVCAP_DEV2_PCI_ADDR)
self.flags(device_addresses=[MDEVCAP_DEV1_PCI_ADDR],
group='mdev_nvidia-11')
self.flags(device_addresses=[MDEVCAP_DEV2_PCI_ADDR],
group='mdev_nvidia-12')
# Start the compute by supporting both types
pci_info = fakelibvirt.HostPCIDevicesInfo(
num_pci=0, num_pfs=0, num_vfs=0, num_mdevcap=2,
multiple_gpu_types=True)
self.compute1 = self.start_compute_with_vgpu('host1', pci_info)
def test_create_servers_with_vgpu(self):
physdev1_rp_uuid = self._get_provider_uuid_by_name(
self.compute1.host + '_' + fakelibvirt.MDEVCAP_DEV1_PCI_ADDR)
physdev2_rp_uuid = self._get_provider_uuid_by_name(
self.compute1.host + '_' + fakelibvirt.MDEVCAP_DEV2_PCI_ADDR)
# Just for asserting the inventories we currently have.
physdev1_inventory = self._get_provider_inventory(physdev1_rp_uuid)
self.assertEqual(16, physdev1_inventory[orc.VGPU]['total'])
physdev2_inventory = self._get_provider_inventory(physdev2_rp_uuid)
self.assertEqual(8, physdev2_inventory[orc.VGPU]['total'])
# Now, let's limit the capacity for the first type to 2
self.flags(max_instances=2, group='mdev_nvidia-11')
# Restart the compute service to update the Resource Providers
self.compute2 = self.restart_compute_service('host1')
# Make sure we can still create an instance
server = self._create_server(
image_uuid='155d900f-4e14-4e4c-a73d-069cbf4541e6',
flavor_id=self.flavor, networks='auto', host=self.compute1.host)
mdevs = self.compute1.driver._get_mediated_devices()
self.assertEqual(1, len(mdevs))
# ... but actually, looking at Placement, only the 2nd GPU can now be
# used, because nvidia-11 was limited to 2 while the GPU supporting it
# reports more capacity than that limit.
physdev2_inventory = self._get_provider_inventory(physdev2_rp_uuid)
self.assertEqual(8, physdev2_inventory[orc.VGPU]['total'])
# Get the instance we just created
inst = objects.Instance.get_by_uuid(self.context, server['id'])
expected_rp_name = (self.compute1.host + '_' +
fakelibvirt.MDEVCAP_DEV2_PCI_ADDR)
# Yes, indeed we use the 2nd GPU
self.assert_mdev_usage(self.compute1, expected_amount=1,
expected_rc=orc.VGPU, instance=inst,
expected_rp_name=expected_rp_name)
# ... and what happened to the first GPU inventory? Well, the whole
# Resource Provider disappeared!
provider = self._get_resource_provider_by_uuid(physdev1_rp_uuid)
self.assertEqual(404, provider['errors'][0]['status'])
self.assertIn(
"No resource provider with uuid %s found" % physdev1_rp_uuid,
provider['errors'][0]['detail'])
class VGPULiveMigrationTests(base.LibvirtMigrationMixin, VGPUTestBase):
# Use the right minimum versions for live-migration

nova/tests/unit/virt/libvirt/test_driver.py

@@ -26833,6 +26833,42 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin):
self._test_get_gpu_inventories(drvr, expected, ['nvidia-11',
'nvidia-12'])
def test_get_gpu_inventories_with_max_instances_per_type(self):
self.flags(enabled_mdev_types=['nvidia-11', 'nvidia-12'],
group='devices')
# we need to call the below again to ensure the updated
# 'device_addresses' value is read and the new groups created
nova.conf.devices.register_dynamic_opts(CONF)
self.flags(device_addresses=['0000:06:00.0'], group='mdev_nvidia-11')
self.flags(device_addresses=['0000:07:00.0'], group='mdev_nvidia-12')
# We will cap the max vGPUs for nvidia-11 to 2 but we leave nvidia-12
# uncapped
self.flags(max_instances=2, group='mdev_nvidia-11')
drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
expected = {
# we don't accept this PCI device because max_instances is less than
# its capacity
'pci_0000_06_00_0': {'total': 0,
'max_unit': 0,
'min_unit': 1,
'step_size': 1,
'reserved': 0,
'allocation_ratio': 1.0,
},
# the second GPU supports nvidia-12 but the existing mdev is not
# using this type, so we only count the availableInstances value
# for nvidia-12.
'pci_0000_07_00_0': {'total': 10,
'max_unit': 10,
'min_unit': 1,
'step_size': 1,
'reserved': 0,
'allocation_ratio': 1.0,
},
}
self._test_get_gpu_inventories(drvr, expected, ['nvidia-11',
'nvidia-12'])
@mock.patch.object(libvirt_driver.LOG, 'warning')
def test_get_supported_vgpu_types(self, mock_warning):
# Verify that by default we don't support vGPU types
@@ -26846,6 +26882,7 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin):
# devices or mdev classes *yet* if we don't have a vgpu type section.
self.assertEqual({}, drvr.pgpu_type_mapping)
self.assertEqual({}, drvr.mdev_class_mapping)
self.assertEqual({}, drvr.mdev_type_max_mapping)
# Remember, we only support the VGPU resource class if we only have
# one needed vGPU type without a specific vgpu type section.
self.assertEqual({orc.VGPU}, drvr.mdev_classes)
@@ -26865,6 +26902,7 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin):
self.assertEqual(['nvidia-11'], drvr._get_supported_vgpu_types())
self.assertEqual({}, drvr.pgpu_type_mapping)
self.assertEqual({}, drvr.mdev_class_mapping)
self.assertEqual({}, drvr.mdev_type_max_mapping)
# Here we only support one vGPU type
self.assertEqual({orc.VGPU}, drvr.mdev_classes)
msg = ("The mdev type '%(type)s' was listed in '[devices] "
@@ -26882,6 +26920,7 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin):
self.flags(device_addresses=['0000:84:00.0'], group='mdev_nvidia-11')
self.flags(device_addresses=['0000:85:00.0'], group='mdev_nvidia-12')
self.flags(mdev_class='CUSTOM_NOTVGPU', group='mdev_nvidia-12')
self.flags(max_instances=2, group='mdev_nvidia-11')
self.assertEqual(['nvidia-11', 'nvidia-12'],
drvr._get_supported_vgpu_types())
self.assertEqual({'0000:84:00.0': 'nvidia-11',
@@ -26890,6 +26929,8 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin):
'0000:85:00.0': 'CUSTOM_NOTVGPU'},
drvr.mdev_class_mapping)
self.assertEqual({orc.VGPU, 'CUSTOM_NOTVGPU'}, drvr.mdev_classes)
# nvidia-12 is unlimited
self.assertEqual({'nvidia-11': 2}, drvr.mdev_type_max_mapping)
mock_warning.assert_not_called()
def test_get_supported_vgpu_types_with_duplicate_types(self):

nova/virt/libvirt/driver.py

@@ -538,6 +538,8 @@ class LibvirtDriver(driver.ComputeDriver):
)
# This set is for knowing all the mdev classes the operator provides
self.mdev_classes = set([])
# this is for knowing how many mdevs can be created by a type
self.mdev_type_max_mapping = collections.defaultdict(str)
self.supported_vgpu_types = self._get_supported_vgpu_types()
# This dict is for knowing which mdevs are already claimed by some
@@ -8217,6 +8219,9 @@ class LibvirtDriver(driver.ComputeDriver):
self.mdev_classes = {first_group.mdev_class}
return [first_type]
mdev_class = group.mdev_class
# By default, max_instances is None
if group.max_instances:
self.mdev_type_max_mapping[vgpu_type] = group.max_instances
for device_address in group.device_addresses:
if device_address in self.pgpu_type_mapping:
raise exception.InvalidLibvirtMdevConfig(
@@ -8367,18 +8372,44 @@ class LibvirtDriver(driver.ComputeDriver):
if not enabled_mdev_types:
return {}
inventories = {}
# Count how many mdevs already exist per type.
type_limit_mapping: ty.Dict[str, int] = collections.defaultdict(int)
count_per_parent = self._count_mediated_devices(enabled_mdev_types)
for dev_name, count in count_per_parent.items():
mdev_type = self._get_vgpu_type_per_pgpu(dev_name)
type_limit_mapping[mdev_type] += count
inventories[dev_name] = {'total': count}
# Filter how many available mdevs we can create for all the supported
# types.
count_per_dev = self._count_mdev_capable_devices(enabled_mdev_types)
# Combine the counts into the dict that we return to the caller.
for dev_name, count in count_per_dev.items():
mdev_type = self._get_vgpu_type_per_pgpu(dev_name)
mdev_limit = self.mdev_type_max_mapping.get(mdev_type)
# Some GPU types could have defined limits. For the others, consider
# them unlimited.
# NOTE(sbauza): Instead of not accepting GPUs if their capacity is
# more than the limit, we could just accept them by capping their
# total value by the limit.
if (mdev_limit and
type_limit_mapping[mdev_type] + count > mdev_limit):
# We don't have space for creating new mediated devices
LOG.debug("Skipping to update %s as the available count of "
"mediated devices (%s) is above the maximum we can "
"use (%s)",
dev_name, count,
mdev_limit - type_limit_mapping[mdev_type])
# We want the resource provider to be deleted, so we pass the
# inventory with a total of 0 so _ensure_pgpu_providers() will
# delete it.
inventories[dev_name] = {'total': 0}
continue
type_limit_mapping[mdev_type] += count
inv_per_parent = inventories.setdefault(
dev_name, {'total': 0})
inv_per_parent['total'] += count
inv_per_parent.update({
for dev_name in inventories:
inventories[dev_name].update({
'min_unit': 1,
'step_size': 1,
'reserved': 0,
@@ -8386,7 +8417,7 @@
# since we can't overallocate vGPU resources
'allocation_ratio': 1.0,
# FIXME(sbauza): Some vendors could support only one
'max_unit': inv_per_parent['total'],
'max_unit': inventories[dev_name]['total'],
})
return inventories
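
As a standalone illustration of the capping decision above (a simplified
sketch, not driver code: it ignores already-created mdevs, and the device
names and counts are only illustrative, mirroring the unit test):

  import collections

  def cap_totals(count_per_dev, type_per_dev, max_per_type):
      # Drop (total=0) any pGPU whose available count would push its type
      # over the cap; keep the full count for uncapped or fitting pGPUs.
      used = collections.defaultdict(int)
      totals = {}
      for dev, count in count_per_dev.items():
          mdev_type = type_per_dev[dev]
          limit = max_per_type.get(mdev_type)
          if limit and used[mdev_type] + count > limit:
              totals[dev] = 0
              continue
          used[mdev_type] += count
          totals[dev] = count
      return totals

  # nvidia-11 capped at 2 while its pGPU reports 16; nvidia-12 uncapped.
  print(cap_totals({'pci_0000_06_00_0': 16, 'pci_0000_07_00_0': 10},
                   {'pci_0000_06_00_0': 'nvidia-11',
                    'pci_0000_07_00_0': 'nvidia-12'},
                   {'nvidia-11': 2}))
  # -> {'pci_0000_06_00_0': 0, 'pci_0000_07_00_0': 10}
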
@@ -9388,12 +9419,17 @@ class LibvirtDriver(driver.ComputeDriver):
# Dict of PGPU RPs keyed by their libvirt PCI name
pgpu_rps = {}
for pgpu_dev_id, inventory in inventories_dict.items():
# Skip (and omit) inventories with total=0 because placement does
# not allow setting total=0 for inventory.
if not inventory['total']:
continue
# For each physical GPU, we make sure to have a child provider
pgpu_rp_name = '%s_%s' % (nodename, pgpu_dev_id)
# Skip (and omit) inventories with total=0 because placement does
# not allow setting total=0 for inventory. If the inventory already
# exists, we delete it instead.
if not inventory['total']:
if provider_tree.exists(pgpu_rp_name):
LOG.debug('Deleting %s resource provider since it no '
'longer has any inventory', pgpu_rp_name)
provider_tree.remove(pgpu_rp_name)
continue
if not provider_tree.exists(pgpu_rp_name):
# This is the first time creating the child provider so add
# it to the tree under the root node provider.