From 32bbbd698a2a9c5ca6f0b01662d94c64e21422b1 Mon Sep 17 00:00:00 2001 From: Sylvain Bauza Date: Tue, 28 Apr 2020 12:17:08 +0200 Subject: [PATCH] Test multi create with vGPUs We had a bug in Rocky where multicreate wasn't working correctly, but given in Stein we provided Resource Providers for each pGPU, this is fixed now. NOTE: We have a related bug #1874664 because multicreate doesn't work with nested Resource Providers. We could btw. move the regression test to a specific module in the regressions tests subdirectory. Change-Id: I8154917ff142987e80dc711e3b2b3965a21f08d0 Related-Bug: #1780225 Related-Bug: #1874664 --- nova/tests/functional/libvirt/test_vgpu.py | 72 ++++++++++++++++++++-- 1 file changed, 68 insertions(+), 4 deletions(-) diff --git a/nova/tests/functional/libvirt/test_vgpu.py b/nova/tests/functional/libvirt/test_vgpu.py index 9bd1c12b3ba7..389c9527ff14 100644 --- a/nova/tests/functional/libvirt/test_vgpu.py +++ b/nova/tests/functional/libvirt/test_vgpu.py @@ -14,16 +14,19 @@ import fixtures import re +import collections import mock import os_resource_classes as orc from oslo_config import cfg from oslo_log import log as logging from oslo_utils import uuidutils +from nova.compute import instance_actions import nova.conf from nova import context from nova import objects from nova.tests.functional.libvirt import base +from nova.tests.unit import policy_fixture from nova.tests.unit.virt.libvirt import fakelibvirt from nova.virt.libvirt import driver as libvirt_driver from nova.virt.libvirt import utils as libvirt_utils @@ -149,10 +152,16 @@ class VGPUTests(VGPUTestBase): return_value=[])) self.useFixture(fixtures.MockPatch('os.rename')) + policy = self.useFixture(policy_fixture.RealPolicyFixture()) + # Allow non-admins to see instance action events. 
+ policy.set_rules({ + 'os_compute_api:os-instance-actions:events': 'rule:admin_or_owner' + }, overwrite=False) + self.compute1 = self._start_compute_service(_DEFAULT_HOST) def assert_vgpu_usage_for_compute(self, compute, expected): - total_usage = 0 + total_usages = collections.defaultdict(int) # We only want to get mdevs that are assigned to instances mdevs = compute.driver._get_all_assigned_mediated_devices() for mdev in mdevs: @@ -163,10 +172,11 @@ class VGPUTests(VGPUTestBase): parent_rp_name = compute.host + '_' + parent_name parent_rp_uuid = self._get_provider_uuid_by_name(parent_rp_name) parent_usage = self._get_provider_usages(parent_rp_uuid) - if orc.VGPU in parent_usage: - total_usage += parent_usage[orc.VGPU] + if orc.VGPU in parent_usage and parent_rp_name not in total_usages: + # We only set the total amount if we didn't have it already + total_usages[parent_rp_name] = parent_usage[orc.VGPU] self.assertEqual(expected, len(mdevs)) - self.assertEqual(expected, total_usage) + self.assertEqual(expected, sum(total_usages[k] for k in total_usages)) def test_create_servers_with_vgpu(self): self._create_server( @@ -223,6 +233,60 @@ class VGPUTests(VGPUTestBase): self.assert_vgpu_usage_for_compute(self.compute1, expected=0) self.assert_vgpu_usage_for_compute(self.compute2, expected=1) + def test_multiple_instance_create(self): + body = self._build_server( + name=None, image_uuid='155d900f-4e14-4e4c-a73d-069cbf4541e6', + flavor_id=self.flavor, networks='auto', az=None, + host=self.compute1.host) + # Asking to multicreate two instances, each of them asking for 1 vGPU + body['min_count'] = 2 + server = self.api.post_server({'server': body}) + self._wait_for_state_change(server, 'ACTIVE') + + # Let's verify we created two mediated devices and we have a total of + # 2 vGPUs + self.assert_vgpu_usage_for_compute(self.compute1, expected=2) + + def test_multiple_instance_create_filling_up_capacity(self): + # Each pGPU created by fakelibvirt defaults to a capacity of 16 
vGPUs. + # By default, we created a compute service with 2 pGPUs before, so we + # have a total capacity of 32. In theory, we should be able to find + # space for two instances asking for 16 vGPUs each. + extra_spec = {"resources:VGPU": "16"} + flavor = self._create_flavor(extra_spec=extra_spec) + body = self._build_server( + name=None, image_uuid='155d900f-4e14-4e4c-a73d-069cbf4541e6', + flavor_id=flavor, networks='auto', az=None, + host=self.compute1.host) + # Asking to multicreate two instances, each of them asking for 16 vGPUs + body['min_count'] = 2 + server = self.api.post_server({'server': body}) + # But... we fail miserably because of bug #1874664 + # FIXME(sbauza): Change this once we fix the above bug + server = self._wait_for_state_change(server, 'ERROR') + self.assertIn('fault', server) + self.assertIn('No valid host', server['fault']['message']) + self.assertEqual('', server['hostId']) + # Assert the "create" instance action exists and is failed. + actions = self.api.get_instance_actions(server['id']) + self.assertEqual(1, len(actions), actions) + action = actions[0] + self.assertEqual(instance_actions.CREATE, action['action']) + self.assertEqual('Error', action['message']) + # Get the events. There should be one with an Error result. + action = self.api.api_get( + '/servers/%s/os-instance-actions/%s' % + (server['id'], action['request_id'])).body['instanceAction'] + events = action['events'] + self.assertEqual(1, len(events), events) + event = events[0] + self.assertEqual('conductor_schedule_and_build_instances', + event['event']) + self.assertEqual('Error', event['result']) + # Normally non-admins cannot see the event traceback but we enabled + # that via policy in setUp so assert something was recorded. + self.assertIn('select_destinations', event['traceback']) + class VGPUMultipleTypesTests(VGPUTestBase):