From 536d42d807de5566afb6671b3da8ee1a3b85f48e Mon Sep 17 00:00:00 2001 From: Sundar Nadathur Date: Sat, 14 Dec 2019 13:28:13 -0800 Subject: [PATCH] Enable start/stop of instances with accelerators. . Do not delete accelerator requests in stop code paths. . In the start code path, get the list of accelerator requests from Cyborg in the compute manager 'power_on'. . Pass accel_info (said list) to the virt driver power_on. . In libvirt driver, power_on passes that accel_info on to _hard_reboot. Change-Id: I8c94504b87aa4450d163fe2b33f6aa0eb5dae5ff Blueprint: nova-cyborg-interaction --- nova/compute/manager.py | 3 ++- nova/tests/functional/test_servers.py | 28 ++++++++++++++++++++++++- nova/tests/unit/compute/test_compute.py | 21 ++++++++++++++++++- nova/virt/driver.py | 6 +++++- nova/virt/fake.py | 2 +- nova/virt/hyperv/driver.py | 2 +- nova/virt/ironic/driver.py | 5 +++-- nova/virt/libvirt/driver.py | 5 +++-- nova/virt/powervm/driver.py | 2 +- nova/virt/vmwareapi/driver.py | 2 +- nova/virt/xenapi/driver.py | 2 +- nova/virt/zvm/driver.py | 2 +- 12 files changed, 66 insertions(+), 14 deletions(-) diff --git a/nova/compute/manager.py b/nova/compute/manager.py index f5239017bc77..8be0f2db288d 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -3099,9 +3099,10 @@ class ComputeManager(manager.Manager): network_info = self.network_api.get_instance_nw_info(context, instance) block_device_info = self._get_instance_block_device_info(context, instance) + accel_info = self._get_accel_info(context, instance) self.driver.power_on(context, instance, network_info, - block_device_info) + block_device_info, accel_info) def _delete_snapshot_of_shelved_instance(self, context, instance, snapshot_id): diff --git a/nova/tests/functional/test_servers.py b/nova/tests/functional/test_servers.py index 0cc35f25f057..ae7fd058303d 100644 --- a/nova/tests/functional/test_servers.py +++ b/nova/tests/functional/test_servers.py @@ -7782,8 +7782,10 @@ class 
AcceleratorServerBase(integrated_helpers.ProviderUsageBaseTestCase): self._setup_compute_nodes_and_device_rps() def _setup_compute_nodes_and_device_rps(self): + self.compute_services = [] for i in range(self.NUM_HOSTS): - self._start_compute(host='accel_host' + str(i)) + svc = self._start_compute(host='accel_host' + str(i)) + self.compute_services.append(svc) self.compute_rp_uuids = [ rp['uuid'] for rp in self._get_all_providers() if rp['uuid'] == rp['root_provider_uuid']] @@ -7947,6 +7949,30 @@ class AcceleratorServerTest(AcceleratorServerBase): # Verify that no allocations/usages remain after deletion self._check_no_allocs_usage(server_uuid) + def test_create_server_with_local_delete(self): + """Delete the server when compute service is down.""" + server = self._get_server() + server_uuid = server['id'] + + # Stop the server. + self.api.post_server_action(server_uuid, {'os-stop': {}}) + self._wait_for_state_change(server, 'SHUTOFF') + self._check_allocations_usage(server) + # Stop and force down the compute service. + compute_id = self.admin_api.get_services( + host='accel_host0', binary='nova-compute')[0]['id'] + self.compute_services[0].stop() + self.admin_api.put_service(compute_id, {'forced_down': 'true'}) + + # Delete the server with compute service down. + self.api.delete_server(server_uuid) + self.cyborg.mock_del_arqs.assert_called_once_with(server_uuid) + self._check_no_allocs_usage(server_uuid) + + # Restart the compute service to see if anything fails. 
+ self.admin_api.put_service(compute_id, {'forced_down': 'false'}) + self.compute_services[0].start() + class AcceleratorServerReschedTest(AcceleratorServerBase): diff --git a/nova/tests/unit/compute/test_compute.py b/nova/tests/unit/compute/test_compute.py index 308a5914bdfe..50490b682ae8 100644 --- a/nova/tests/unit/compute/test_compute.py +++ b/nova/tests/unit/compute/test_compute.py @@ -2493,7 +2493,7 @@ class ComputeTestCase(BaseTestCase, called = {'power_on': False} def fake_driver_power_on(self, context, instance, network_info, - block_device_info): + block_device_info, accel_device_info=None): called['power_on'] = True self.stub_out('nova.virt.fake.FakeDriver.power_on', @@ -2512,6 +2512,25 @@ class ComputeTestCase(BaseTestCase, self.assertTrue(called['power_on']) self.compute.terminate_instance(self.context, inst_obj, []) + @mock.patch.object(compute_manager.ComputeManager, + '_get_instance_block_device_info') + @mock.patch('nova.network.neutron.API.get_instance_nw_info') + @mock.patch.object(fake.FakeDriver, 'power_on') + @mock.patch('nova.accelerator.cyborg._CyborgClient.get_arqs_for_instance') + def test_power_on_with_accels(self, mock_get_arqs, + mock_power_on, mock_nw_info, mock_blockdev): + instance = self._create_fake_instance_obj() + instance.flavor.extra_specs = {'accel:device_profile': 'mydp'} + accel_info = [{'k1': 'v1', 'k2': 'v2'}] + mock_get_arqs.return_value = accel_info + mock_nw_info.return_value = 'nw_info' + mock_blockdev.return_value = 'blockdev_info' + + self.compute._power_on(self.context, instance) + mock_get_arqs.assert_called_once_with(instance['uuid']) + mock_power_on.assert_called_once_with(self.context, + instance, 'nw_info', 'blockdev_info', accel_info) + def test_power_off(self): # Ensure instance can be powered off. 
diff --git a/nova/virt/driver.py b/nova/virt/driver.py index 61e86f768574..3d046a3db05a 100644 --- a/nova/virt/driver.py +++ b/nova/virt/driver.py @@ -889,10 +889,14 @@ class ComputeDriver(object): raise NotImplementedError() def power_on(self, context, instance, network_info, - block_device_info=None): + block_device_info=None, accel_info=None): """Power on the specified instance. :param instance: nova.objects.instance.Instance + :param network_info: instance network information + :param block_device_info: instance volume block device info + :param accel_info: List of accelerator request dicts. The exact + data struct is doc'd in nova/virt/driver.py::spawn(). """ raise NotImplementedError() diff --git a/nova/virt/fake.py b/nova/virt/fake.py index 51f057aeeaf0..b6a2c71f0293 100644 --- a/nova/virt/fake.py +++ b/nova/virt/fake.py @@ -276,7 +276,7 @@ class FakeDriver(driver.ComputeDriver): raise exception.InstanceNotFound(instance_id=instance.uuid) def power_on(self, context, instance, network_info, - block_device_info=None): + block_device_info=None, accel_info=None): if instance.uuid in self.instances: self.instances[instance.uuid].state = power_state.RUNNING else: diff --git a/nova/virt/hyperv/driver.py b/nova/virt/hyperv/driver.py index 3653ed1b6264..a399682d6128 100644 --- a/nova/virt/hyperv/driver.py +++ b/nova/virt/hyperv/driver.py @@ -224,7 +224,7 @@ class HyperVDriver(driver.ComputeDriver): self._vmops.power_off(instance, timeout, retry_interval) def power_on(self, context, instance, network_info, - block_device_info=None): + block_device_info=None, accel_info=None): self._vmops.power_on(instance, block_device_info, network_info) def resume_state_on_host_boot(self, context, instance, network_info, diff --git a/nova/virt/ironic/driver.py b/nova/virt/ironic/driver.py index 5cbbb6e0af1c..b9a4c013a934 100644 --- a/nova/virt/ironic/driver.py +++ b/nova/virt/ironic/driver.py @@ -1474,7 +1474,7 @@ class IronicDriver(virt_driver.ComputeDriver): node.uuid, 
instance=instance) def power_on(self, context, instance, network_info, - block_device_info=None): + block_device_info=None, accel_info=None): """Power on the specified instance. NOTE: Unlike the libvirt driver, this method does not delete @@ -1486,7 +1486,8 @@ class IronicDriver(virt_driver.ComputeDriver): this driver. :param block_device_info: Instance block device information. Ignored by this driver. - + :param accel_info: List of accelerator requests for this instance. + Ignored by this driver. """ LOG.debug('Power on called for instance', instance=instance) node = self._validate_instance_and_node(instance) diff --git a/nova/virt/libvirt/driver.py b/nova/virt/libvirt/driver.py index bd7a15d8d554..f8ad2e7c2791 100644 --- a/nova/virt/libvirt/driver.py +++ b/nova/virt/libvirt/driver.py @@ -3297,12 +3297,13 @@ class LibvirtDriver(driver.ComputeDriver): self._destroy(instance) def power_on(self, context, instance, network_info, - block_device_info=None): + block_device_info=None, accel_info=None): """Power on the specified instance.""" # We use _hard_reboot here to ensure that all backing files, # network, and block device connections, etc. are established # and available before we attempt to start the instance. - self._hard_reboot(context, instance, network_info, block_device_info) + self._hard_reboot(context, instance, network_info, block_device_info, + accel_info) def trigger_crash_dump(self, instance): diff --git a/nova/virt/powervm/driver.py b/nova/virt/powervm/driver.py index 6c6f0d342537..8f39ec89a1b4 100644 --- a/nova/virt/powervm/driver.py +++ b/nova/virt/powervm/driver.py @@ -464,7 +464,7 @@ class PowerVMDriver(driver.ComputeDriver): timeout=timeout) def power_on(self, context, instance, network_info, - block_device_info=None): + block_device_info=None, accel_info=None): """Power on the specified instance. 
:param instance: nova.objects.instance.Instance diff --git a/nova/virt/vmwareapi/driver.py b/nova/virt/vmwareapi/driver.py index a7f5b163a1cb..214252652b65 100644 --- a/nova/virt/vmwareapi/driver.py +++ b/nova/virt/vmwareapi/driver.py @@ -658,7 +658,7 @@ class VMwareVCDriver(driver.ComputeDriver): self._vmops.power_off(instance, timeout, retry_interval) def power_on(self, context, instance, network_info, - block_device_info=None): + block_device_info=None, accel_info=None): """Power on the specified instance.""" self._vmops.power_on(instance) diff --git a/nova/virt/xenapi/driver.py b/nova/virt/xenapi/driver.py index 64f1391c751a..72b9639f74ca 100644 --- a/nova/virt/xenapi/driver.py +++ b/nova/virt/xenapi/driver.py @@ -331,7 +331,7 @@ class XenAPIDriver(driver.ComputeDriver): self._vmops.power_off(instance) def power_on(self, context, instance, network_info, - block_device_info=None): + block_device_info=None, accel_info=None): """Power on the specified instance.""" self._vmops.power_on(instance) diff --git a/nova/virt/zvm/driver.py b/nova/virt/zvm/driver.py index 781ae9f35a20..5d67a0b5dbf0 100644 --- a/nova/virt/zvm/driver.py +++ b/nova/virt/zvm/driver.py @@ -395,7 +395,7 @@ class ZVMDriver(driver.ComputeDriver): self._hypervisor.guest_softstop(instance.name) def power_on(self, context, instance, network_info, - block_device_info=None): + block_device_info=None, accel_info=None): self._hypervisor.guest_start(instance.name) def pause(self, instance):