From c5a73e6c7227f199bfa0b66c6ef0c2730d70c3b2 Mon Sep 17 00:00:00 2001
From: Artom Lifshitz
Date: Wed, 21 Feb 2024 19:58:32 -0500
Subject: [PATCH] pwr mgmt: handle live migrations correctly

Previously, live migrations completely ignored CPU power management.
This patch makes sure that we correctly:

* Power up the cores on the destination during pre_live_migration, as
  we need them powered up before the instance starts on the
  destination.
* If the live migration is successful, power down the vacated cores on
  the source.
* In case of a rollback, power down the cores previously powered up on
  pre_live_migration.

NOTE(artom) Conflicts in nova/compute/manager.py around the do_cleanup
determination because mdev live migration is not in Bobcat.

Closes-bug: 2056613
Change-Id: I787bd7807950370cd865f29b95989d489d4826d0
(cherry picked from commit c1ccc1a3165ec1556c605b3b036274e992b0a09d)
---
 nova/compute/manager.py                       |  6 +-
 .../functional/libvirt/test_power_manage.py   | 71 ++++++++++++++++---
 nova/tests/unit/virt/libvirt/cpu/test_api.py  | 18 ++---
 nova/virt/libvirt/cpu/api.py                  | 58 ++++++++++-----
 nova/virt/libvirt/driver.py                   | 23 +++++-
 5 files changed, 137 insertions(+), 39 deletions(-)

diff --git a/nova/compute/manager.py b/nova/compute/manager.py
index 9e39511dddf6..8ca0e1498ac4 100644
--- a/nova/compute/manager.py
+++ b/nova/compute/manager.py
@@ -9188,12 +9188,16 @@ class ComputeManager(manager.Manager):
                                        objects.LibvirtVPMEMDevice)):
                         has_vpmem = True
                         break
+            power_management_possible = (
+                'dst_numa_info' in migrate_data and
+                migrate_data.dst_numa_info is not None)
             # No instance booting at source host, but instance dir
             # must be deleted for preparing next block migration
             # must be deleted for preparing next live migration w/o shared
             # storage
             # vpmem must be cleaned
-            do_cleanup = not migrate_data.is_shared_instance_path or has_vpmem
+            do_cleanup = (not migrate_data.is_shared_instance_path or
+                          has_vpmem or power_management_possible)
             destroy_disks = not migrate_data.is_shared_block_storage
         elif isinstance(migrate_data, migrate_data_obj.HyperVLiveMigrateData):
             # NOTE(claudiub): We need to cleanup any zombie Planned VM.
diff --git a/nova/tests/functional/libvirt/test_power_manage.py b/nova/tests/functional/libvirt/test_power_manage.py
index ff8620a625a7..f6e4b1ba5b42 100644
--- a/nova/tests/functional/libvirt/test_power_manage.py
+++ b/nova/tests/functional/libvirt/test_power_manage.py
@@ -59,12 +59,15 @@ class PowerManagementTestsBase(base.ServersTestBase):
             'hw:cpu_policy': 'dedicated',
             'hw:cpu_thread_policy': 'prefer',
         }
+        self.isolate_extra_spec = {
+            'hw:cpu_policy': 'dedicated',
+            'hw:cpu_thread_policy': 'prefer',
+            'hw:emulator_threads_policy': 'isolate',
+        }
         self.pcpu_flavor_id = self._create_flavor(
             vcpu=4, extra_spec=self.extra_spec)
         self.isolate_flavor_id = self._create_flavor(
-            vcpu=4, extra_spec={'hw:cpu_policy': 'dedicated',
-                                'hw:cpu_thread_policy': 'prefer',
-                                'hw:emulator_threads_policy': 'isolate'})
+            vcpu=4, extra_spec=self.isolate_extra_spec)
 
     def _assert_server_cpus_state(self, server, expected='online'):
         inst = objects.Instance.get_by_uuid(self.ctxt, server['id'])
@@ -117,8 +120,8 @@ class CoresStub(object):
         return self.cores[i]
 
 
-class PowerManagementLiveMigrationTests(base.LibvirtMigrationMixin,
-                                        PowerManagementTestsBase):
+class PowerManagementLiveMigrationTestsBase(base.LibvirtMigrationMixin,
+                                            PowerManagementTestsBase):
 
     def setUp(self):
         super().setUp()
@@ -129,10 +132,13 @@ class PowerManagementLiveMigrationTests(base.LibvirtMigrationMixin,
         self.flags(vcpu_pin_set=None)
         self.flags(cpu_power_management=True, group='libvirt')
 
-        # NOTE(artom) Fill up all dedicated CPUs. This makes the assertions
-        # further down easier.
+        # NOTE(artom) Fill up all dedicated CPUs (either with only the
+        # instance's CPUs, or instance CPUs + 1 emulator thread). This makes
+        # the assertions further down easier.
         self.pcpu_flavor_id = self._create_flavor(
             vcpu=9, extra_spec=self.extra_spec)
+        self.isolate_flavor_id = self._create_flavor(
+            vcpu=8, extra_spec=self.isolate_extra_spec)
 
         self.start_compute(
             host_info=fakelibvirt.HostInfo(cpu_nodes=1, cpu_sockets=1,
@@ -156,14 +162,61 @@ class PowerManagementLiveMigrationTests(base.LibvirtMigrationMixin,
         for i in cores:
             self.assertEqual(online, host.driver.cpu_api.core(i).online)
 
+
+class PowerManagementLiveMigrationTests(PowerManagementLiveMigrationTestsBase):
+
     def test_live_migrate_server(self):
         self.server = self._create_server(
             flavor_id=self.pcpu_flavor_id,
             expected_state='ACTIVE', host='src')
         server = self._live_migrate(self.server)
         self.assertEqual('dest', server['OS-EXT-SRV-ATTR:host'])
-        # FIXME(artom) We've not powered up the dest cores, and left the src
-        # cores powered on.
+        # We've powered down the source cores, and powered up the destination
+        # ones.
+        self.assert_cores(self.src, range(1, 10), online=False)
+        self.assert_cores(self.dest, range(1, 10), online=True)
+
+    def test_live_migrate_server_with_emulator_threads_isolate(self):
+        self.server = self._create_server(
+            flavor_id=self.isolate_flavor_id,
+            expected_state='ACTIVE', host='src')
+        server = self._live_migrate(self.server)
+        self.assertEqual('dest', server['OS-EXT-SRV-ATTR:host'])
+        # We're using a flavor with 8 CPUs, but with the extra dedicated CPU
+        # for the emulator threads, we expect all 9 cores to be powered up on
+        # the dest, and down on the source.
+        self.assert_cores(self.src, range(1, 10), online=False)
+        self.assert_cores(self.dest, range(1, 10), online=True)
+
+
+class PowerManagementLiveMigrationRollbackTests(
+        PowerManagementLiveMigrationTestsBase):
+
+    def _migrate_stub(self, domain, destination, params, flags):
+        conn = self.src.driver._host.get_connection()
+        dom = conn.lookupByUUIDString(self.server['id'])
+        dom.fail_job()
+
+    def test_live_migrate_server_rollback(self):
+        self.server = self._create_server(
+            flavor_id=self.pcpu_flavor_id,
+            expected_state='ACTIVE', host='src')
+        server = self._live_migrate(self.server,
+                                    migration_expected_state='failed')
+        self.assertEqual('src', server['OS-EXT-SRV-ATTR:host'])
+        self.assert_cores(self.src, range(1, 10), online=True)
+        self.assert_cores(self.dest, range(1, 10), online=False)
+
+    def test_live_migrate_server_with_emulator_threads_isolate_rollback(self):
+        self.server = self._create_server(
+            flavor_id=self.isolate_flavor_id,
+            expected_state='ACTIVE', host='src')
+        server = self._live_migrate(self.server,
+                                    migration_expected_state='failed')
+        self.assertEqual('src', server['OS-EXT-SRV-ATTR:host'])
+        # We're using a flavor with 8 CPUs, but with the extra dedicated CPU
+        # for the emulator threads, we expect all 9 cores to be powered back
+        # down on the dest, and up on the source.
         self.assert_cores(self.src, range(1, 10), online=True)
         self.assert_cores(self.dest, range(1, 10), online=False)
 
diff --git a/nova/tests/unit/virt/libvirt/cpu/test_api.py b/nova/tests/unit/virt/libvirt/cpu/test_api.py
index e7aa67570ffd..408496dbd346 100644
--- a/nova/tests/unit/virt/libvirt/cpu/test_api.py
+++ b/nova/tests/unit/virt/libvirt/cpu/test_api.py
@@ -83,7 +83,7 @@ class TestAPI(test.NoDBTestCase):
         self.flags(cpu_power_management=True, group='libvirt')
         self.flags(cpu_dedicated_set='1-2', group='compute')
 
-        self.api.power_up(self.fake_inst)
+        self.api.power_up_for_instance(self.fake_inst)
         # only core #2 can be set as core #0 is not on the dedicated set
         # As a reminder, core(i).online calls set_online(i)
         mock_online.assert_called_once_with(2)
@@ -94,7 +94,7 @@ class TestAPI(test.NoDBTestCase):
         self.flags(cpu_power_management_strategy='governor', group='libvirt')
         self.flags(cpu_dedicated_set='1-2', group='compute')
 
-        self.api.power_up(self.fake_inst)
+        self.api.power_up_for_instance(self.fake_inst)
         # only core #2 can be set as core #1 is not on the dedicated set
         # As a reminder, core(i).set_high_governor calls set_governor(i)
         mock_set_governor.assert_called_once_with(2, 'performance')
@@ -102,13 +102,13 @@ class TestAPI(test.NoDBTestCase):
     @mock.patch.object(core, 'set_online')
     def test_power_up_skipped(self, mock_online):
         self.flags(cpu_power_management=False, group='libvirt')
-        self.api.power_up(self.fake_inst)
+        self.api.power_up_for_instance(self.fake_inst)
         mock_online.assert_not_called()
 
     @mock.patch.object(core, 'set_online')
     def test_power_up_skipped_if_standard_instance(self, mock_online):
         self.flags(cpu_power_management=True, group='libvirt')
-        self.api.power_up(objects.Instance(numa_topology=None))
+        self.api.power_up_for_instance(objects.Instance(numa_topology=None))
         mock_online.assert_not_called()
 
     @mock.patch.object(core, 'set_offline')
@@ -116,7 +116,7 @@ class TestAPI(test.NoDBTestCase):
         self.flags(cpu_power_management=True, group='libvirt')
         self.flags(cpu_dedicated_set='1-2', group='compute')
 
-        self.api.power_down(self.fake_inst)
+        self.api.power_down_for_instance(self.fake_inst)
         # only core #2 can be set as core #1 is not on the dedicated set
         # As a reminder, core(i).online calls set_online(i)
         mock_offline.assert_called_once_with(2)
 
@@ -127,7 +127,7 @@ class TestAPI(test.NoDBTestCase):
         self.flags(cpu_power_management_strategy='governor', group='libvirt')
         self.flags(cpu_dedicated_set='0-1', group='compute')
 
-        self.api.power_down(self.fake_inst)
+        self.api.power_down_for_instance(self.fake_inst)
 
         # Make sure that core #0 is ignored, since it is special and cannot
         # be powered down.
@@ -139,7 +139,7 @@ class TestAPI(test.NoDBTestCase):
         self.flags(cpu_power_management_strategy='governor', group='libvirt')
         self.flags(cpu_dedicated_set='1-2', group='compute')
 
-        self.api.power_down(self.fake_inst)
+        self.api.power_down_for_instance(self.fake_inst)
 
         # only core #2 can be set as core #0 is not on the dedicated set
         # As a reminder, core(i).set_high_governor calls set_governor(i)
@@ -148,13 +148,13 @@ class TestAPI(test.NoDBTestCase):
     @mock.patch.object(core, 'set_offline')
     def test_power_down_skipped(self, mock_offline):
         self.flags(cpu_power_management=False, group='libvirt')
-        self.api.power_down(self.fake_inst)
+        self.api.power_down_for_instance(self.fake_inst)
         mock_offline.assert_not_called()
 
     @mock.patch.object(core, 'set_offline')
     def test_power_down_skipped_if_standard_instance(self, mock_offline):
         self.flags(cpu_power_management=True, group='libvirt')
-        self.api.power_down(objects.Instance(numa_topology=None))
+        self.api.power_down_for_instance(objects.Instance(numa_topology=None))
         mock_offline.assert_not_called()
 
     @mock.patch.object(core, 'set_offline')
diff --git a/nova/virt/libvirt/cpu/api.py b/nova/virt/libvirt/cpu/api.py
index 02e6cfa8b978..1c4bd19bee28 100644
--- a/nova/virt/libvirt/cpu/api.py
+++ b/nova/virt/libvirt/cpu/api.py
@@ -91,19 +91,14 @@ class API(object):
         """
         return Core(i)
 
-    def power_up(self, instance: objects.Instance) -> None:
+    def _power_up(self, cpus: ty.Set[int]) -> None:
         if not CONF.libvirt.cpu_power_management:
             return
-        if instance.numa_topology is None:
-            return
-
         cpu_dedicated_set = hardware.get_cpu_dedicated_set_nozero() or set()
-        pcpus = instance.numa_topology.cpu_pinning.union(
-            instance.numa_topology.cpuset_reserved)
         powered_up = set()
-        for pcpu in pcpus:
-            if pcpu in cpu_dedicated_set:
-                pcpu = self.core(pcpu)
+        for cpu in cpus:
+            if cpu in cpu_dedicated_set:
+                pcpu = self.core(cpu)
                 if CONF.libvirt.cpu_power_management_strategy == 'cpu_state':
                     pcpu.online = True
                 else:
@@ -111,19 +106,31 @@ class API(object):
                 powered_up.add(str(pcpu))
         LOG.debug("Cores powered up : %s", powered_up)
 
-    def power_down(self, instance: objects.Instance) -> None:
-        if not CONF.libvirt.cpu_power_management:
-            return
+    def power_up_for_instance(self, instance: objects.Instance) -> None:
         if instance.numa_topology is None:
             return
-
-        cpu_dedicated_set = hardware.get_cpu_dedicated_set_nozero() or set()
         pcpus = instance.numa_topology.cpu_pinning.union(
             instance.numa_topology.cpuset_reserved)
+        self._power_up(pcpus)
+
+    def power_up_for_migration(
+        self, dst_numa_info: objects.LibvirtLiveMigrateNUMAInfo
+    ) -> None:
+        pcpus = set()
+        if 'emulator_pins' in dst_numa_info and dst_numa_info.emulator_pins:
+            pcpus = dst_numa_info.emulator_pins
+        for pins in dst_numa_info.cpu_pins.values():
+            pcpus = pcpus.union(pins)
+        self._power_up(pcpus)
+
+    def _power_down(self, cpus: ty.Set[int]) -> None:
+        if not CONF.libvirt.cpu_power_management:
+            return
+        cpu_dedicated_set = hardware.get_cpu_dedicated_set_nozero() or set()
         powered_down = set()
-        for pcpu in pcpus:
-            if pcpu in cpu_dedicated_set:
-                pcpu = self.core(pcpu)
+        for cpu in cpus:
+            if cpu in cpu_dedicated_set:
+                pcpu = self.core(cpu)
                 if CONF.libvirt.cpu_power_management_strategy == 'cpu_state':
                     pcpu.online = False
                 else:
@@ -131,6 +138,23 @@ class API(object):
                 powered_down.add(str(pcpu))
         LOG.debug("Cores powered down : %s", powered_down)
 
+    def power_down_for_migration(
+        self, dst_numa_info: objects.LibvirtLiveMigrateNUMAInfo
+    ) -> None:
+        pcpus = set()
+        if 'emulator_pins' in dst_numa_info and dst_numa_info.emulator_pins:
+            pcpus = dst_numa_info.emulator_pins
+        for pins in dst_numa_info.cpu_pins.values():
+            pcpus = pcpus.union(pins)
+        self._power_down(pcpus)
+
+    def power_down_for_instance(self, instance: objects.Instance) -> None:
+        if instance.numa_topology is None:
+            return
+        pcpus = instance.numa_topology.cpu_pinning.union(
+            instance.numa_topology.cpuset_reserved)
+        self._power_down(pcpus)
+
     def power_down_all_dedicated_cpus(self) -> None:
         if not CONF.libvirt.cpu_power_management:
             return
diff --git a/nova/virt/libvirt/driver.py b/nova/virt/libvirt/driver.py
index efec7523ec22..994d88f16c2e 100644
--- a/nova/virt/libvirt/driver.py
+++ b/nova/virt/libvirt/driver.py
@@ -1563,7 +1563,7 @@ class LibvirtDriver(driver.ComputeDriver):
         if CONF.libvirt.virt_type == 'lxc':
             self._teardown_container(instance)
         # We're sure the instance is gone, we can shutdown the core if so
-        self.cpu_api.power_down(instance)
+        self.cpu_api.power_down_for_instance(instance)
 
     def destroy(self, context, instance, network_info, block_device_info=None,
                 destroy_disks=True, destroy_secrets=True):
@@ -3217,7 +3217,7 @@ class LibvirtDriver(driver.ComputeDriver):
 
         current_power_state = guest.get_power_state(self._host)
 
-        self.cpu_api.power_up(instance)
+        self.cpu_api.power_up_for_instance(instance)
         # TODO(stephenfin): Any reason we couldn't use 'self.resume' here?
         guest.launch(pause=current_power_state == power_state.PAUSED)
 
@@ -7705,7 +7705,7 @@ class LibvirtDriver(driver.ComputeDriver):
             post_xml_callback()
 
         if power_on or pause:
-            self.cpu_api.power_up(instance)
+            self.cpu_api.power_up_for_instance(instance)
             guest.launch(pause=pause)
 
         return guest
@@ -10783,6 +10783,16 @@ class LibvirtDriver(driver.ComputeDriver):
                         serial_console.release_port(
                             host=migrate_data.serial_listen_addr, port=port)
 
+                if (
+                    'dst_numa_info' in migrate_data and
+                    migrate_data.dst_numa_info
+                ):
+                    self.cpu_api.power_down_for_migration(
+                        migrate_data.dst_numa_info)
+                else:
+                    LOG.debug('No dst_numa_info in migrate_data, '
+                              'no cores to power down in rollback.')
+
             if not is_shared_instance_path:
                 instance_dir = libvirt_utils.get_instance_path_at_destination(
                     instance, migrate_data)
@@ -10949,6 +10959,12 @@ class LibvirtDriver(driver.ComputeDriver):
 
                 migrate_data.bdms.append(bdmi)
 
+        if 'dst_numa_info' in migrate_data and migrate_data.dst_numa_info:
+            self.cpu_api.power_up_for_migration(migrate_data.dst_numa_info)
+        else:
+            LOG.debug('No dst_numa_info in migrate_data, '
+                      'no cores to power up in pre_live_migration.')
+
         return migrate_data
 
     def _try_fetch_image_cache(self, image, fetch_func, context, filename,
@@ -11112,6 +11128,7 @@ class LibvirtDriver(driver.ComputeDriver):
         :param network_info: instance network information
         """
         self.unplug_vifs(instance, network_info)
+        self.cpu_api.power_down_for_instance(instance)
 
     def _qemu_monitor_announce_self(self, instance):
         """Send announce_self command to QEMU monitor.
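
For reviewers unfamiliar with LibvirtLiveMigrateNUMAInfo, here is a rough
standalone sketch (not part of the patch) of how the new *_for_migration
helpers gather the pinned host CPUs from dst_numa_info before handing them to
_power_up()/_power_down(); the plain dict and set stand-ins below are
assumptions made purely for illustration.

# Illustrative only: plain dicts/sets stand in for the real
# LibvirtLiveMigrateNUMAInfo object used in the patch above.
def gather_migration_pcpus(cpu_pins, emulator_pins):
    """Collect the dedicated host pCPUs a migration will use on the dest.

    cpu_pins: mapping of guest vCPU -> set of host pCPUs, e.g. {'0': {1}}
    emulator_pins: set of host pCPUs for emulator threads, possibly empty
    """
    pcpus = set(emulator_pins) if emulator_pins else set()
    for pins in cpu_pins.values():
        pcpus = pcpus.union(pins)
    return pcpus

# Example: a 2-vCPU guest pinned to cores 1-2, emulator thread on core 3.
# pre_live_migration powers this set up on the destination; a rollback
# powers the same set back down.
print(gather_migration_pcpus({'0': {1}, '1': {2}}, {3}))  # {1, 2, 3}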