Merge "pwr mgmt: make API into a per-driver object"

This commit is contained in:
Zuul 2024-03-13 23:48:29 +00:00 committed by Gerrit Code Review
commit 52a7d9cef9
3 changed files with 121 additions and 97 deletions

View File

@ -24,6 +24,7 @@ class TestAPI(test.NoDBTestCase):
def setUp(self):
super(TestAPI, self).setUp()
self.core_1 = api.Core(1)
self.api = api.API()
# Create a fake instance with two pinned CPUs but only one is on the
# dedicated set
@ -82,7 +83,7 @@ class TestAPI(test.NoDBTestCase):
self.flags(cpu_power_management=True, group='libvirt')
self.flags(cpu_dedicated_set='1-2', group='compute')
api.power_up(self.fake_inst)
self.api.power_up(self.fake_inst)
# only core #2 can be set as core #0 is not on the dedicated set
# As a reminder, core(i).online calls set_online(i)
mock_online.assert_called_once_with(2)
@ -93,7 +94,7 @@ class TestAPI(test.NoDBTestCase):
self.flags(cpu_power_management_strategy='governor', group='libvirt')
self.flags(cpu_dedicated_set='1-2', group='compute')
api.power_up(self.fake_inst)
self.api.power_up(self.fake_inst)
# only core #2 can be set as core #1 is not on the dedicated set
# As a reminder, core(i).set_high_governor calls set_governor(i)
mock_set_governor.assert_called_once_with(2, 'performance')
@ -101,13 +102,13 @@ class TestAPI(test.NoDBTestCase):
@mock.patch.object(core, 'set_online')
def test_power_up_skipped(self, mock_online):
self.flags(cpu_power_management=False, group='libvirt')
api.power_up(self.fake_inst)
self.api.power_up(self.fake_inst)
mock_online.assert_not_called()
@mock.patch.object(core, 'set_online')
def test_power_up_skipped_if_standard_instance(self, mock_online):
self.flags(cpu_power_management=True, group='libvirt')
api.power_up(objects.Instance(numa_topology=None))
self.api.power_up(objects.Instance(numa_topology=None))
mock_online.assert_not_called()
@mock.patch.object(core, 'set_offline')
@ -115,7 +116,7 @@ class TestAPI(test.NoDBTestCase):
self.flags(cpu_power_management=True, group='libvirt')
self.flags(cpu_dedicated_set='1-2', group='compute')
api.power_down(self.fake_inst)
self.api.power_down(self.fake_inst)
# only core #2 can be set as core #1 is not on the dedicated set
# As a reminder, core(i).online calls set_online(i)
mock_offline.assert_called_once_with(2)
@ -126,7 +127,7 @@ class TestAPI(test.NoDBTestCase):
self.flags(cpu_power_management_strategy='governor', group='libvirt')
self.flags(cpu_dedicated_set='0-1', group='compute')
api.power_down(self.fake_inst)
self.api.power_down(self.fake_inst)
# Make sure that core #0 is ignored, since it is special and cannot
# be powered down.
@ -138,7 +139,7 @@ class TestAPI(test.NoDBTestCase):
self.flags(cpu_power_management_strategy='governor', group='libvirt')
self.flags(cpu_dedicated_set='1-2', group='compute')
api.power_down(self.fake_inst)
self.api.power_down(self.fake_inst)
# only core #2 can be set as core #0 is not on the dedicated set
# As a reminder, core(i).set_high_governor calls set_governor(i)
@ -147,13 +148,13 @@ class TestAPI(test.NoDBTestCase):
@mock.patch.object(core, 'set_offline')
def test_power_down_skipped(self, mock_offline):
self.flags(cpu_power_management=False, group='libvirt')
api.power_down(self.fake_inst)
self.api.power_down(self.fake_inst)
mock_offline.assert_not_called()
@mock.patch.object(core, 'set_offline')
def test_power_down_skipped_if_standard_instance(self, mock_offline):
self.flags(cpu_power_management=True, group='libvirt')
api.power_down(objects.Instance(numa_topology=None))
self.api.power_down(objects.Instance(numa_topology=None))
mock_offline.assert_not_called()
@mock.patch.object(core, 'set_offline')
@ -161,7 +162,7 @@ class TestAPI(test.NoDBTestCase):
self.flags(cpu_power_management=True, group='libvirt')
self.flags(cpu_dedicated_set='0-2', group='compute')
api.power_down_all_dedicated_cpus()
self.api.power_down_all_dedicated_cpus()
# All dedicated CPUs are turned offline, except CPU0
mock_offline.assert_has_calls([mock.call(1), mock.call(2)])
@ -171,7 +172,7 @@ class TestAPI(test.NoDBTestCase):
self.flags(cpu_power_management_strategy='governor', group='libvirt')
self.flags(cpu_dedicated_set='0-2', group='compute')
api.power_down_all_dedicated_cpus()
self.api.power_down_all_dedicated_cpus()
# All dedicated CPUs are turned offline, except CPU0
mock_set_governor.assert_has_calls([mock.call(1, 'powersave'),
mock.call(2, 'powersave')])
@ -179,7 +180,7 @@ class TestAPI(test.NoDBTestCase):
@mock.patch.object(core, 'set_offline')
def test_power_down_all_dedicated_cpus_skipped(self, mock_offline):
self.flags(cpu_power_management=False, group='libvirt')
api.power_down_all_dedicated_cpus()
self.api.power_down_all_dedicated_cpus()
mock_offline.assert_not_called()
@mock.patch.object(core, 'set_offline')
@ -188,7 +189,7 @@ class TestAPI(test.NoDBTestCase):
):
self.flags(cpu_power_management=True, group='libvirt')
self.flags(cpu_dedicated_set=None, group='compute')
api.power_down_all_dedicated_cpus()
self.api.power_down_all_dedicated_cpus()
mock_offline.assert_not_called()
@mock.patch.object(core, 'get_governor')
@ -201,7 +202,7 @@ class TestAPI(test.NoDBTestCase):
mock_get_governor.return_value = 'performance'
mock_get_online.side_effect = (True, False)
self.assertRaises(exception.InvalidConfiguration,
api.validate_all_dedicated_cpus)
self.api.validate_all_dedicated_cpus)
@mock.patch.object(core, 'get_governor')
@mock.patch.object(core, 'get_online')
@ -213,7 +214,7 @@ class TestAPI(test.NoDBTestCase):
mock_get_online.return_value = True
mock_get_governor.side_effect = ('powersave', 'performance')
self.assertRaises(exception.InvalidConfiguration,
api.validate_all_dedicated_cpus)
self.api.validate_all_dedicated_cpus)
@mock.patch.object(core, 'get_governor')
@mock.patch.object(core, 'get_online')
@ -227,7 +228,7 @@ class TestAPI(test.NoDBTestCase):
mock_get_online.return_value = True
mock_get_governor.return_value = 'performance'
api.validate_all_dedicated_cpus()
self.api.validate_all_dedicated_cpus()
# Make sure we skipped CPU0
mock_get_online.assert_has_calls([mock.call(1), mock.call(2)])
@ -240,6 +241,6 @@ class TestAPI(test.NoDBTestCase):
def test_validate_all_dedicated_cpus_no_cpu(self):
self.flags(cpu_power_management=True, group='libvirt')
self.flags(cpu_dedicated_set=None, group='compute')
api.validate_all_dedicated_cpus()
self.api.validate_all_dedicated_cpus()
# no assert we want to make sure the validation won't raise if
# no dedicated cpus are configured

View File

@ -75,88 +75,102 @@ class Core:
core.set_governor(self.ident, CONF.libvirt.cpu_power_governor_low)
def power_up(instance: objects.Instance) -> None:
if not CONF.libvirt.cpu_power_management:
return
if instance.numa_topology is None:
return
class API(object):
cpu_dedicated_set = hardware.get_cpu_dedicated_set_nozero() or set()
pcpus = instance.numa_topology.cpu_pinning.union(
instance.numa_topology.cpuset_reserved)
powered_up = set()
for pcpu in pcpus:
if pcpu in cpu_dedicated_set:
pcpu = Core(pcpu)
if CONF.libvirt.cpu_power_management_strategy == 'cpu_state':
pcpu.online = True
else:
pcpu.set_high_governor()
powered_up.add(str(pcpu))
LOG.debug("Cores powered up : %s", powered_up)
def core(self, i):
    """Return a ``Core`` wrapper for physical CPU ``i``.

    Strictly speaking this indirection is unnecessary: callers could
    instantiate ``Core`` directly. It exists so that multinode functional
    tests (for example live migration) can tell cores on the source host
    apart from cores on the destination host — the tests replace this
    helper with a stub that returns a fixture, letting each fake host
    keep its own per-core power state.

    See also nova.virt.libvirt.driver.LibvirtDriver.cpu_api.
    """
    return Core(i)
def power_down(instance: objects.Instance) -> None:
if not CONF.libvirt.cpu_power_management:
return
if instance.numa_topology is None:
return
def power_up(self, instance: objects.Instance) -> None:
if not CONF.libvirt.cpu_power_management:
return
if instance.numa_topology is None:
return
cpu_dedicated_set = hardware.get_cpu_dedicated_set_nozero() or set()
pcpus = instance.numa_topology.cpu_pinning.union(
instance.numa_topology.cpuset_reserved)
powered_down = set()
for pcpu in pcpus:
if pcpu in cpu_dedicated_set:
pcpu = Core(pcpu)
cpu_dedicated_set = hardware.get_cpu_dedicated_set_nozero() or set()
pcpus = instance.numa_topology.cpu_pinning.union(
instance.numa_topology.cpuset_reserved)
powered_up = set()
for pcpu in pcpus:
if pcpu in cpu_dedicated_set:
pcpu = self.core(pcpu)
if CONF.libvirt.cpu_power_management_strategy == 'cpu_state':
pcpu.online = True
else:
pcpu.set_high_governor()
powered_up.add(str(pcpu))
LOG.debug("Cores powered up : %s", powered_up)
def power_down(self, instance: objects.Instance) -> None:
    """Power down the dedicated cores pinned to ``instance``.

    No-op unless ``[libvirt]cpu_power_management`` is enabled and the
    instance actually has a NUMA topology (i.e. pinned CPUs). Depending
    on ``[libvirt]cpu_power_management_strategy``, each pinned core that
    belongs to the host's dedicated set (CPU0 excluded) is either taken
    offline ('cpu_state') or switched to the low-power governor.
    """
    # Guard clauses: feature disabled, or nothing is pinned.
    if not CONF.libvirt.cpu_power_management:
        return
    if instance.numa_topology is None:
        return

    dedicated = hardware.get_cpu_dedicated_set_nozero() or set()
    pinned = instance.numa_topology.cpu_pinning.union(
        instance.numa_topology.cpuset_reserved)
    powered_down = set()
    for cpu_id in pinned:
        # Only touch cores we manage; CPU0 is never in the nozero set.
        if cpu_id not in dedicated:
            continue
        target = self.core(cpu_id)
        if CONF.libvirt.cpu_power_management_strategy == 'cpu_state':
            target.online = False
        else:
            target.set_low_governor()
        powered_down.add(str(target))
    LOG.debug("Cores powered down : %s", powered_down)
def power_down_all_dedicated_cpus(self) -> None:
if not CONF.libvirt.cpu_power_management:
return
cpu_dedicated_set = hardware.get_cpu_dedicated_set_nozero() or set()
for pcpu in cpu_dedicated_set:
pcpu = self.core(pcpu)
if CONF.libvirt.cpu_power_management_strategy == 'cpu_state':
pcpu.online = False
else:
pcpu.set_low_governor()
powered_down.add(str(pcpu))
LOG.debug("Cores powered down : %s", powered_down)
LOG.debug("Cores powered down : %s", cpu_dedicated_set)
def power_down_all_dedicated_cpus() -> None:
if not CONF.libvirt.cpu_power_management:
return
cpu_dedicated_set = hardware.get_cpu_dedicated_set_nozero() or set()
for pcpu in cpu_dedicated_set:
pcpu = Core(pcpu)
def validate_all_dedicated_cpus(self) -> None:
if not CONF.libvirt.cpu_power_management:
return
cpu_dedicated_set = hardware.get_cpu_dedicated_set() or set()
governors = set()
cpu_states = set()
for pcpu in cpu_dedicated_set:
if (pcpu == 0 and
CONF.libvirt.cpu_power_management_strategy == 'cpu_state'):
LOG.warning('CPU0 is in cpu_dedicated_set, '
'but it is not eligible for state management '
'and will be ignored')
continue
pcpu = self.core(pcpu)
# we need to collect the governors strategy and the CPU states
governors.add(pcpu.governor)
cpu_states.add(pcpu.online)
if CONF.libvirt.cpu_power_management_strategy == 'cpu_state':
pcpu.online = False
else:
pcpu.set_low_governor()
LOG.debug("Cores powered down : %s", cpu_dedicated_set)
def validate_all_dedicated_cpus() -> None:
if not CONF.libvirt.cpu_power_management:
return
cpu_dedicated_set = hardware.get_cpu_dedicated_set() or set()
governors = set()
cpu_states = set()
for pcpu in cpu_dedicated_set:
if (pcpu == 0 and
CONF.libvirt.cpu_power_management_strategy == 'cpu_state'):
LOG.warning('CPU0 is in cpu_dedicated_set, but it is not eligible '
'for state management and will be ignored')
continue
pcpu = Core(pcpu)
# we need to collect the governors strategy and the CPU states
governors.add(pcpu.governor)
cpu_states.add(pcpu.online)
if CONF.libvirt.cpu_power_management_strategy == 'cpu_state':
# all the cores need to have the same governor strategy
if len(governors) > 1:
msg = _("All the cores need to have the same governor strategy"
"before modifying the CPU states. You can reboot the "
"compute node if you prefer.")
raise exception.InvalidConfiguration(msg)
elif CONF.libvirt.cpu_power_management_strategy == 'governor':
# all the cores need to be online
if False in cpu_states:
msg = _("All the cores need to be online before modifying the "
"governor strategy.")
raise exception.InvalidConfiguration(msg)
# all the cores need to have the same governor strategy
if len(governors) > 1:
msg = _("All the cores need to have the same governor strategy"
"before modifying the CPU states. You can reboot the "
"compute node if you prefer.")
raise exception.InvalidConfiguration(msg)
elif CONF.libvirt.cpu_power_management_strategy == 'governor':
# all the cores need to be online
if False in cpu_states:
msg = _("All the cores need to be online before modifying the "
"governor strategy.")
raise exception.InvalidConfiguration(msg)

View File

@ -549,6 +549,15 @@ class LibvirtDriver(driver.ComputeDriver):
# events about success or failure.
self._device_event_handler = AsyncDeviceEventsHandler()
# NOTE(artom) From a pure functionality point of view, there's no need
# for this to be an attribute of self. However, we want to test power
# management in multinode scenarios (ex: live migration) in our
# functional tests. If the power management code was just a bunch of
# module level functions, the functional tests would not be able to
# distinguish between cores on the source and destination hosts.
# See also nova.virt.libvirt.cpu.api.API.core().
self.cpu_api = libvirt_cpu.API()
def _discover_vpmems(self, vpmem_conf=None):
"""Discover vpmems on host and configuration.
@ -829,13 +838,13 @@ class LibvirtDriver(driver.ComputeDriver):
# modified by Nova before. Note that it can provide an exception if
# either the governor strategies are different between the cores or if
# the cores are offline.
libvirt_cpu.validate_all_dedicated_cpus()
self.cpu_api.validate_all_dedicated_cpus()
# NOTE(sbauza): We powerdown all dedicated CPUs but if some instances
# exist that are pinned for some CPUs, then we'll later powerup those
# CPUs when rebooting the instance in _init_instance()
# Note that it can provide an exception if the config options are
# wrongly modified.
libvirt_cpu.power_down_all_dedicated_cpus()
self.cpu_api.power_down_all_dedicated_cpus()
# TODO(sbauza): Remove this code once mediated devices are persisted
# across reboots.
@ -1559,7 +1568,7 @@ class LibvirtDriver(driver.ComputeDriver):
if CONF.libvirt.virt_type == 'lxc':
self._teardown_container(instance)
# We're sure the instance is gone, we can shutdown the core if so
libvirt_cpu.power_down(instance)
self.cpu_api.power_down(instance)
def destroy(self, context, instance, network_info, block_device_info=None,
destroy_disks=True, destroy_secrets=True):
@ -3301,7 +3310,7 @@ class LibvirtDriver(driver.ComputeDriver):
current_power_state = guest.get_power_state(self._host)
libvirt_cpu.power_up(instance)
self.cpu_api.power_up(instance)
# TODO(stephenfin): Any reason we couldn't use 'self.resume' here?
guest.launch(pause=current_power_state == power_state.PAUSED)
@ -7954,7 +7963,7 @@ class LibvirtDriver(driver.ComputeDriver):
post_xml_callback()
if power_on or pause:
libvirt_cpu.power_up(instance)
self.cpu_api.power_up(instance)
guest.launch(pause=pause)
return guest