Merge "pwr mgmt: make API into a per-driver object"

This commit is contained in:
Zuul 2024-03-13 23:48:29 +00:00 committed by Gerrit Code Review
commit 52a7d9cef9
3 changed files with 121 additions and 97 deletions

View File

@ -24,6 +24,7 @@ class TestAPI(test.NoDBTestCase):
def setUp(self):
super(TestAPI, self).setUp()
self.core_1 = api.Core(1)
self.api = api.API()
# Create a fake instance with two pinned CPUs but only one is on the
# dedicated set
@ -82,7 +83,7 @@ class TestAPI(test.NoDBTestCase):
self.flags(cpu_power_management=True, group='libvirt')
self.flags(cpu_dedicated_set='1-2', group='compute')
api.power_up(self.fake_inst)
self.api.power_up(self.fake_inst)
# only core #2 can be set as core #0 is not on the dedicated set
# As a reminder, core(i).online calls set_online(i)
mock_online.assert_called_once_with(2)
@ -93,7 +94,7 @@ class TestAPI(test.NoDBTestCase):
self.flags(cpu_power_management_strategy='governor', group='libvirt')
self.flags(cpu_dedicated_set='1-2', group='compute')
api.power_up(self.fake_inst)
self.api.power_up(self.fake_inst)
# only core #2 can be set as core #1 is not on the dedicated set
# As a reminder, core(i).set_high_governor calls set_governor(i)
mock_set_governor.assert_called_once_with(2, 'performance')
@ -101,13 +102,13 @@ class TestAPI(test.NoDBTestCase):
@mock.patch.object(core, 'set_online')
def test_power_up_skipped(self, mock_online):
self.flags(cpu_power_management=False, group='libvirt')
api.power_up(self.fake_inst)
self.api.power_up(self.fake_inst)
mock_online.assert_not_called()
@mock.patch.object(core, 'set_online')
def test_power_up_skipped_if_standard_instance(self, mock_online):
self.flags(cpu_power_management=True, group='libvirt')
api.power_up(objects.Instance(numa_topology=None))
self.api.power_up(objects.Instance(numa_topology=None))
mock_online.assert_not_called()
@mock.patch.object(core, 'set_offline')
@ -115,7 +116,7 @@ class TestAPI(test.NoDBTestCase):
self.flags(cpu_power_management=True, group='libvirt')
self.flags(cpu_dedicated_set='1-2', group='compute')
api.power_down(self.fake_inst)
self.api.power_down(self.fake_inst)
# only core #2 can be set as core #1 is not on the dedicated set
# As a reminder, core(i).online calls set_online(i)
mock_offline.assert_called_once_with(2)
@ -126,7 +127,7 @@ class TestAPI(test.NoDBTestCase):
self.flags(cpu_power_management_strategy='governor', group='libvirt')
self.flags(cpu_dedicated_set='0-1', group='compute')
api.power_down(self.fake_inst)
self.api.power_down(self.fake_inst)
# Make sure that core #0 is ignored, since it is special and cannot
# be powered down.
@ -138,7 +139,7 @@ class TestAPI(test.NoDBTestCase):
self.flags(cpu_power_management_strategy='governor', group='libvirt')
self.flags(cpu_dedicated_set='1-2', group='compute')
api.power_down(self.fake_inst)
self.api.power_down(self.fake_inst)
# only core #2 can be set as core #0 is not on the dedicated set
# As a reminder, core(i).set_high_governor calls set_governor(i)
@ -147,13 +148,13 @@ class TestAPI(test.NoDBTestCase):
@mock.patch.object(core, 'set_offline')
def test_power_down_skipped(self, mock_offline):
self.flags(cpu_power_management=False, group='libvirt')
api.power_down(self.fake_inst)
self.api.power_down(self.fake_inst)
mock_offline.assert_not_called()
@mock.patch.object(core, 'set_offline')
def test_power_down_skipped_if_standard_instance(self, mock_offline):
self.flags(cpu_power_management=True, group='libvirt')
api.power_down(objects.Instance(numa_topology=None))
self.api.power_down(objects.Instance(numa_topology=None))
mock_offline.assert_not_called()
@mock.patch.object(core, 'set_offline')
@ -161,7 +162,7 @@ class TestAPI(test.NoDBTestCase):
self.flags(cpu_power_management=True, group='libvirt')
self.flags(cpu_dedicated_set='0-2', group='compute')
api.power_down_all_dedicated_cpus()
self.api.power_down_all_dedicated_cpus()
# All dedicated CPUs are turned offline, except CPU0
mock_offline.assert_has_calls([mock.call(1), mock.call(2)])
@ -171,7 +172,7 @@ class TestAPI(test.NoDBTestCase):
self.flags(cpu_power_management_strategy='governor', group='libvirt')
self.flags(cpu_dedicated_set='0-2', group='compute')
api.power_down_all_dedicated_cpus()
self.api.power_down_all_dedicated_cpus()
# All dedicated CPUs are turned offline, except CPU0
mock_set_governor.assert_has_calls([mock.call(1, 'powersave'),
mock.call(2, 'powersave')])
@ -179,7 +180,7 @@ class TestAPI(test.NoDBTestCase):
@mock.patch.object(core, 'set_offline')
def test_power_down_all_dedicated_cpus_skipped(self, mock_offline):
self.flags(cpu_power_management=False, group='libvirt')
api.power_down_all_dedicated_cpus()
self.api.power_down_all_dedicated_cpus()
mock_offline.assert_not_called()
@mock.patch.object(core, 'set_offline')
@ -188,7 +189,7 @@ class TestAPI(test.NoDBTestCase):
):
self.flags(cpu_power_management=True, group='libvirt')
self.flags(cpu_dedicated_set=None, group='compute')
api.power_down_all_dedicated_cpus()
self.api.power_down_all_dedicated_cpus()
mock_offline.assert_not_called()
@mock.patch.object(core, 'get_governor')
@ -201,7 +202,7 @@ class TestAPI(test.NoDBTestCase):
mock_get_governor.return_value = 'performance'
mock_get_online.side_effect = (True, False)
self.assertRaises(exception.InvalidConfiguration,
api.validate_all_dedicated_cpus)
self.api.validate_all_dedicated_cpus)
@mock.patch.object(core, 'get_governor')
@mock.patch.object(core, 'get_online')
@ -213,7 +214,7 @@ class TestAPI(test.NoDBTestCase):
mock_get_online.return_value = True
mock_get_governor.side_effect = ('powersave', 'performance')
self.assertRaises(exception.InvalidConfiguration,
api.validate_all_dedicated_cpus)
self.api.validate_all_dedicated_cpus)
@mock.patch.object(core, 'get_governor')
@mock.patch.object(core, 'get_online')
@ -227,7 +228,7 @@ class TestAPI(test.NoDBTestCase):
mock_get_online.return_value = True
mock_get_governor.return_value = 'performance'
api.validate_all_dedicated_cpus()
self.api.validate_all_dedicated_cpus()
# Make sure we skipped CPU0
mock_get_online.assert_has_calls([mock.call(1), mock.call(2)])
@ -240,6 +241,6 @@ class TestAPI(test.NoDBTestCase):
def test_validate_all_dedicated_cpus_no_cpu(self):
self.flags(cpu_power_management=True, group='libvirt')
self.flags(cpu_dedicated_set=None, group='compute')
api.validate_all_dedicated_cpus()
self.api.validate_all_dedicated_cpus()
# no assert we want to make sure the validation won't raise if
# no dedicated cpus are configured

View File

@ -75,88 +75,102 @@ class Core:
core.set_governor(self.ident, CONF.libvirt.cpu_power_governor_low)
def power_up(instance: objects.Instance) -> None:
if not CONF.libvirt.cpu_power_management:
return
if instance.numa_topology is None:
return
class API(object):
cpu_dedicated_set = hardware.get_cpu_dedicated_set_nozero() or set()
pcpus = instance.numa_topology.cpu_pinning.union(
instance.numa_topology.cpuset_reserved)
powered_up = set()
for pcpu in pcpus:
if pcpu in cpu_dedicated_set:
pcpu = Core(pcpu)
if CONF.libvirt.cpu_power_management_strategy == 'cpu_state':
pcpu.online = True
else:
pcpu.set_high_governor()
powered_up.add(str(pcpu))
LOG.debug("Cores powered up : %s", powered_up)
def core(self, i):
    """Return a ``Core`` wrapper for physical CPU ``i``.

    Strictly speaking this indirection is unnecessary: callers could
    instantiate ``Core`` directly. It exists so that multinode functional
    tests (for example live migration) can tell cores on the source host
    apart from cores on the destination host — the tests replace this
    helper with a stub that returns a fixture, letting each fake host
    keep its own per-core power state.

    See also nova.virt.libvirt.driver.LibvirtDriver.cpu_api.
    """
    return Core(i)
def power_down(instance: objects.Instance) -> None:
if not CONF.libvirt.cpu_power_management:
return
if instance.numa_topology is None:
return
def power_up(self, instance: objects.Instance) -> None:
if not CONF.libvirt.cpu_power_management:
return
if instance.numa_topology is None:
return
cpu_dedicated_set = hardware.get_cpu_dedicated_set_nozero() or set()
pcpus = instance.numa_topology.cpu_pinning.union(
instance.numa_topology.cpuset_reserved)
powered_down = set()
for pcpu in pcpus:
if pcpu in cpu_dedicated_set:
pcpu = Core(pcpu)
cpu_dedicated_set = hardware.get_cpu_dedicated_set_nozero() or set()
pcpus = instance.numa_topology.cpu_pinning.union(
instance.numa_topology.cpuset_reserved)
powered_up = set()
for pcpu in pcpus:
if pcpu in cpu_dedicated_set:
pcpu = self.core(pcpu)
if CONF.libvirt.cpu_power_management_strategy == 'cpu_state':
pcpu.online = True
else:
pcpu.set_high_governor()
powered_up.add(str(pcpu))
LOG.debug("Cores powered up : %s", powered_up)
def power_down(self, instance: objects.Instance) -> None:
    """Power down the dedicated cores pinned to ``instance``.

    No-op unless ``[libvirt]cpu_power_management`` is enabled and the
    instance actually has a NUMA topology (i.e. pinned CPUs). Depending
    on ``[libvirt]cpu_power_management_strategy``, each pinned core that
    belongs to the host's dedicated set (CPU0 excluded) is either taken
    offline ('cpu_state') or switched to the low-power governor.
    """
    # Guard clauses: feature disabled, or nothing is pinned.
    if not CONF.libvirt.cpu_power_management:
        return
    if instance.numa_topology is None:
        return

    dedicated = hardware.get_cpu_dedicated_set_nozero() or set()
    pinned = instance.numa_topology.cpu_pinning.union(
        instance.numa_topology.cpuset_reserved)
    powered_down = set()
    for cpu_id in pinned:
        # Only touch cores we manage; CPU0 is never in the nozero set.
        if cpu_id not in dedicated:
            continue
        target = self.core(cpu_id)
        if CONF.libvirt.cpu_power_management_strategy == 'cpu_state':
            target.online = False
        else:
            target.set_low_governor()
        powered_down.add(str(target))
    LOG.debug("Cores powered down : %s", powered_down)
def power_down_all_dedicated_cpus(self) -> None:
if not CONF.libvirt.cpu_power_management:
return
cpu_dedicated_set = hardware.get_cpu_dedicated_set_nozero() or set()
for pcpu in cpu_dedicated_set:
pcpu = self.core(pcpu)
if CONF.libvirt.cpu_power_management_strategy == 'cpu_state':
pcpu.online = False
else:
pcpu.set_low_governor()
powered_down.add(str(pcpu))
LOG.debug("Cores powered down : %s", powered_down)
LOG.debug("Cores powered down : %s", cpu_dedicated_set)
def power_down_all_dedicated_cpus() -> None:
if not CONF.libvirt.cpu_power_management:
return
cpu_dedicated_set = hardware.get_cpu_dedicated_set_nozero() or set()
for pcpu in cpu_dedicated_set:
pcpu = Core(pcpu)
def validate_all_dedicated_cpus(self) -> None:
if not CONF.libvirt.cpu_power_management:
return
cpu_dedicated_set = hardware.get_cpu_dedicated_set() or set()
governors = set()
cpu_states = set()
for pcpu in cpu_dedicated_set:
if (pcpu == 0 and
CONF.libvirt.cpu_power_management_strategy == 'cpu_state'):
LOG.warning('CPU0 is in cpu_dedicated_set, '
'but it is not eligible for state management '
'and will be ignored')
continue
pcpu = self.core(pcpu)
# we need to collect the governors strategy and the CPU states
governors.add(pcpu.governor)
cpu_states.add(pcpu.online)
if CONF.libvirt.cpu_power_management_strategy == 'cpu_state':
pcpu.online = False
else:
pcpu.set_low_governor()
LOG.debug("Cores powered down : %s", cpu_dedicated_set)
def validate_all_dedicated_cpus() -> None:
if not CONF.libvirt.cpu_power_management:
return
cpu_dedicated_set = hardware.get_cpu_dedicated_set() or set()
governors = set()
cpu_states = set()
for pcpu in cpu_dedicated_set:
if (pcpu == 0 and
CONF.libvirt.cpu_power_management_strategy == 'cpu_state'):
LOG.warning('CPU0 is in cpu_dedicated_set, but it is not eligible '
'for state management and will be ignored')
continue
pcpu = Core(pcpu)
# we need to collect the governors strategy and the CPU states
governors.add(pcpu.governor)
cpu_states.add(pcpu.online)
if CONF.libvirt.cpu_power_management_strategy == 'cpu_state':
# all the cores need to have the same governor strategy
if len(governors) > 1:
msg = _("All the cores need to have the same governor strategy"
"before modifying the CPU states. You can reboot the "
"compute node if you prefer.")
raise exception.InvalidConfiguration(msg)
elif CONF.libvirt.cpu_power_management_strategy == 'governor':
# all the cores need to be online
if False in cpu_states:
msg = _("All the cores need to be online before modifying the "
"governor strategy.")
raise exception.InvalidConfiguration(msg)
# all the cores need to have the same governor strategy
if len(governors) > 1:
msg = _("All the cores need to have the same governor strategy"
"before modifying the CPU states. You can reboot the "
"compute node if you prefer.")
raise exception.InvalidConfiguration(msg)
elif CONF.libvirt.cpu_power_management_strategy == 'governor':
# all the cores need to be online
if False in cpu_states:
msg = _("All the cores need to be online before modifying the "
"governor strategy.")
raise exception.InvalidConfiguration(msg)

View File

@ -549,6 +549,15 @@ class LibvirtDriver(driver.ComputeDriver):
# events about success or failure.
self._device_event_handler = AsyncDeviceEventsHandler()
# NOTE(artom) From a pure functionality point of view, there's no need
# for this to be an attribute of self. However, we want to test power
# management in multinode scenarios (ex: live migration) in our
# functional tests. If the power management code was just a bunch of
# module level functions, the functional tests would not be able to
# distinguish between cores on the source and destination hosts.
# See also nova.virt.libvirt.cpu.api.API.core().
self.cpu_api = libvirt_cpu.API()
def _discover_vpmems(self, vpmem_conf=None):
"""Discover vpmems on host and configuration.
@ -829,13 +838,13 @@ class LibvirtDriver(driver.ComputeDriver):
# modified by Nova before. Note that it can provide an exception if
# either the governor strategies are different between the cores or if
# the cores are offline.
libvirt_cpu.validate_all_dedicated_cpus()
self.cpu_api.validate_all_dedicated_cpus()
# NOTE(sbauza): We powerdown all dedicated CPUs but if some instances
# exist that are pinned for some CPUs, then we'll later powerup those
# CPUs when rebooting the instance in _init_instance()
# Note that it can provide an exception if the config options are
# wrongly modified.
libvirt_cpu.power_down_all_dedicated_cpus()
self.cpu_api.power_down_all_dedicated_cpus()
# TODO(sbauza): Remove this code once mediated devices are persisted
# across reboots.
@ -1559,7 +1568,7 @@ class LibvirtDriver(driver.ComputeDriver):
if CONF.libvirt.virt_type == 'lxc':
self._teardown_container(instance)
# We're sure the instance is gone, we can shutdown the core if so
libvirt_cpu.power_down(instance)
self.cpu_api.power_down(instance)
def destroy(self, context, instance, network_info, block_device_info=None,
destroy_disks=True, destroy_secrets=True):
@ -3301,7 +3310,7 @@ class LibvirtDriver(driver.ComputeDriver):
current_power_state = guest.get_power_state(self._host)
libvirt_cpu.power_up(instance)
self.cpu_api.power_up(instance)
# TODO(stephenfin): Any reason we couldn't use 'self.resume' here?
guest.launch(pause=current_power_state == power_state.PAUSED)
@ -7954,7 +7963,7 @@ class LibvirtDriver(driver.ComputeDriver):
post_xml_callback()
if power_on or pause:
libvirt_cpu.power_up(instance)
self.cpu_api.power_up(instance)
guest.launch(pause=pause)
return guest