Merge "Validate CPU config options against running instances"

Zuul 2019-09-19 20:15:15 +00:00 committed by Gerrit Code Review
commit c0f5387621
11 changed files with 183 additions and 3 deletions

View File

@ -99,6 +99,7 @@ from nova.virt import block_device as driver_block_device
from nova.virt import configdrive
from nova.virt import driver
from nova.virt import event as virtevent
from nova.virt import hardware
from nova.virt import storage_users
from nova.virt import virtapi
from nova.volume import cinder
@ -816,6 +817,63 @@ class ComputeManager(manager.Manager):
        self._clean_instance_console_tokens(context, instance)
        self._delete_scheduler_instance_info(context, instance.uuid)

    def _validate_pinning_configuration(self, instances):
        if not self.driver.capabilities.get('supports_pcpus', False):
            return

        for instance in instances:
            # ignore deleted instances
            if instance.deleted:
                continue

            # if this is an unpinned instance and the host only has
            # 'cpu_dedicated_set' configured, we need to tell the operator to
            # correct their configuration
            if not (instance.numa_topology and
                    instance.numa_topology.cpu_pinning_requested):
                # we don't need to check 'vcpu_pin_set' since it can't coexist
                # alongside 'cpu_dedicated_set'
                if (CONF.compute.cpu_dedicated_set and
                        not CONF.compute.cpu_shared_set):
                    msg = _("This host has unpinned instances but has no CPUs "
                            "set aside for this purpose; configure '[compute] "
                            "cpu_shared_set' instead of, or in addition to, "
                            "'[compute] cpu_dedicated_set'")
                    raise exception.InvalidConfiguration(msg)

                continue

            # ditto for pinned instances if only 'cpu_shared_set' is
            # configured
            if (CONF.compute.cpu_shared_set and
                    not CONF.compute.cpu_dedicated_set and
                    not CONF.vcpu_pin_set):
                msg = _("This host has pinned instances but has no CPUs "
                        "set aside for this purpose; configure '[compute] "
                        "cpu_dedicated_set' instead of, or in addition to, "
                        "'[compute] cpu_shared_set'")
                raise exception.InvalidConfiguration(msg)

            # also check to make sure the operator hasn't accidentally
            # dropped some cores that instances are currently using
            available_dedicated_cpus = (hardware.get_vcpu_pin_set() or
                                        hardware.get_cpu_dedicated_set())
            pinned_cpus = instance.numa_topology.cpu_pinning
            if available_dedicated_cpus and (
                    pinned_cpus - available_dedicated_cpus):
                # we can't raise an exception because of bug #1289064,
                # which meant we didn't recalculate CPU pinning information
                # when we live migrated a pinned instance
                LOG.warning(
                    "Instance is pinned to host CPUs %(cpus)s "
                    "but one or more of these CPUs are not included in "
                    "either '[compute] cpu_dedicated_set' or "
                    "'vcpu_pin_set'; you should update these "
                    "configuration options to include the missing CPUs "
                    "or rebuild or cold migrate this instance.",
                    {'cpus': list(pinned_cpus)},
                    instance=instance)

    def _init_instance(self, context, instance):
        """Initialize this instance during service init."""
@ -1255,13 +1313,16 @@ class ComputeManager(manager.Manager):
        self.driver.init_host(host=self.host)
        context = nova.context.get_admin_context()
        instances = objects.InstanceList.get_by_host(
-           context, self.host, expected_attrs=['info_cache', 'metadata'])
+           context, self.host,
+           expected_attrs=['info_cache', 'metadata', 'numa_topology'])

        if CONF.defer_iptables_apply:
            self.driver.filter_defer_apply_on()

        self.init_virt_events()

        self._validate_pinning_configuration(instances)

        try:
            # checking that instance was not already evacuated to other host
            evacuated_instances = self._destroy_evacuated_instances(context)
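In short, the new hook rejects two configuration mistakes outright and downgrades a third to a warning: unpinned instances on a host that only defines '[compute] cpu_dedicated_set', pinned instances on a host that defines neither 'cpu_dedicated_set' nor the legacy 'vcpu_pin_set', and pinned CPUs that are no longer part of the configured set (a warning only, because of bug #1289064 noted in the code above). A minimal sketch of that decision table, with no Nova dependencies; the helper name and arguments below are illustrative and not part of the change:

# Illustrative sketch only: the same branching as _validate_pinning_configuration,
# reduced to plain sets so the three outcomes are easy to trace.
def check_pinning(instances, cpu_dedicated_set, cpu_shared_set, vcpu_pin_set=None):
    for wants_pinning, pinned_cpus in instances:
        if not wants_pinning:
            # unpinned instance on a host that only reserves dedicated CPUs
            if cpu_dedicated_set and not cpu_shared_set:
                raise ValueError("configure '[compute] cpu_shared_set'")
            continue
        # pinned instance on a host that reserves neither dedicated nor legacy CPUs
        if cpu_shared_set and not cpu_dedicated_set and not vcpu_pin_set:
            raise ValueError("configure '[compute] cpu_dedicated_set'")
        # pinned CPUs missing from the configured set: warn rather than fail
        available = vcpu_pin_set or cpu_dedicated_set
        if available and pinned_cpus - available:
            print("warning: instance pinned to CPUs outside the configured set")

# e.g. a host with only cpu_dedicated_set but an unpinned instance fails fast
try:
    check_pinning([(False, set())], cpu_dedicated_set={0, 1, 2, 3},
                  cpu_shared_set=None)
except ValueError as exc:
    print(exc)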

View File

@ -70,6 +70,7 @@ from nova.tests.unit import fake_network_cache_model
from nova.tests.unit import fake_notifier
from nova.tests.unit.objects import test_instance_fault
from nova.tests.unit.objects import test_instance_info_cache
from nova.tests.unit.objects import test_instance_numa
from nova import utils
from nova.virt.block_device import DriverVolumeBlockDevice as driver_bdm_volume
from nova.virt import driver as virt_driver
@ -725,9 +726,12 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
        @mock.patch.object(context, 'get_admin_context')
        @mock.patch.object(manager.ComputeManager,
                           '_destroy_evacuated_instances')
        @mock.patch.object(manager.ComputeManager,
                           '_validate_pinning_configuration')
        @mock.patch.object(manager.ComputeManager, '_init_instance')
        @mock.patch.object(self.compute, '_update_scheduler_instance_info')
        def _do_mock_calls(mock_update_scheduler, mock_inst_init,
                           mock_validate_pinning,
                           mock_destroy, mock_admin_ctxt, mock_host_get,
                           mock_filter_off, mock_filter_on, mock_init_host,
                           defer_iptables_apply):
@ -739,6 +743,8 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
            if defer_iptables_apply:
                self.assertTrue(mock_filter_on.called)

            mock_validate_pinning.assert_called_once_with(inst_list)

            mock_destroy.assert_called_once_with(self.context)
            mock_inst_init.assert_has_calls(
                [mock.call(self.context, inst_list[0]),
@ -749,7 +755,7 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
                self.assertTrue(mock_filter_off.called)
            mock_init_host.assert_called_once_with(host=our_host)
            mock_host_get.assert_called_once_with(self.context, our_host,
-               expected_attrs=['info_cache', 'metadata'])
+               expected_attrs=['info_cache', 'metadata', 'numa_topology'])
            mock_update_scheduler.assert_called_once_with(
                self.context, inst_list)
@ -881,7 +887,7 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
        mock_init_host.assert_called_once_with(host=our_host)
        mock_host_get.assert_called_once_with(self.context, our_host,
-           expected_attrs=['info_cache', 'metadata'])
+           expected_attrs=['info_cache', 'metadata', 'numa_topology'])
        mock_init_virt.assert_called_once_with()
        mock_temp_mut.assert_called_once_with(self.context, read_deleted='yes')
        mock_get_inst.assert_called_once_with(self.context)
@ -923,6 +929,20 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
        mock_init_instance.assert_called_once_with(
            self.context, active_instance)

    @mock.patch.object(objects.InstanceList, 'get_by_host',
                       new=mock.Mock())
    @mock.patch('nova.compute.manager.ComputeManager.'
                '_validate_pinning_configuration')
    def test_init_host_pinning_configuration_validation_failure(self,
            mock_validate_pinning):
        """Test that we fail init_host if the pinning configuration check
        fails.
        """
        mock_validate_pinning.side_effect = exception.InvalidConfiguration

        self.assertRaises(exception.InvalidConfiguration,
                          self.compute.init_host)
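Note that side_effect is assigned the exception class itself; mock raises a fresh instance every time the patched method is called, which is what lets this test assert that init_host propagates the failure. A standalone sketch of that behaviour, using a local stand-in for nova.exception.InvalidConfiguration rather than the real class:

from unittest import mock

class InvalidConfiguration(Exception):
    """Stand-in for nova.exception.InvalidConfiguration."""

validate = mock.Mock(side_effect=InvalidConfiguration)
try:
    validate()
except InvalidConfiguration:
    print("init_host would abort here")
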
    def test_init_instance_with_binding_failed_vif_type(self):
        # this instance will plug a 'binding_failed' vif
        instance = fake_instance.fake_instance_obj(
@ -948,6 +968,95 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
        self.compute._init_instance(self.context, instance)
        set_error_state.assert_called_once_with(self.context, instance)

    def _test__validate_pinning_configuration(self, supports_pcpus=True):
        instance_1 = fake_instance.fake_instance_obj(
            self.context, uuid=uuids.instance_1)
        instance_2 = fake_instance.fake_instance_obj(
            self.context, uuid=uuids.instance_2)
        instance_3 = fake_instance.fake_instance_obj(
            self.context, uuid=uuids.instance_3)
        instance_4 = fake_instance.fake_instance_obj(
            self.context, uuid=uuids.instance_4)

        instance_1.numa_topology = None

        numa_wo_pinning = test_instance_numa.get_fake_obj_numa_topology(
            self.context)
        instance_2.numa_topology = numa_wo_pinning

        numa_w_pinning = test_instance_numa.get_fake_obj_numa_topology(
            self.context)
        numa_w_pinning.cells[0].pin_vcpus((1, 10), (2, 11))
        numa_w_pinning.cells[0].cpu_policy = (
            fields.CPUAllocationPolicy.DEDICATED)
        numa_w_pinning.cells[1].pin_vcpus((3, 0), (4, 1))
        numa_w_pinning.cells[1].cpu_policy = (
            fields.CPUAllocationPolicy.DEDICATED)
        instance_3.numa_topology = numa_w_pinning

        instance_4.deleted = True

        instances = objects.InstanceList(objects=[
            instance_1, instance_2, instance_3, instance_4])

        with mock.patch.dict(self.compute.driver.capabilities,
                             supports_pcpus=supports_pcpus):
            self.compute._validate_pinning_configuration(instances)
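The mock.patch.dict call above works because driver.capabilities is a plain dict: the capability flag is flipped only for the duration of the with block and restored when it exits. A self-contained illustration of that pattern; the dict here is a stand-in rather than the real driver attribute:

from unittest import mock

capabilities = {'supports_pcpus': False}

with mock.patch.dict(capabilities, supports_pcpus=True):
    assert capabilities['supports_pcpus'] is True

# the original value is restored once the context manager exits
assert capabilities['supports_pcpus'] is False
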
    def test__validate_pinning_configuration_invalid_unpinned_config(self):
        """Test that configuring only 'cpu_dedicated_set' when there are
        unpinned instances on the host results in an error.
        """
        self.flags(cpu_dedicated_set='0-7', group='compute')

        ex = self.assertRaises(
            exception.InvalidConfiguration,
            self._test__validate_pinning_configuration)

        self.assertIn('This host has unpinned instances but has no CPUs '
                      'set aside for this purpose;',
                      six.text_type(ex))

    def test__validate_pinning_configuration_invalid_pinned_config(self):
        """Test that configuring only 'cpu_shared_set' when there are pinned
        instances on the host results in an error.
        """
        self.flags(cpu_shared_set='0-7', group='compute')

        ex = self.assertRaises(
            exception.InvalidConfiguration,
            self._test__validate_pinning_configuration)

        self.assertIn('This host has pinned instances but has no CPUs '
                      'set aside for this purpose;',
                      six.text_type(ex))

    @mock.patch.object(manager.LOG, 'warning')
    def test__validate_pinning_configuration_warning(self, mock_log):
        """Test that configuring 'cpu_dedicated_set' such that some pinned
        cores of the instance are outside the range it specifies results in a
        warning.
        """
        self.flags(cpu_shared_set='0-7', cpu_dedicated_set='8-15',
                   group='compute')

        self._test__validate_pinning_configuration()

        self.assertEqual(1, mock_log.call_count)
        self.assertIn('Instance is pinned to host CPUs %(cpus)s '
                      'but one or more of these CPUs are not included in ',
                      six.text_type(mock_log.call_args[0]))

    def test__validate_pinning_configuration_no_config(self):
        """Test that the entire check is skipped if there's no host
        configuration.
        """
        self._test__validate_pinning_configuration()

    def test__validate_pinning_configuration_not_supported(self):
        """Test that the entire check is skipped if the driver doesn't even
        support PCPUs.
        """
        self._test__validate_pinning_configuration(supports_pcpus=False)

    def test__get_power_state_InstanceNotFound(self):
        instance = fake_instance.fake_instance_obj(
            self.context,

View File

@ -170,6 +170,7 @@ class ComputeDriver(object):
"supports_extend_volume": False,
"supports_multiattach": False,
"supports_trusted_certs": False,
"supports_pcpus": False,
# Image type support flags
"supports_image_type_aki": False,

View File

@ -111,6 +111,7 @@ class FakeDriver(driver.ComputeDriver):
"supports_extend_volume": True,
"supports_multiattach": True,
"supports_trusted_certs": True,
"supports_pcpus": False,
# Supported image types
"supports_image_type_raw": True,

View File

@ -100,6 +100,7 @@ class HyperVDriver(driver.ComputeDriver):
"supports_device_tagging": True,
"supports_multiattach": False,
"supports_trusted_certs": False,
"supports_pcpus": False,
# Supported image types
"supports_image_type_vhd": True,

View File

@ -162,6 +162,7 @@ class IronicDriver(virt_driver.ComputeDriver):
"supports_attach_interface": True,
"supports_multiattach": False,
"supports_trusted_certs": False,
"supports_pcpus": False,
# Image type support flags
"supports_image_type_aki": False,

View File

@ -342,6 +342,7 @@ class LibvirtDriver(driver.ComputeDriver):
            # formats. If we are configured for those backends, then we
            # should not expose the corresponding support traits.
            "supports_image_type_qcow2": not requires_raw_image,
            "supports_pcpus": True,
        }

        super(LibvirtDriver, self).__init__(virtapi)
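Of the drivers touched by this change, only libvirt advertises supports_pcpus as True; every other in-tree driver explicitly reports False, and the manager's capabilities.get('supports_pcpus', False) lookup keeps the check inert for out-of-tree drivers that never define the flag at all. A rough sketch of that interaction, using hypothetical driver classes rather than Nova code:

class BaseDriver:
    capabilities = {"supports_pcpus": False}

class LegacyDriver(BaseDriver):
    # an out-of-tree driver written before the flag existed
    capabilities = {}

class PinningAwareDriver(BaseDriver):
    capabilities = dict(BaseDriver.capabilities, supports_pcpus=True)

def should_validate(driver):
    # the same lookup the compute manager performs before running the check
    return driver.capabilities.get("supports_pcpus", False)

for drv in (BaseDriver(), LegacyDriver(), PinningAwareDriver()):
    print(type(drv).__name__, should_validate(drv))
# BaseDriver False, LegacyDriver False, PinningAwareDriver True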

View File

@ -77,6 +77,7 @@ class PowerVMDriver(driver.ComputeDriver):
        'supports_extend_volume': True,
        'supports_multiattach': False,
        'supports_trusted_certs': False,
        'supports_pcpus': False,
        # Supported image types
        "supports_image_type_aki": False,

View File

@ -69,6 +69,7 @@ class VMwareVCDriver(driver.ComputeDriver):
"supports_attach_interface": True,
"supports_multiattach": False,
"supports_trusted_certs": False,
"supports_pcpus": False,
# Image type support flags
"supports_image_type_aki": False,

View File

@ -69,6 +69,7 @@ class XenAPIDriver(driver.ComputeDriver):
"supports_device_tagging": True,
"supports_multiattach": False,
"supports_trusted_certs": False,
"supports_pcpus": False,
# Image type support flags
"supports_image_type_aki": False,

View File

@ -46,6 +46,8 @@ DEFAULT_EPH_DISK_FMT = 'ext3'
class ZVMDriver(driver.ComputeDriver):
    """z/VM implementation of ComputeDriver."""

    capabilities = {
        "supports_pcpus": False,
        # Image type support flags
        "supports_image_type_aki": False,
        "supports_image_type_ami": False,