diff --git a/nova/compute/manager.py b/nova/compute/manager.py
index c42382b19969..594e6b3b2a8d 100644
--- a/nova/compute/manager.py
+++ b/nova/compute/manager.py
@@ -99,6 +99,7 @@ from nova.virt import block_device as driver_block_device
 from nova.virt import configdrive
 from nova.virt import driver
 from nova.virt import event as virtevent
+from nova.virt import hardware
 from nova.virt import storage_users
 from nova.virt import virtapi
 from nova.volume import cinder
@@ -816,6 +817,63 @@ class ComputeManager(manager.Manager):
         self._clean_instance_console_tokens(context, instance)
         self._delete_scheduler_instance_info(context, instance.uuid)
 
+    def _validate_pinning_configuration(self, instances):
+        if not self.driver.capabilities.get('supports_pcpus', False):
+            return
+
+        for instance in instances:
+            # ignore deleted instances
+            if instance.deleted:
+                continue
+
+            # if this is an unpinned instance and the host only has
+            # 'cpu_dedicated_set' configured, we need to tell the operator to
+            # correct their configuration
+            if not (instance.numa_topology and
+                    instance.numa_topology.cpu_pinning_requested):
+
+                # we don't need to check 'vcpu_pin_set' since it can't coexist
+                # alongside 'cpu_dedicated_set'
+                if (CONF.compute.cpu_dedicated_set and
+                        not CONF.compute.cpu_shared_set):
+                    msg = _("This host has unpinned instances but has no CPUs "
+                            "set aside for this purpose; configure '[compute] "
+                            "cpu_shared_set' instead of, or in addition to, "
+                            "'[compute] cpu_dedicated_set'")
+                    raise exception.InvalidConfiguration(msg)
+
+                continue
+
+            # ditto for pinned instances if only 'cpu_shared_set' is configured
+            if (CONF.compute.cpu_shared_set and
+                    not CONF.compute.cpu_dedicated_set and
+                    not CONF.vcpu_pin_set):
+                msg = _("This host has pinned instances but has no CPUs "
+                        "set aside for this purpose; configure '[compute] "
+                        "cpu_dedicated_set' instead of, or in addition to, "
+                        "'[compute] cpu_shared_set'")
+                raise exception.InvalidConfiguration(msg)
+
+            # also check to make sure the operator hasn't accidentally
+            # dropped some cores that instances are currently using
+            available_dedicated_cpus = (hardware.get_vcpu_pin_set() or
+                                        hardware.get_cpu_dedicated_set())
+            pinned_cpus = instance.numa_topology.cpu_pinning
+            if available_dedicated_cpus and (
+                    pinned_cpus - available_dedicated_cpus):
+                # we can't raise an exception because of bug #1289064,
+                # which meant we didn't recalculate CPU pinning information
+                # when we live migrated a pinned instance
+                LOG.warning(
+                    "Instance is pinned to host CPUs %(cpus)s "
+                    "but one or more of these CPUs are not included in "
+                    "either '[compute] cpu_dedicated_set' or "
+                    "'vcpu_pin_set'; you should update these "
+                    "configuration options to include the missing CPUs "
+                    "or rebuild or cold migrate this instance.",
+                    {'cpus': list(pinned_cpus)},
+                    instance=instance)
+
     def _init_instance(self, context, instance):
         """Initialize this instance during service init."""
 
@@ -1255,13 +1313,16 @@ class ComputeManager(manager.Manager):
         self.driver.init_host(host=self.host)
         context = nova.context.get_admin_context()
         instances = objects.InstanceList.get_by_host(
-            context, self.host, expected_attrs=['info_cache', 'metadata'])
+            context, self.host,
+            expected_attrs=['info_cache', 'metadata', 'numa_topology'])
 
         if CONF.defer_iptables_apply:
             self.driver.filter_defer_apply_on()
 
         self.init_virt_events()
 
+        self._validate_pinning_configuration(instances)
+
         try:
             # checking that instance was not already evacuated to other host
             evacuated_instances = self._destroy_evacuated_instances(context)
diff --git a/nova/tests/unit/compute/test_compute_mgr.py b/nova/tests/unit/compute/test_compute_mgr.py
index dd783d4f5c34..aa6f40193467 100644
--- a/nova/tests/unit/compute/test_compute_mgr.py
+++ b/nova/tests/unit/compute/test_compute_mgr.py
@@ -70,6 +70,7 @@ from nova.tests.unit import fake_network_cache_model
 from nova.tests.unit import fake_notifier
 from nova.tests.unit.objects import test_instance_fault
 from nova.tests.unit.objects import test_instance_info_cache
+from nova.tests.unit.objects import test_instance_numa
 from nova import utils
 from nova.virt.block_device import DriverVolumeBlockDevice as driver_bdm_volume
 from nova.virt import driver as virt_driver
@@ -725,9 +726,12 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
         @mock.patch.object(context, 'get_admin_context')
         @mock.patch.object(manager.ComputeManager,
                            '_destroy_evacuated_instances')
+        @mock.patch.object(manager.ComputeManager,
+                           '_validate_pinning_configuration')
         @mock.patch.object(manager.ComputeManager, '_init_instance')
         @mock.patch.object(self.compute, '_update_scheduler_instance_info')
         def _do_mock_calls(mock_update_scheduler, mock_inst_init,
+                           mock_validate_pinning,
                            mock_destroy, mock_admin_ctxt, mock_host_get,
                            mock_filter_off, mock_filter_on, mock_init_host,
                            defer_iptables_apply):
@@ -739,6 +743,8 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
 
             if defer_iptables_apply:
                 self.assertTrue(mock_filter_on.called)
+
+            mock_validate_pinning.assert_called_once_with(inst_list)
             mock_destroy.assert_called_once_with(self.context)
             mock_inst_init.assert_has_calls(
                 [mock.call(self.context, inst_list[0]),
@@ -749,7 +755,7 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
                 self.assertTrue(mock_filter_off.called)
             mock_init_host.assert_called_once_with(host=our_host)
             mock_host_get.assert_called_once_with(self.context, our_host,
-                expected_attrs=['info_cache', 'metadata'])
+                expected_attrs=['info_cache', 'metadata', 'numa_topology'])
             mock_update_scheduler.assert_called_once_with(
                 self.context, inst_list)
 
@@ -881,7 +887,7 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
 
         mock_init_host.assert_called_once_with(host=our_host)
         mock_host_get.assert_called_once_with(self.context, our_host,
-            expected_attrs=['info_cache', 'metadata'])
+            expected_attrs=['info_cache', 'metadata', 'numa_topology'])
         mock_init_virt.assert_called_once_with()
         mock_temp_mut.assert_called_once_with(self.context, read_deleted='yes')
         mock_get_inst.assert_called_once_with(self.context)
@@ -923,6 +929,20 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
         mock_init_instance.assert_called_once_with(
             self.context, active_instance)
 
+    @mock.patch.object(objects.InstanceList, 'get_by_host',
+                       new=mock.Mock())
+    @mock.patch('nova.compute.manager.ComputeManager.'
+                '_validate_pinning_configuration')
+    def test_init_host_pinning_configuration_validation_failure(self,
+            mock_validate_pinning):
+        """Test that we fail init_host if the pinning configuration check
+        fails.
+ """ + mock_validate_pinning.side_effect = exception.InvalidConfiguration + + self.assertRaises(exception.InvalidConfiguration, + self.compute.init_host) + def test_init_instance_with_binding_failed_vif_type(self): # this instance will plug a 'binding_failed' vif instance = fake_instance.fake_instance_obj( @@ -948,6 +968,95 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase, self.compute._init_instance(self.context, instance) set_error_state.assert_called_once_with(self.context, instance) + def _test__validate_pinning_configuration(self, supports_pcpus=True): + instance_1 = fake_instance.fake_instance_obj( + self.context, uuid=uuids.instance_1) + instance_2 = fake_instance.fake_instance_obj( + self.context, uuid=uuids.instance_2) + instance_3 = fake_instance.fake_instance_obj( + self.context, uuid=uuids.instance_3) + instance_4 = fake_instance.fake_instance_obj( + self.context, uuid=uuids.instnace_4) + + instance_1.numa_topology = None + + numa_wo_pinning = test_instance_numa.get_fake_obj_numa_topology( + self.context) + instance_2.numa_topology = numa_wo_pinning + + numa_w_pinning = test_instance_numa.get_fake_obj_numa_topology( + self.context) + numa_w_pinning.cells[0].pin_vcpus((1, 10), (2, 11)) + numa_w_pinning.cells[0].cpu_policy = ( + fields.CPUAllocationPolicy.DEDICATED) + numa_w_pinning.cells[1].pin_vcpus((3, 0), (4, 1)) + numa_w_pinning.cells[1].cpu_policy = ( + fields.CPUAllocationPolicy.DEDICATED) + instance_3.numa_topology = numa_w_pinning + + instance_4.deleted = True + + instances = objects.InstanceList(objects=[ + instance_1, instance_2, instance_3, instance_4]) + + with mock.patch.dict(self.compute.driver.capabilities, + supports_pcpus=supports_pcpus): + self.compute._validate_pinning_configuration(instances) + + def test__validate_pinning_configuration_invalid_unpinned_config(self): + """Test that configuring only 'cpu_dedicated_set' when there are + unpinned instances on the host results in an error. + """ + self.flags(cpu_dedicated_set='0-7', group='compute') + + ex = self.assertRaises( + exception.InvalidConfiguration, + self._test__validate_pinning_configuration) + self.assertIn('This host has unpinned instances but has no CPUs ' + 'set aside for this purpose;', + six.text_type(ex)) + + def test__validate_pinning_configuration_invalid_pinned_config(self): + """Test that configuring only 'cpu_shared_set' when there are pinned + instances on the host results in an error + """ + self.flags(cpu_shared_set='0-7', group='compute') + + ex = self.assertRaises( + exception.InvalidConfiguration, + self._test__validate_pinning_configuration) + self.assertIn('This host has pinned instances but has no CPUs ' + 'set aside for this purpose;', + six.text_type(ex)) + + @mock.patch.object(manager.LOG, 'warning') + def test__validate_pinning_configuration_warning(self, mock_log): + """Test that configuring 'cpu_dedicated_set' such that some pinned + cores of the instance are outside the range it specifies results in a + warning. + """ + self.flags(cpu_shared_set='0-7', cpu_dedicated_set='8-15', + group='compute') + + self._test__validate_pinning_configuration() + + self.assertEqual(1, mock_log.call_count) + self.assertIn('Instance is pinned to host CPUs %(cpus)s ' + 'but one or more of these CPUs are not included in ', + six.text_type(mock_log.call_args[0])) + + def test__validate_pinning_configuration_no_config(self): + """Test that the entire check is skipped if there's no host + configuration. 
+ """ + self._test__validate_pinning_configuration() + + def test__validate_pinning_configuration_not_supported(self): + """Test that the entire check is skipped if the driver doesn't even + support PCPUs. + """ + self._test__validate_pinning_configuration(supports_pcpus=False) + def test__get_power_state_InstanceNotFound(self): instance = fake_instance.fake_instance_obj( self.context, diff --git a/nova/virt/driver.py b/nova/virt/driver.py index d25bbadfd7b7..c7d6dfa8ce71 100644 --- a/nova/virt/driver.py +++ b/nova/virt/driver.py @@ -170,6 +170,7 @@ class ComputeDriver(object): "supports_extend_volume": False, "supports_multiattach": False, "supports_trusted_certs": False, + "supports_pcpus": False, # Image type support flags "supports_image_type_aki": False, diff --git a/nova/virt/fake.py b/nova/virt/fake.py index f94ed65315f0..e185ad14b0f1 100644 --- a/nova/virt/fake.py +++ b/nova/virt/fake.py @@ -111,6 +111,7 @@ class FakeDriver(driver.ComputeDriver): "supports_extend_volume": True, "supports_multiattach": True, "supports_trusted_certs": True, + "supports_pcpus": False, # Supported image types "supports_image_type_raw": True, diff --git a/nova/virt/hyperv/driver.py b/nova/virt/hyperv/driver.py index 32ea069a7092..f86a3dd40914 100644 --- a/nova/virt/hyperv/driver.py +++ b/nova/virt/hyperv/driver.py @@ -100,6 +100,7 @@ class HyperVDriver(driver.ComputeDriver): "supports_device_tagging": True, "supports_multiattach": False, "supports_trusted_certs": False, + "supports_pcpus": False, # Supported image types "supports_image_type_vhd": True, diff --git a/nova/virt/ironic/driver.py b/nova/virt/ironic/driver.py index a58524f7770d..358fb1a148ca 100644 --- a/nova/virt/ironic/driver.py +++ b/nova/virt/ironic/driver.py @@ -162,6 +162,7 @@ class IronicDriver(virt_driver.ComputeDriver): "supports_attach_interface": True, "supports_multiattach": False, "supports_trusted_certs": False, + "supports_pcpus": False, # Image type support flags "supports_image_type_aki": False, diff --git a/nova/virt/libvirt/driver.py b/nova/virt/libvirt/driver.py index 862c362fc0a2..3c2bade01534 100644 --- a/nova/virt/libvirt/driver.py +++ b/nova/virt/libvirt/driver.py @@ -342,6 +342,7 @@ class LibvirtDriver(driver.ComputeDriver): # formats. If we are configured for those backends, then we # should not expose the corresponding support traits. 
"supports_image_type_qcow2": not requires_raw_image, + "supports_pcpus": True, } super(LibvirtDriver, self).__init__(virtapi) diff --git a/nova/virt/powervm/driver.py b/nova/virt/powervm/driver.py index d066f7cb0990..14a89b29eb50 100644 --- a/nova/virt/powervm/driver.py +++ b/nova/virt/powervm/driver.py @@ -77,6 +77,7 @@ class PowerVMDriver(driver.ComputeDriver): 'supports_extend_volume': True, 'supports_multiattach': False, 'supports_trusted_certs': False, + 'supports_pcpus': False, # Supported image types "supports_image_type_aki": False, diff --git a/nova/virt/vmwareapi/driver.py b/nova/virt/vmwareapi/driver.py index 02ea634c5370..e53abf703755 100644 --- a/nova/virt/vmwareapi/driver.py +++ b/nova/virt/vmwareapi/driver.py @@ -69,6 +69,7 @@ class VMwareVCDriver(driver.ComputeDriver): "supports_attach_interface": True, "supports_multiattach": False, "supports_trusted_certs": False, + "supports_pcpus": False, # Image type support flags "supports_image_type_aki": False, diff --git a/nova/virt/xenapi/driver.py b/nova/virt/xenapi/driver.py index ef3eb810f535..5b23ef3869f8 100644 --- a/nova/virt/xenapi/driver.py +++ b/nova/virt/xenapi/driver.py @@ -69,6 +69,7 @@ class XenAPIDriver(driver.ComputeDriver): "supports_device_tagging": True, "supports_multiattach": False, "supports_trusted_certs": False, + "supports_pcpus": False, # Image type support flags "supports_image_type_aki": False, diff --git a/nova/virt/zvm/driver.py b/nova/virt/zvm/driver.py index de6d8fd7e029..0ed5f98dae01 100644 --- a/nova/virt/zvm/driver.py +++ b/nova/virt/zvm/driver.py @@ -46,6 +46,8 @@ DEFAULT_EPH_DISK_FMT = 'ext3' class ZVMDriver(driver.ComputeDriver): """z/VM implementation of ComputeDriver.""" capabilities = { + "supports_pcpus": False, + # Image type support flags "supports_image_type_aki": False, "supports_image_type_ami": False,