From 0696a5cd5f0fdc08951a074961bb8ce0c3310086 Mon Sep 17 00:00:00 2001 From: Alexander Schmidt Date: Fri, 30 Jan 2015 11:56:24 +0100 Subject: [PATCH] Add handling for offlined CPUs to the nova libvirt driver. When a host system has CPUs that are offlined via CPU hotplug, nova fails to start an instance on the host. Currently the libvirt driver does not check if the CPUs it selects for running the instance are online or offline. As a result, CPUs that are offline can become part of the cpuset that is passed to libvirt. Libvirt presents the following error in this case: libvirtError: Invalid value '8-15,24-31' for 'cpuset.cpus': Invalid argument With this fix, the nova libvirt driver makes use of the getCPUMap API in libvirt to determine if CPUs are online or offline. When selecting a CPU set for running an instance, offline CPUs are masked out. Rationale: on server platforms like s390, it is common to have offlined CPUs on a host as the platform offers capabilities to run multiple host operating systems (e.g. multiple KVM hypervisors / compute nodes). CPUs can dynamically be assigned to the different host operating systems, so it is common to have offlined CPUs on a compute node. 
Change-Id: I506ebc9608e17e02d807e5002fe867309c22aafc Closes-Bug: #1417144 --- nova/tests/unit/virt/libvirt/fakelibvirt.py | 4 +++ nova/tests/unit/virt/libvirt/test_driver.py | 28 ++++++++++++++++----- nova/virt/libvirt/driver.py | 10 +++++--- nova/virt/libvirt/host.py | 20 +++++++++++++++ 4 files changed, 53 insertions(+), 9 deletions(-) diff --git a/nova/tests/unit/virt/libvirt/fakelibvirt.py b/nova/tests/unit/virt/libvirt/fakelibvirt.py index 1bf071b4c4d9..9533b7200056 100644 --- a/nova/tests/unit/virt/libvirt/fakelibvirt.py +++ b/nova/tests/unit/virt/libvirt/fakelibvirt.py @@ -827,6 +827,10 @@ class Connection(object): def registerCloseCallback(self, cb, opaque): pass + def getCPUMap(self): + """Return spoofed CPU map, showing 2 online CPUs.""" + return (2, [True] * 2, 2) + def getCapabilities(self): """Return spoofed capabilities.""" return ''' diff --git a/nova/tests/unit/virt/libvirt/test_driver.py b/nova/tests/unit/virt/libvirt/test_driver.py index 5339175c59fa..3d74b997c9b8 100644 --- a/nova/tests/unit/virt/libvirt/test_driver.py +++ b/nova/tests/unit/virt/libvirt/test_driver.py @@ -1234,7 +1234,9 @@ class LibvirtConnTestCase(test.TestCase): mock.patch.object( random, 'choice', side_effect=lambda cells: cells[0]), mock.patch.object(pci_manager, "get_instance_pci_devs", - return_value=[pci_device])): + return_value=[pci_device]), + mock.patch.object(host.Host, 'get_online_cpus', + return_value=set(range(8)))): cfg = conn._get_guest_config(instance_ref, [], {}, disk_info) self.assertIsNone(instance_ref.numa_topology) self.assertEqual(set([2, 3]), cfg.cpuset) @@ -1279,6 +1281,8 @@ class LibvirtConnTestCase(test.TestCase): host.Host, "get_capabilities", return_value=caps), mock.patch.object( hardware, 'get_vcpu_pin_set', return_value=set([3])), + mock.patch.object(host.Host, 'get_online_cpus', + return_value=set(range(8))), mock.patch.object(pci_manager, "get_instance_pci_devs", return_value=[pci_device])): cfg = conn._get_guest_config(instance_ref, [], {}, 
disk_info) @@ -1402,9 +1406,12 @@ class LibvirtConnTestCase(test.TestCase): mock.patch.object( hardware, 'get_vcpu_pin_set', return_value=set([2, 3])), mock.patch.object( - random, 'choice', side_effect=lambda cells: cells[0]) + random, 'choice', side_effect=lambda cells: cells[0]), + mock.patch.object(host.Host, 'get_online_cpus', + return_value=set(range(8))) ) as (has_min_version_mock, get_host_cap_mock, - get_vcpu_pin_set_mock, choice_mock): + get_vcpu_pin_set_mock, choice_mock, + get_online_cpus_mock): cfg = drvr._get_guest_config(instance_ref, [], {}, disk_info) # NOTE(ndipanov): we make sure that pin_set was taken into account # when choosing viable cells @@ -1498,7 +1505,9 @@ class LibvirtConnTestCase(test.TestCase): return_value=caps), mock.patch.object( hardware, 'get_vcpu_pin_set', - return_value=set([2, 3, 4, 5])) + return_value=set([2, 3, 4, 5])), + mock.patch.object(host.Host, 'get_online_cpus', + return_value=set(range(8))), ): cfg = drvr._get_guest_config(instance_ref, [], {}, disk_info) self.assertIsNone(cfg.cpuset) @@ -1575,6 +1584,8 @@ class LibvirtConnTestCase(test.TestCase): return_value=True), mock.patch.object(host.Host, "get_capabilities", return_value=caps), + mock.patch.object(host.Host, 'get_online_cpus', + return_value=set(range(8))), ): cfg = drvr._get_guest_config(instance_ref, [], {}, disk_info) self.assertIsNone(cfg.cpuset) @@ -1649,7 +1660,9 @@ class LibvirtConnTestCase(test.TestCase): mock.patch.object(host.Host, 'has_min_version', return_value=True), mock.patch.object(host.Host, "get_capabilities", - return_value=caps) + return_value=caps), + mock.patch.object(host.Host, 'get_online_cpus', + return_value=set(range(8))), ): cfg = conn._get_guest_config(instance_ref, [], {}, disk_info) self.assertIsNone(cfg.cpuset) @@ -9543,7 +9556,10 @@ class LibvirtConnTestCase(test.TestCase): mock.patch.object(host.Host, "get_capabilities", return_value=caps), mock.patch.object( - hardware, 'get_vcpu_pin_set', return_value=set([0, 1, 3])) + 
hardware, 'get_vcpu_pin_set', + return_value=set([0, 1, 3, 4, 5])), + mock.patch.object(host.Host, 'get_online_cpus', + return_value=set([0, 1, 2, 3, 6])), ): got_topo = drvr._get_host_numa_topology() got_topo_dict = got_topo._to_dict() diff --git a/nova/virt/libvirt/driver.py b/nova/virt/libvirt/driver.py index 6783b49f46b2..b6ac5d1157cd 100644 --- a/nova/virt/libvirt/driver.py +++ b/nova/virt/libvirt/driver.py @@ -4736,6 +4736,11 @@ class LibvirtDriver(driver.ComputeDriver): cells = [] allowed_cpus = hardware.get_vcpu_pin_set() + online_cpus = self._host.get_online_cpus() + if allowed_cpus: + allowed_cpus &= online_cpus + else: + allowed_cpus = online_cpus for cell in topology.cells: cpuset = set(cpu.id for cpu in cell.cpus) @@ -4744,9 +4749,8 @@ class LibvirtDriver(driver.ComputeDriver): if cpu.siblings else () for cpu in cell.cpus) )) - if allowed_cpus: - cpuset &= allowed_cpus - siblings = [sib & allowed_cpus for sib in siblings] + cpuset &= allowed_cpus + siblings = [sib & allowed_cpus for sib in siblings] # Filter out singles and empty sibling sets that may be left siblings = [sib for sib in siblings if len(sib) > 1] diff --git a/nova/virt/libvirt/host.py b/nova/virt/libvirt/host.py index 95e266afe993..3045fa64774f 100644 --- a/nova/virt/libvirt/host.py +++ b/nova/virt/libvirt/host.py @@ -585,6 +585,26 @@ class Host(object): return doms + def get_online_cpus(self): + """Get the set of CPUs that are online on the host + + Method is only used by NUMA code paths which check on + libvirt version >= 1.0.4. getCPUMap() was introduced in + libvirt 1.0.0. + + :returns: set of online CPUs, raises libvirtError on error + + """ + + (cpus, cpu_map, online) = self.get_connection().getCPUMap() + + online_cpus = set() + for cpu in range(cpus): + if cpu_map[cpu]: + online_cpus.add(cpu) + + return online_cpus + def get_capabilities(self): """Returns the host capabilities information