Fix CPU pinning for odd number of CPUs with hyperthreading

The previous logic did not handle the case where the instance cell has an
odd number of vcpus that is larger than any sibling set on the host. The
logic would (accidentally) either refuse to pin such an instance even
though there were ample free cores, or (when there were enough free
sibling sets on the host) spread the instance across cores instead of
packing it onto siblings as the default policy intends.

This patch fixes some incorrect assumptions in the code while also
simplifying it. As an added bonus, we still attempt to expose to the
instance (via the VirtCPUTopology, and this time correctly) the largest
possible number of threads.

Finally, we add some more comments to clarify the intent behind the
current packing logic, with pointers on how it could be tweaked to
achieve different results in the future.

Change-Id: I2c0b3b250ffb1a7483299df13b317cdb24f8141d
Co-Authored-By: Stephen Finucane <stephen.finucane@intel.com>
Closes-bug: 1501358
Closes-bug: 1467927
Nikola Dipanov 2015-09-30 19:30:59 +01:00 committed by Stephen Finucane
parent cc500b2c98
commit d5bed8fad9
2 changed files with 147 additions and 16 deletions
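
To illustrate the fixed behaviour, here is a minimal sketch that mirrors the
new test_get_pinning_host_siblings_instance_odd_fit test below; it is not part
of the commit itself, and it assumes the usual nova.objects /
nova.virt.hardware imports used by the test module:

    # Illustrative only: a 5-vCPU instance on a host with 2-thread siblings.
    from nova import objects
    from nova.virt import hardware as hw

    objects.register_all()

    host = objects.NUMACell(id=0, cpuset=set(range(8)), memory=4096,
                            memory_usage=0, mempages=[], pinned_cpus=set(),
                            siblings=[set([0, 1]), set([2, 3]),
                                      set([4, 5]), set([6, 7])])
    inst = objects.InstanceNUMACell(cpuset=set(range(5)), memory=2048)

    # With the fix, the odd-sized instance is packed onto siblings rather than
    # rejected or spread, and the guest topology becomes 5 cores x 1 thread.
    inst = hw._numa_fit_instance_cell_with_pinning(host, inst)
    assert (inst.cpu_topology.cores, inst.cpu_topology.threads) == (5, 1)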

nova/tests/unit/virt/test_hardware.py

@@ -12,6 +12,7 @@
# License for the specific language governing permissions and limitations
# under the License.
import collections
import uuid
import mock
@@ -1896,6 +1897,25 @@ class _CPUPinningTestCaseBase(object):
self.assertEqual(len(instance_cell.cpuset),
len(instance_cell.cpu_pinning))
def assertPinningPreferThreads(self, instance_cell, host_cell):
"""Make sure we are preferring threads.
We do this by assessing that at least 2 CPUs went to the same core
if that was even possible to begin with.
"""
max_free_siblings = max(map(len, host_cell.free_siblings))
if len(instance_cell) > 1 and max_free_siblings > 1:
cpu_to_sib = {}
for sib in host_cell.free_siblings:
for cpu in sib:
cpu_to_sib[cpu] = tuple(sorted(sib))
pins_per_sib = collections.defaultdict(int)
for inst_p, host_p in instance_cell.cpu_pinning.items():
pins_per_sib[cpu_to_sib[host_p]] += 1
self.assertTrue(max(pins_per_sib.values()) > 1,
"Seems threads were not prefered by the pinning "
"logic.")
class CPUPinningCellTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase):
def test_get_pinning_inst_too_large_cpu(self):
@@ -2031,7 +2051,7 @@ class CPUPinningCellTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase):
self.assertInstanceCellPinned(inst_pin)
got_topo = objects.VirtCPUTopology(sockets=1, cores=1, threads=4)
self.assertEqualTopology(got_topo, inst_pin.cpu_topology)
got_pinning = {x: x + 4 for x in range(0, 4)}
got_pinning = {x: x for x in range(0, 4)}
self.assertEqual(got_pinning, inst_pin.cpu_pinning)
def test_get_pinning_host_siblings_fit(self):
@@ -2048,6 +2068,76 @@ class CPUPinningCellTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase):
got_pinning = {x: x for x in range(0, 4)}
self.assertEqual(got_pinning, inst_pin.cpu_pinning)
def test_get_pinning_host_siblings_instance_odd_fit(self):
host_pin = objects.NUMACell(id=0, cpuset=set([0, 1, 2, 3, 4, 5, 6, 7]),
memory=4096, memory_usage=0,
siblings=[set([0, 1]), set([2, 3]),
set([4, 5]), set([6, 7])],
mempages=[], pinned_cpus=set([]))
inst_pin = objects.InstanceNUMACell(cpuset=set([0, 1, 2, 3, 4]),
memory=2048)
inst_pin = hw._numa_fit_instance_cell_with_pinning(host_pin, inst_pin)
self.assertInstanceCellPinned(inst_pin)
got_topo = objects.VirtCPUTopology(sockets=1, cores=5, threads=1)
self.assertEqualTopology(got_topo, inst_pin.cpu_topology)
def test_get_pinning_host_siblings_instance_fit_optimize_threads(self):
host_pin = objects.NUMACell(id=0, cpuset=set([0, 1, 2, 3, 4, 5, 6, 7]),
memory=4096, memory_usage=0,
siblings=[set([0, 1, 2, 3]),
set([4, 5, 6, 7])],
mempages=[], pinned_cpus=set([]))
inst_pin = objects.InstanceNUMACell(cpuset=set([0, 1, 2, 3, 4, 5]),
memory=2048)
inst_pin = hw._numa_fit_instance_cell_with_pinning(host_pin, inst_pin)
self.assertInstanceCellPinned(inst_pin)
got_topo = objects.VirtCPUTopology(sockets=1, cores=3, threads=2)
self.assertEqualTopology(got_topo, inst_pin.cpu_topology)
def test_get_pinning_host_siblings_instance_odd_fit_w_usage(self):
host_pin = objects.NUMACell(id=0, cpuset=set([0, 1, 2, 3, 4, 5, 6, 7]),
memory=4096, memory_usage=0,
siblings=[set([0, 1]), set([2, 3]),
set([4, 5]), set([6, 7])],
mempages=[], pinned_cpus=set([0, 2, 5]))
inst_pin = objects.InstanceNUMACell(cpuset=set([0, 1, 2]),
memory=2048)
inst_pin = hw._numa_fit_instance_cell_with_pinning(host_pin, inst_pin)
self.assertInstanceCellPinned(inst_pin)
got_topo = objects.VirtCPUTopology(sockets=1, cores=3, threads=1)
self.assertEqualTopology(got_topo, inst_pin.cpu_topology)
def test_get_pinning_host_siblings_instance_odd_fit_orphan_only(self):
host_pin = objects.NUMACell(id=0, cpuset=set([0, 1, 2, 3, 4, 5, 6, 7]),
memory=4096, memory_usage=0,
siblings=[set([0, 1]), set([2, 3]),
set([4, 5]), set([6, 7])],
mempages=[], pinned_cpus=set([0, 2, 5, 6]))
inst_pin = objects.InstanceNUMACell(cpuset=set([0, 1, 2, 3]),
memory=2048)
inst_pin = hw._numa_fit_instance_cell_with_pinning(host_pin, inst_pin)
self.assertInstanceCellPinned(inst_pin)
got_topo = objects.VirtCPUTopology(sockets=1, cores=4, threads=1)
self.assertEqualTopology(got_topo, inst_pin.cpu_topology)
def test_get_pinning_host_siblings_large_instance_odd_fit(self):
host_pin = objects.NUMACell(id=0, cpuset=set([0, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14,
15]),
memory=4096, memory_usage=0,
siblings=[set([0, 8]), set([1, 9]),
set([2, 10]), set([3, 11]),
set([4, 12]), set([5, 13]),
set([6, 14]), set([7, 15])],
mempages=[], pinned_cpus=set([]))
inst_pin = objects.InstanceNUMACell(cpuset=set([0, 1, 2, 3, 4]),
memory=2048)
inst_pin = hw._numa_fit_instance_cell_with_pinning(host_pin, inst_pin)
self.assertInstanceCellPinned(inst_pin)
self.assertPinningPreferThreads(inst_pin, host_pin)
got_topo = objects.VirtCPUTopology(sockets=1, cores=5, threads=1)
self.assertEqualTopology(got_topo, inst_pin.cpu_topology)
class CPUPinningTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase):
def test_host_numa_fit_instance_to_host_single_cell(self):

nova/virt/hardware.py

@@ -13,6 +13,7 @@
# under the License.
import collections
import fractions
import itertools
from oslo_config import cfg
@@ -674,6 +675,16 @@ def _pack_instance_onto_cores(available_siblings, instance_cell, host_cell_id):
This method will calculate the pinning for the given instance and its
topology, making sure that hyperthreads of the instance match up with
those of the host when the pinning takes effect.
Currently the strategy for packing is to prefer siblings and to use
cores evenly, by using emptier cores first. This is achieved by the way we
order cores in the can_pack structure, and the order in which we iterate
through it.
The main packing loop that iterates over the can_pack dictionary does not
currently try to look for a fit that maximizes the number of siblings, but
simply relies on the iteration ordering and picks the first viable
placement.
"""
# We build up a data structure 'can_pack' that answers the question:
@@ -689,8 +700,44 @@ def _pack_instance_onto_cores(available_siblings, instance_cell, host_cell_id):
if threads_per_core * len(cores_list) < len(instance_cell):
return False
else:
return len(instance_cell) % threads_per_core == 0
return True
def _orphans(instance_cell, threads_per_core):
"""Number of instance CPUs which will not fill up a host core.
Best explained by an example: consider a set of free host cores such as:
[(0, 1), (3, 5), (6, 7, 8)]
This would be a case of 2 threads_per_core AKA an entry for 2 in the
can_pack structure.
If we attempt to pack a 5-core instance onto it then, because we
iterate the list in order, we will end up with a single core of the
instance pinned to a thread "alone" (with id 6), and we would have one
'orphan' vCPU.
"""
return len(instance_cell) % threads_per_core
def _threads(instance_cell, threads_per_core):
"""Threads to expose to the instance via the VirtCPUTopology.
This is calculated by taking the GCD of the number of threads we are
considering at the moment, and the number of orphans. An example for
instance_cell = 6, threads_per_core = 4.
So we can fit the instance as such:
    [(0, 1, 2, 3), (4, 5, 6, 7), (8, 9, 10, 11)]
      x  x  x  x    x  x
We can't expose 4 threads, as that will not be a valid topology (all
cores exposed to the guest have to have an equal number of threads),
and 1 would be too restrictive, but we want all threads that the guest
sees to be on the same physical core, so we take the GCD of 4 (max number of
threads) and 2 (number of 'orphan' CPUs) and get 2 as the number of
threads.
"""
return fractions.gcd(threads_per_core, _orphans(instance_cell,
threads_per_core))
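# A quick worked example of the two helpers above (illustrative only, not
# part of the patch): packing 6 instance vCPUs onto sibling sets of 4
# host threads gives
#     orphans = 6 % 4                      # = 2 vCPUs that don't fill a core
#     threads = fractions.gcd(4, orphans)  # = gcd(4, 2) = 2 threads per core
#     cores = 6 / threads                  # = 3 cores exposed to the guest
# i.e. the sockets=1, cores=3, threads=2 topology asserted by
# test_get_pinning_host_siblings_instance_fit_optimize_threads.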
# We iterate over the can_pack dict in descending order of cores that
# can be packed - an attempt to get even distribution over time
@@ -702,9 +749,11 @@ def _pack_instance_onto_cores(available_siblings, instance_cell, host_cell_id):
pinning = zip(sorted(instance_cell.cpuset),
itertools.chain(*sliced_sibs))
threads = _threads(instance_cell, cores_per_sib)
cores = len(instance_cell) / threads
topology = objects.VirtCPUTopology(sockets=1,
cores=len(sliced_sibs),
threads=cores_per_sib)
cores=cores,
threads=threads)
instance_cell.pin_vcpus(*pinning)
instance_cell.cpu_topology = topology
instance_cell.id = host_cell_id
@@ -729,17 +778,9 @@ def _numa_fit_instance_cell_with_pinning(host_cell, instance_cell):
return
if host_cell.siblings:
# Try to pack the instance cell in one core
largest_free_sibling_set = sorted(
host_cell.free_siblings, key=len)[-1]
if len(instance_cell.cpuset) <= len(largest_free_sibling_set):
return _pack_instance_onto_cores(
[largest_free_sibling_set], instance_cell, host_cell.id)
# We can't to pack it onto one core so try with avail siblings
else:
return _pack_instance_onto_cores(
host_cell.free_siblings, instance_cell, host_cell.id)
# Try to pack the instance cell onto cores
return _pack_instance_onto_cores(
host_cell.free_siblings, instance_cell, host_cell.id)
else:
# Straightforward to pin to available cpus when there is no
# hyperthreading on the host