Add 'hw:cpu_threads_policy=isolate' scheduling

The 'isolate' CPU thread policy ensures that the host provides either a
non-SMT architecture or a non-SMT-like one. In the latter case, where
the host does provide SMT, each vCPU is placed on a different physical
core and no vCPUs from other guests can be placed on the same core,
i.e. the remaining thread siblings of each used core are guaranteed to
stay unused. This gives the guest a non-SMT-like view of the host.
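
For illustration, a minimal standalone sketch of that behaviour, using
plain Python sets and a hypothetical two-core SMT cell rather than the
Nova objects touched below:

# Hypothetical SMT host cell: two physical cores, two hardware threads each.
siblings = [{0, 4}, {1, 5}]   # thread-sibling sets, one per physical core
vcpus = {0, 1}                # guest asks for two dedicated vCPUs

# Under 'isolate', each vCPU takes one thread of a distinct core...
pinning = {vcpu: sorted(core)[0]
           for vcpu, core in zip(sorted(vcpus), siblings)}

# ...and the remaining thread siblings are reserved so no other guest can
# land on them, which is what gives the guest a non-SMT-like topology.
reserved = set().union(*siblings) - set(pinning.values())

print(pinning)   # {0: 0, 1: 1}
print(reserved)  # {4, 5}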

Like the 'require' policy, the 'isolate' policy is only applied when it
is explicitly requested by the instance or image.
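
At the object level, such an explicit request is carried on the instance
NUMA cell, roughly as the new tests below construct it (sketch only;
assumes a Nova development environment for the imports):

from nova import objects
from nova.objects import fields

objects.register_all()  # make the versioned object classes available

# An instance cell carrying an explicit 'isolate' request, mirroring the
# InstanceNUMACell objects built in the tests added by this change.
inst_cell = objects.InstanceNUMACell(
    cpuset=set([0, 1]),
    memory=2048,
    cpu_policy=fields.CPUAllocationPolicy.DEDICATED,
    cpu_thread_policy=fields.CPUThreadAllocationPolicy.ISOLATE)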

The pinning code is changed so that, when the 'isolate' policy is
chosen, a core's siblings (if any) are marked as unusable rather than
merely ignored. A core is only considered usable if it and all of its
siblings are free.
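
A standalone sketch of that rule, again with plain sets and a
hypothetical sibling layout (the real logic lives in the changes below):

# A core is usable for 'isolate' only if every one of its thread siblings
# is free; cores with any pinned sibling are skipped entirely.
siblings = [{0, 4}, {1, 5}, {2, 6}, {3, 7}]
pinned = {1}                        # one thread already pinned elsewhere

fully_free = [core for core in siblings if not core & pinned]
print(fully_free)                   # [{0, 4}, {2, 6}, {3, 7}]

# When cores are chosen, all of their siblings are marked as pinned too,
# mirroring the new pin_cpus_with_siblings() helper.
chosen = fully_free[:2]
pinned |= set().union(*chosen)
print(pinned)                       # {0, 1, 2, 4, 6}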

Change-Id: Ibe76846eaf1cee7501cafb9f13a4dd193110ba1f
Implements: bp virt-driver-cpu-thread-pinning
Co-Authored-By: Stephen Finucane <stephen.finucane@intel.com>
DocImpact This completes the CPU thread pinning feature, which should
  now be documented as a metadata option.
Przemyslaw Czesnowicz 2015-07-13 17:53:12 +01:00 committed by Stephen Finucane
parent 5d6e0086b5
commit aaaba4a12e
4 changed files with 201 additions and 28 deletions


@@ -93,6 +93,20 @@ class NUMACell(base.NovaObject):
                                                 pinned=list(self.pinned_cpus))
         self.pinned_cpus -= cpus
 
+    def pin_cpus_with_siblings(self, cpus):
+        pin_siblings = set()
+        for sib in self.siblings:
+            if cpus & sib:
+                pin_siblings.update(sib)
+        self.pin_cpus(pin_siblings)
+
+    def unpin_cpus_with_siblings(self, cpus):
+        pin_siblings = set()
+        for sib in self.siblings:
+            if cpus & sib:
+                pin_siblings.update(sib)
+        self.unpin_cpus(pin_siblings)
+
     def _to_dict(self):
         return {
             'id': self.id,


@@ -100,6 +100,30 @@ class _TestNUMA(object):
         numacell.unpin_cpus(set([1, 2, 3]))
         self.assertEqual(set([1, 2, 3, 4]), numacell.free_cpus)
 
+    def test_pinning_with_siblings(self):
+        numacell = objects.NUMACell(id=0, cpuset=set([1, 2, 3, 4]), memory=512,
+                                    cpu_usage=2, memory_usage=256,
+                                    pinned_cpus=set([]),
+                                    siblings=[set([1, 3]), set([2, 4])],
+                                    mempages=[])
+
+        numacell.pin_cpus_with_siblings(set([1, 2]))
+        self.assertEqual(set(), numacell.free_cpus)
+        numacell.unpin_cpus_with_siblings(set([1]))
+        self.assertEqual(set([1, 3]), numacell.free_cpus)
+        self.assertRaises(exception.CPUPinningInvalid,
+                          numacell.unpin_cpus_with_siblings,
+                          set([3]))
+        self.assertRaises(exception.CPUPinningInvalid,
+                          numacell.pin_cpus_with_siblings,
+                          set([4]))
+        self.assertRaises(exception.CPUPinningInvalid,
+                          numacell.unpin_cpus_with_siblings,
+                          set([3, 4]))
+        self.assertEqual(set([1, 3]), numacell.free_cpus)
+        numacell.unpin_cpus_with_siblings(set([4]))
+        self.assertEqual(set([1, 2, 3, 4]), numacell.free_cpus)
+
     def test_pages_topology_wipe(self):
         pages_topology = objects.NUMAPagesTopology(
             size_kb=2048, total=1024, used=512)


@@ -2222,6 +2222,65 @@ class CPUPinningCellTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase):
         got_topo = objects.VirtCPUTopology(sockets=1, cores=5, threads=1)
         self.assertEqualTopology(got_topo, inst_pin.cpu_topology)
 
+    def test_get_pinning_isolate_policy_too_few_fully_free_cores(self):
+        host_pin = objects.NUMACell(id=0, cpuset=set([0, 1, 2, 3]),
+                                    memory=4096, memory_usage=0,
+                                    siblings=[set([0, 1]), set([2, 3])],
+                                    mempages=[], pinned_cpus=set([1]))
+        inst_pin = objects.InstanceNUMACell(
+            cpuset=set([0, 1]),
+            memory=2048,
+            cpu_policy=fields.CPUAllocationPolicy.DEDICATED,
+            cpu_thread_policy=fields.CPUThreadAllocationPolicy.ISOLATE)
+        inst_pin = hw._numa_fit_instance_cell_with_pinning(host_pin, inst_pin)
+        self.assertIsNone(inst_pin)
+
+    def test_get_pinning_isolate_policy_no_fully_free_cores(self):
+        host_pin = objects.NUMACell(id=0, cpuset=set([0, 1, 2, 3]),
+                                    memory=4096, memory_usage=0,
+                                    siblings=[set([0, 1]), set([2, 3])],
+                                    mempages=[], pinned_cpus=set([1, 2]))
+        inst_pin = objects.InstanceNUMACell(
+            cpuset=set([0, 1]),
+            memory=2048,
+            cpu_policy=fields.CPUAllocationPolicy.DEDICATED,
+            cpu_thread_policy=fields.CPUThreadAllocationPolicy.ISOLATE)
+        inst_pin = hw._numa_fit_instance_cell_with_pinning(host_pin, inst_pin)
+        self.assertIsNone(inst_pin)
+
+    def test_get_pinning_isolate_policy_fits(self):
+        host_pin = objects.NUMACell(id=0, cpuset=set([0, 1, 2, 3]),
+                                    memory=4096, memory_usage=0,
+                                    siblings=[set([0, 1]), set([2, 3])],
+                                    mempages=[], pinned_cpus=set([]))
+        inst_pin = objects.InstanceNUMACell(
+            cpuset=set([0, 1]),
+            memory=2048,
+            cpu_policy=fields.CPUAllocationPolicy.DEDICATED,
+            cpu_thread_policy=fields.CPUThreadAllocationPolicy.ISOLATE)
+        inst_pin = hw._numa_fit_instance_cell_with_pinning(host_pin, inst_pin)
+        self.assertInstanceCellPinned(inst_pin)
+        got_topo = objects.VirtCPUTopology(sockets=1, cores=2, threads=1)
+        self.assertEqualTopology(got_topo, inst_pin.cpu_topology)
+
+    def test_get_pinning_isolate_policy_fits_w_usage(self):
+        host_pin = objects.NUMACell(
+            id=0,
+            cpuset=set([0, 1, 2, 3, 4, 5, 6, 7]),
+            memory=4096, memory_usage=0,
+            pinned_cpus=set([0, 1]),
+            siblings=[set([0, 4]), set([1, 5]), set([2, 6]), set([3, 7])],
+            mempages=[])
+        inst_pin = objects.InstanceNUMACell(
+            cpuset=set([0, 1]),
+            memory=2048,
+            cpu_policy=fields.CPUAllocationPolicy.DEDICATED,
+            cpu_thread_policy=fields.CPUThreadAllocationPolicy.ISOLATE)
+        inst_pin = hw._numa_fit_instance_cell_with_pinning(host_pin, inst_pin)
+        self.assertInstanceCellPinned(inst_pin)
+        got_topo = objects.VirtCPUTopology(sockets=1, cores=2, threads=1)
+        self.assertEqualTopology(got_topo, inst_pin.cpu_topology)
+
 
 class CPUPinningTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase):
     def test_host_numa_fit_instance_to_host_single_cell(self):
@@ -2431,6 +2490,48 @@ class CPUPinningTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase):
                           hw.numa_usage_from_instances, host_pin,
                           [inst_pin_1, inst_pin_2])
 
+    def test_host_usage_from_instances_isolate(self):
+        host_pin = objects.NUMATopology(
+                cells=[objects.NUMACell(id=0, cpuset=set([0, 1, 2, 3]),
+                                        memory=4096, cpu_usage=0,
+                                        memory_usage=0,
+                                        siblings=[set([0, 2]), set([1, 3])],
+                                        mempages=[], pinned_cpus=set([]))])
+        inst_pin_1 = objects.InstanceNUMATopology(
+                cells=[objects.InstanceNUMACell(
+                    cpuset=set([0, 1]), memory=2048, id=0,
+                    cpu_pinning={0: 0, 1: 1},
+                    cpu_policy=fields.CPUAllocationPolicy.DEDICATED,
+                    cpu_thread_policy=fields.CPUThreadAllocationPolicy.ISOLATE
+                )])
+
+        new_cell = hw.numa_usage_from_instances(host_pin, [inst_pin_1])
+        self.assertEqual(host_pin.cells[0].cpuset,
+                         new_cell.cells[0].pinned_cpus)
+        self.assertEqual(new_cell.cells[0].cpu_usage, 4)
+
+    def test_host_usage_from_instances_isolate_free(self):
+        host_pin = objects.NUMATopology(
+                cells=[objects.NUMACell(id=0, cpuset=set([0, 1, 2, 3]),
+                                        memory=4096, cpu_usage=4,
+                                        memory_usage=0,
+                                        siblings=[set([0, 2]), set([1, 3])],
+                                        mempages=[],
+                                        pinned_cpus=set([0, 1, 2, 3]))])
+        inst_pin_1 = objects.InstanceNUMATopology(
+                cells=[objects.InstanceNUMACell(
+                    cpuset=set([0, 1]), memory=2048, id=0,
+                    cpu_pinning={0: 0, 1: 1},
+                    cpu_policy=fields.CPUAllocationPolicy.DEDICATED,
+                    cpu_thread_policy=fields.CPUThreadAllocationPolicy.ISOLATE
+                )])
+
+        new_cell = hw.numa_usage_from_instances(host_pin,
+                                                [inst_pin_1],
+                                                free=True)
+        self.assertEqual(set([]), new_cell.cells[0].pinned_cpus)
+        self.assertEqual(new_cell.cells[0].cpu_usage, 0)
+
 
 class CPURealtimeTestCase(test.NoDBTestCase):
     def test_success_flavor(self):


@@ -653,13 +653,17 @@ def _numa_cell_supports_pagesize_request(host_cell, inst_cell):
     return verify_pagesizes(host_cell, inst_cell, [inst_cell.pagesize])
 
 
-def _pack_instance_onto_cores(available_siblings, instance_cell, host_cell_id):
+def _pack_instance_onto_cores(available_siblings,
+                              instance_cell,
+                              host_cell_id,
+                              threads_per_core=1):
     """Pack an instance onto a set of siblings
 
     :param available_siblings: list of sets of CPU id's - available
                                siblings per core
     :param instance_cell: An instance of objects.InstanceNUMACell describing
                           the pinning requirements of the instance
+    :param threads_per_core: number of threads per core in host's cell
 
     :returns: An instance of objects.InstanceNUMACell containing the pinning
               information, and potentially a new topology to be exposed to the
@@ -729,6 +733,16 @@ def _pack_instance_onto_cores(available_siblings, instance_cell, host_cell_id):
         return fractions.gcd(threads_per_core, _orphans(instance_cell,
                                                         threads_per_core))
 
+    def _get_pinning(threads_no, sibling_set, instance_cores):
+        """Generate a CPU-vCPU pin mapping."""
+        if threads_no * len(sibling_set) < len(instance_cores):
+            return
+
+        usable_cores = map(lambda s: list(s)[:threads_no], sibling_set)
+
+        return zip(sorted(instance_cores),
+                   itertools.chain(*usable_cores))
+
     if (instance_cell.cpu_thread_policy ==
             fields.CPUThreadAllocationPolicy.REQUIRE):
         LOG.debug("Requested 'require' thread policy for %d cores",
@@ -741,37 +755,42 @@ def _pack_instance_onto_cores(available_siblings, instance_cell, host_cell_id):
             fields.CPUThreadAllocationPolicy.ISOLATE):
         LOG.debug("Requested 'isolate' thread policy for %d cores",
                   len(instance_cell))
-        raise NotImplementedError("The 'isolate' policy is not supported.")
     else:
         LOG.debug("User did not specify a thread policy. Using default "
                   "for %d cores", len(instance_cell))
 
-    # NOTE(ndipanov): We iterate over the sibling sets in descending order
-    # of cores that can be packed. This is an attempt to evenly distribute
-    # instances among physical cores
-    for threads_no, sibling_set in sorted(
-            (t for t in sibling_sets.items()), reverse=True):
-        if threads_no * len(sibling_set) < len(instance_cell):
-            continue
-
-        usable_cores = map(lambda s: list(s)[:threads_no], sibling_set)
-
-        threads_no = _threads(instance_cell, threads_no)
-
-        # NOTE(sfinucan): The key difference between the require and
-        # prefer policies is that require will not settle for non-siblings
-        # if this is all that is available. Enforce this by ensuring we're
-        # using sibling sets that contain at least one sibling
-        if (instance_cell.cpu_thread_policy ==
-                fields.CPUThreadAllocationPolicy.REQUIRE):
-            if threads_no <= 1:
-                continue
-
-        pinning = zip(sorted(instance_cell.cpuset),
-                      itertools.chain(*usable_cores))
-        break
+    if (instance_cell.cpu_thread_policy ==
+            fields.CPUThreadAllocationPolicy.ISOLATE):
+        # make sure we have at least one fully free core
+        if threads_per_core not in sibling_sets:
+            return
+
+        pinning = _get_pinning(1,  # we only want to "use" one thread per core
+                               sibling_sets[threads_per_core],
+                               instance_cell.cpuset)
+    else:
+        # NOTE(ndipanov): We iterate over the sibling sets in descending order
+        # of cores that can be packed. This is an attempt to evenly distribute
+        # instances among physical cores
+        for threads_no, sibling_set in sorted(
+                (t for t in sibling_sets.items()), reverse=True):
+
+            # NOTE(sfinucan): The key difference between the require and
+            # prefer policies is that require will not settle for non-siblings
+            # if this is all that is available. Enforce this by ensuring we're
+            # using sibling sets that contain at least one sibling
+            if (instance_cell.cpu_thread_policy ==
+                    fields.CPUThreadAllocationPolicy.REQUIRE):
+                if threads_no <= 1:
+                    continue
+
+            pinning = _get_pinning(threads_no, sibling_set,
+                                   instance_cell.cpuset)
+            if pinning:
+                break
 
     if not pinning:
         return
@@ -804,7 +823,8 @@ def _numa_fit_instance_cell_with_pinning(host_cell, instance_cell):
     if host_cell.siblings:
         # Try to pack the instance cell onto cores
         return _pack_instance_onto_cores(
-            host_cell.free_siblings, instance_cell, host_cell.id)
+            host_cell.free_siblings, instance_cell, host_cell.id,
+            max(map(len, host_cell.siblings)))
     else:
         # Straightforward to pin to available cpus when there is no
         # hyperthreading on the host
@@ -1253,16 +1273,30 @@ def numa_usage_from_instances(host, instances, free=False):
                 if instancecell.id == hostcell.id:
                     memory_usage = (
                         memory_usage + sign * instancecell.memory)
-                    cpu_usage = cpu_usage + sign * len(instancecell.cpuset)
+
+                    cpu_usage_diff = len(instancecell.cpuset)
+                    if (instancecell.cpu_thread_policy ==
+                            fields.CPUThreadAllocationPolicy.ISOLATE and
+                            hostcell.siblings):
+                        cpu_usage_diff *= max(map(len, hostcell.siblings))
+                    cpu_usage += sign * cpu_usage_diff
+
                     if instancecell.pagesize and instancecell.pagesize > 0:
                         newcell.mempages = _numa_pagesize_usage_from_cell(
                             hostcell, instancecell, sign)
                     if instance.cpu_pinning_requested:
                         pinned_cpus = set(instancecell.cpu_pinning.values())
                         if free:
-                            newcell.unpin_cpus(pinned_cpus)
+                            if (instancecell.cpu_thread_policy ==
+                                    fields.CPUThreadAllocationPolicy.ISOLATE):
+                                newcell.unpin_cpus_with_siblings(pinned_cpus)
+                            else:
+                                newcell.unpin_cpus(pinned_cpus)
                         else:
-                            newcell.pin_cpus(pinned_cpus)
+                            if (instancecell.cpu_thread_policy ==
+                                    fields.CPUThreadAllocationPolicy.ISOLATE):
+                                newcell.pin_cpus_with_siblings(pinned_cpus)
+                            else:
+                                newcell.pin_cpus(pinned_cpus)
 
         newcell.cpu_usage = max(0, cpu_usage)
         newcell.memory_usage = max(0, memory_usage)