Merge "Make overcommit check for pinned instance pagesize aware"

This commit is contained in:
Zuul 2024-05-08 13:55:26 +00:00 committed by Gerrit Code Review
commit 114b8184e4
2 changed files with 150 additions and 87 deletions

View File

@ -2529,73 +2529,146 @@ class NUMATopologyTest(test.NoDBTestCase):
class VirtNUMATopologyCellUsageTestCase(test.NoDBTestCase):
def test_fit_instance_cell_success_no_limit(self):
def test_fit_instance_cell_no_host_mempages(self):
"""Validate fitting without host or guest mempages.
This tests overcommitting without host mempages, which is allowed, and
self overcommit, which is not.
"""
# host cell has 1024 MiB memory with no mempages reported
host_cell = objects.NUMACell(
id=4,
cpuset=set([1, 2]),
pcpuset=set(),
memory=1024,
cpu_usage=0,
memory_usage=0,
pinned_cpus=set(),
mempages=[objects.NUMAPagesTopology(
size_kb=4, total=524288, used=0)],
siblings=[set([1]), set([2])])
memory_usage=512,
mempages=[],
siblings=[set([1]), set([2])],
pinned_cpus=set([]))
# instance cell requests 1024 MiB memory, no mempages -> PASS
instance_cell = objects.InstanceNUMACell(
id=0, cpuset=set([1, 2]), pcpuset=set(), memory=1024)
fitted_cell = hw._numa_fit_instance_cell(host_cell, instance_cell)
self.assertIsInstance(fitted_cell, objects.InstanceNUMACell)
self.assertEqual(host_cell.id, fitted_cell.id)
def test_fit_instance_cell_success_w_limit(self):
# instance cell requests 4096 MiB memory, no mempages -> FAIL
instance_cell = objects.InstanceNUMACell(
cpuset=set([1, 2, 3]), memory=4096)
fitted_cell = hw._numa_fit_instance_cell(host_cell, instance_cell)
self.assertIsNone(fitted_cell)
# instance cell requests 1024 MiB memory, no mempages, but has
# dedicated CPUs -> FAIL
instance_cell = objects.InstanceNUMACell(
cpuset=set([0, 1]), memory=1024,
cpu_policy=fields.CPUAllocationPolicy.DEDICATED)
fitted_cell = hw._numa_fit_instance_cell(host_cell, instance_cell)
self.assertIsNone(fitted_cell)
def test_fit_instance_cell_no_guest_mempages(self):
"""Validate fitting without guest mempages.
This tests overcommitting with host small mempages, which is allowed,
and self overcommit, which is not.
"""
# host cell has 1024 MiB memory, all small pages
host_cell = objects.NUMACell(
id=4,
cpuset=set([1, 2]),
pcpuset=set(),
memory=1024,
cpu_usage=0,
memory_usage=512,
mempages=[
# 262144 * 4 KiB (1024 MiB) 4k pages, which are all currently
# "used"
objects.NUMAPagesTopology(size_kb=4, total=262144, used=262144)
],
siblings=[set([1]), set([2])],
pinned_cpus=set([]))
# instance cell requests 1024 MiB memory, no mempages -> PASS
instance_cell = objects.InstanceNUMACell(
cpuset=set([1, 2]), memory=1024)
fitted_cell = hw._numa_fit_instance_cell(host_cell, instance_cell)
self.assertIsInstance(fitted_cell, objects.InstanceNUMACell)
self.assertEqual(host_cell.id, fitted_cell.id)
# instance cell requests 4096 MiB memory, no mempages -> FAIL
instance_cell = objects.InstanceNUMACell(
id=0, cpuset=set([1, 2, 3]), memory=4096)
fitted_cell = hw._numa_fit_instance_cell(host_cell, instance_cell)
self.assertIsNone(fitted_cell)
# instance cell requests 1024 MiB memory, no mempages, but has
# dedicated CPUs -> FAIL
instance_cell = objects.InstanceNUMACell(
cpuset=set([0, 1]), memory=1024,
cpu_policy=fields.CPUAllocationPolicy.DEDICATED)
fitted_cell = hw._numa_fit_instance_cell(host_cell, instance_cell)
self.assertIsNone(fitted_cell)
def test_fit_instance_cell_mempages(self):
"""Validate fitting with guest (and host) mempages.
This also tests overcommitting with explicitly requested guest
mempages, which is not allowed.
"""
# host cell has 1024 MiB memory, all small pages
host_cell = objects.NUMACell(
id=4,
cpuset=set([0, 1]),
pcpuset=set(),
memory=1024,
cpu_usage=0, memory_usage=0, mempages=[
# 262144 * 4 KiB (1024 MiB) 4k pages, none used
objects.NUMAPagesTopology(size_kb=4, total=262144, used=0)
],
siblings=[set([0]), set([1])],
pinned_cpus=set([]))
# instance cell requests 1024 MiB memory, all small pages -> PASS
instance_cell = objects.InstanceNUMACell(
cpuset=set([0, 1]), memory=1024, pagesize=4)
fitted_cell = hw._numa_fit_instance_cell(host_cell, instance_cell)
self.assertEqual(host_cell.id, fitted_cell.id)
# host cell now only has 1023 MiB memory, all small pages
host_cell.memory_usage = 1
host_cell.mempages[0].used = 1
# instance cell requests 1024 MiB memory again, all small pages -> FAIL
instance_cell = objects.InstanceNUMACell(
cpuset=set([0, 1]), memory=1024, pagesize=4)
fitted_cell = hw._numa_fit_instance_cell(host_cell, instance_cell)
self.assertIsNone(fitted_cell)
def test_fit_instance_cell_with_limit(self):
host_cell = objects.NUMACell(
id=4,
cpuset=set([1, 2]),
pcpuset=set(),
memory=1024,
cpu_usage=2,
memory_usage=1024,
pinned_cpus=set(),
mempages=[objects.NUMAPagesTopology(
size_kb=4, total=524288, used=0)],
siblings=[set([1]), set([2])])
mempages=[
objects.NUMAPagesTopology(size_kb=4, total=524288, used=0)
],
siblings=[set([1]), set([2])],
pinned_cpus=set([]))
limits = objects.NUMATopologyLimits(
cpu_allocation_ratio=2, ram_allocation_ratio=2)
instance_cell = objects.InstanceNUMACell(
id=0, cpuset=set([1, 2]), pcpuset=set(), memory=1024)
cpuset=set([1, 2]), memory=1024)
fitted_cell = hw._numa_fit_instance_cell(
host_cell, instance_cell, limits=limits)
self.assertIsInstance(fitted_cell, objects.InstanceNUMACell)
self.assertEqual(host_cell.id, fitted_cell.id)
def test_fit_instance_cell_self_overcommit(self):
host_cell = objects.NUMACell(
id=4,
cpuset=set([1, 2]),
memory=1024,
cpu_usage=0,
memory_usage=0,
mempages=[objects.NUMAPagesTopology(
size_kb=4, total=524288, used=0)],
siblings=[set([1]), set([2])],
pinned_cpus=set())
limits = objects.NUMATopologyLimits(
cpu_allocation_ratio=2, ram_allocation_ratio=2)
instance_cell = objects.InstanceNUMACell(
id=0, cpuset=set([1, 2, 3]), memory=4096)
fitted_cell = hw._numa_fit_instance_cell(
host_cell, instance_cell, limits=limits)
self.assertIsNone(fitted_cell)
def test_fit_instance_cell_fail_w_limit(self):
host_cell = objects.NUMACell(
id=4,
cpuset=set([1, 2]),
memory=1024,
cpu_usage=2,
memory_usage=1024,
mempages=[objects.NUMAPagesTopology(
size_kb=4, total=524288, used=0)],
siblings=[set([1]), set([2])],
pinned_cpus=set())
instance_cell = objects.InstanceNUMACell(
id=0, cpuset=set([1, 2]), pcpuset=set(), memory=4096)
limits = objects.NUMATopologyLimits(
@ -3169,30 +3242,6 @@ class CPUPinningCellTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase):
self.assertIsNone(inst_pin)
def test_get_pinning_inst_too_large_mem(self):
host_pin = objects.NUMACell(
id=0,
cpuset=set(),
pcpuset=set([0, 1, 2]),
memory=2048,
memory_usage=1024,
pinned_cpus=set(),
mempages=[],
siblings=[set([0]), set([1]), set([2])])
inst_pin = objects.InstanceNUMACell(
cpuset=set(),
pcpuset=set([0, 1, 2]),
memory=2048,
cpu_policy=fields.CPUAllocationPolicy.DEDICATED,
)
limits = objects.NUMATopologyLimits(
cpu_allocation_ratio=2, ram_allocation_ratio=2,
)
inst_pin = hw._numa_fit_instance_cell(host_pin, inst_pin, limits)
self.assertIsNone(inst_pin)
def test_get_pinning_inst_not_avail(self):
host_pin = objects.NUMACell(
id=0,

View File

@ -961,10 +961,15 @@ def _numa_fit_instance_cell(
LOG.debug('No specific pagesize requested for instance, '
'selected pagesize: %d', pagesize)
# we want to allow overcommit in this case as we're not using
# hugepages
if not host_cell.can_fit_pagesize(pagesize,
instance_cell.memory * units.Ki,
use_free=False):
# hugepages *except* if using CPU pinning, which for legacy reasons
# does not allow overcommit
use_free = instance_cell.cpu_policy in (
fields.CPUAllocationPolicy.DEDICATED,
fields.CPUAllocationPolicy.MIXED,
)
if not host_cell.can_fit_pagesize(
pagesize, instance_cell.memory * units.Ki, use_free=use_free
):
LOG.debug('Not enough available memory to schedule instance '
'with pagesize %(pagesize)d. Required: '
'%(required)s, available: %(available)s, total: '
@ -977,16 +982,35 @@ def _numa_fit_instance_cell(
else:
# The host does not support explicit page sizes. Ignore pagesizes
# completely.
# NOTE(stephenfin): Do not allow an instance to overcommit against
# itself on any NUMA cell, i.e. with 'ram_allocation_ratio = 2.0'
# on a host with 1GB RAM, we should allow two 1GB instances but not
# one 2GB instance.
if instance_cell.memory > host_cell.memory:
LOG.debug('Not enough host cell memory to fit instance cell. '
'Required: %(required)d, actual: %(actual)d',
{'required': instance_cell.memory,
'actual': host_cell.memory})
return None
# one 2GB instance. If CPU pinning is in use, don't allow
# overcommit at all.
if instance_cell.cpu_policy in (
fields.CPUAllocationPolicy.DEDICATED,
fields.CPUAllocationPolicy.MIXED,
):
if host_cell.avail_memory < instance_cell.memory:
LOG.debug(
'Not enough host cell memory to fit instance cell. '
'Oversubscription is not possible with pinned '
'instances. '
'Required: %(required)d, available: %(available)d, '
'total: %(total)d. ',
{'required': instance_cell.memory,
'available': host_cell.avail_memory,
'total': host_cell.memory})
return None
else:
if host_cell.memory < instance_cell.memory:
LOG.debug(
'Not enough host cell memory to fit instance cell. '
'Required: %(required)d, actual: %(total)d',
{'required': instance_cell.memory,
'total': host_cell.memory})
return None
# NOTE(stephenfin): As with memory, do not allow an instance to overcommit
# against itself on any NUMA cell
@ -1036,16 +1060,6 @@ def _numa_fit_instance_cell(
'num_cpu_reserved': cpuset_reserved})
return None
if instance_cell.memory > host_cell.avail_memory:
LOG.debug('Not enough available memory to schedule instance. '
'Oversubscription is not possible with pinned '
'instances. Required: %(required)s, available: '
'%(available)s, total: %(total)s. ',
{'required': instance_cell.memory,
'available': host_cell.avail_memory,
'total': host_cell.memory})
return None
# Try to pack the instance cell onto cores
instance_cell = _pack_instance_onto_cores(
host_cell, instance_cell, num_cpu_reserved=cpuset_reserved,