Instances with NUMA will be packed onto hosts

This patch makes the NUMATopologyFilter and instance claims on the
compute host use the instance fitting logic, so that instances are
actually packed onto NUMA-capable hosts.

This also means that the NUMA placement calculated during a
successful claim needs to be updated in the database to reflect the
host NUMA cell ids that the instance cells will be pinned to.

Using fit_instance_to_host() to decide whether an instance can land
on a host makes the NUMATopologyFilter code cleaner, as it now fully
re-uses the logic in the VirtNUMAHostTopology and
VirtNUMATopologyCellUsage classes.
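
For illustration, the packing idea behind fit_instance_to_host() can be
sketched as follows. This is a simplified stand-in, not the Nova
implementation: the real code in nova.virt.hardware works on the Virt*
topology classes and also honours over-subscription limits.

    import itertools
    from collections import namedtuple

    # Hypothetical stand-in for the Virt* cell classes; only the fields
    # this sketch needs.
    Cell = namedtuple('Cell', ['id', 'cpuset', 'memory'])

    def fit_instance_to_host(host_cells, instance_cells):
        """Return instance cells pinned to host cell ids, or None."""
        for ordering in itertools.permutations(host_cells,
                                               len(instance_cells)):
            fitted = []
            for host_cell, inst_cell in zip(ordering, instance_cells):
                if (len(inst_cell.cpuset) <= len(host_cell.cpuset)
                        and inst_cell.memory <= host_cell.memory):
                    # Pin the instance cell to this host cell's id.
                    fitted.append(Cell(host_cell.id, inst_cell.cpuset,
                                       inst_cell.memory))
            if len(fitted) == len(instance_cells):
                return fitted
        return None

    host = [Cell(0, {0, 1, 2, 3}, 2048), Cell(1, {4, 5, 6, 7}, 2048)]
    inst = [Cell(0, {0, 1}, 512), Cell(1, {2, 3, 4}, 1024)]
    print(fit_instance_to_host(host, inst))  # pins instance cells to 0 and 1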

Closes-bug: #1386236
(cherry picked from commit 53099f3bf2)

Conflicts:
	nova/compute/manager.py
	nova/tests/unit/compute/test_claims.py
	nova/tests/unit/compute/test_resource_tracker.py
	nova/virt/hardware.py

Change-Id: Ieabafea73b4d566f4194ca60be38b6415d8a8f3d
Nikola Dipanov 2014-11-12 17:14:01 +01:00
parent ccb7ef2b01
commit ee00c8015c
9 changed files with 61 additions and 45 deletions


@@ -282,8 +282,7 @@ and try to match it with the topology exposed by the host, accounting for the
 ``ram_allocation_ratio`` and ``cpu_allocation_ratio`` for over-subscription. The
 filtering is done in the following manner:
 
-* Filter will try to match the exact NUMA cells of the instance to those of
-  the host. It *will not* attempt to pack the instance onto the host.
+* Filter will attempt to pack instance cells onto host cells.
 * It will consider the standard over-subscription limits for each host NUMA cell,
   and provide limits to the compute host accordingly (as mentioned above).
 * If instance has no topology defined, it will be considered for any host.
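
The over-subscription limits mentioned in the bullets above are computed
per host NUMA cell. A rough sketch of that computation, with illustrative
ratio values (the real ones come from ram_allocation_ratio and
cpu_allocation_ratio):

    from collections import namedtuple

    HostCell = namedtuple('HostCell', ['id', 'cpuset', 'memory'])

    def cell_limits(cell, ram_ratio=1.5, cpu_ratio=16.0):
        """Scale one host cell's capacity by the allocation ratios."""
        max_cell_memory = int(cell.memory * ram_ratio)  # MB after over-subscription
        max_cell_cpu = len(cell.cpuset) * cpu_ratio     # vCPUs after over-subscription
        return max_cell_cpu, max_cell_memory

    print(cell_limits(HostCell(0, {0, 1, 2, 3}, 2048)))  # (64.0, 3072)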


@@ -35,6 +35,7 @@ class NopClaim(object):
 
     def __init__(self, migration=None):
         self.migration = migration
+        self.claimed_numa_topology = None
 
     @property
     def disk_gb(self):
@@ -200,13 +201,22 @@ class Claim(NopClaim):
 
     def _test_numa_topology(self, resources, limit):
         host_topology = resources.get('numa_topology')
-        if host_topology and limit:
+        requested_topology = (self.numa_topology and
+                              self.numa_topology.topology_from_obj())
+        if host_topology:
             host_topology = hardware.VirtNUMAHostTopology.from_json(
                     host_topology)
-            instances_topology = (
-                    [self.numa_topology] if self.numa_topology else [])
-            return hardware.VirtNUMAHostTopology.claim_test(
-                    host_topology, instances_topology, limit)
+            instance_topology = (
+                    hardware.VirtNUMAHostTopology.fit_instance_to_host(
+                        host_topology, requested_topology,
+                        limits_topology=limit))
+            if requested_topology and not instance_topology:
+                return (_("Requested instance NUMA topology cannot fit "
+                          "the given host NUMA topology"))
+            elif instance_topology:
+                self.claimed_numa_topology = (
+                        objects.InstanceNUMATopology.obj_from_topology(
+                            instance_topology))
 
     def _test(self, type_, unit, total, used, requested, limit):
         """Test if the given type of resource needed for a claim can be safely
@@ -263,8 +273,11 @@ class ResizeClaim(Claim):
 
     @property
     def numa_topology(self):
-        return hardware.VirtNUMAInstanceTopology.get_constraints(
+        instance_topology = hardware.VirtNUMAInstanceTopology.get_constraints(
             self.instance_type, self.image_meta)
+        if instance_topology:
+            return objects.InstanceNUMATopology.obj_from_topology(
+                instance_topology)
 
     def _test_pci(self):
         pci_requests = objects.InstancePCIRequests.\


@@ -1404,7 +1404,7 @@ class ComputeManager(manager.Manager):
         rt = self._get_resource_tracker(node)
         try:
             limits = filter_properties.get('limits', {})
-            with rt.instance_claim(context, instance, limits):
+            with rt.instance_claim(context, instance, limits) as inst_claim:
                 # NOTE(russellb) It's important that this validation be done
                 # *after* the resource tracker instance claim, as that is where
                 # the host is set on the instance.
@@ -1419,6 +1419,7 @@ class ComputeManager(manager.Manager):
 
                 instance.vm_state = vm_states.BUILDING
                 instance.task_state = task_states.BLOCK_DEVICE_MAPPING
+                instance.numa_topology = inst_claim.claimed_numa_topology
                 instance.save()
 
                 # Verify that all the BDMs have a device_name set and assign a
@@ -2090,7 +2091,7 @@ class ComputeManager(manager.Manager):
                     extra_usage_info={'image_name': image_name})
             try:
                 rt = self._get_resource_tracker(node)
-                with rt.instance_claim(context, instance, limits):
+                with rt.instance_claim(context, instance, limits) as inst_claim:
                     # NOTE(russellb) It's important that this validation be done
                     # *after* the resource tracker instance claim, as that is where
                     # the host is set on the instance.
@@ -2101,6 +2102,7 @@ class ComputeManager(manager.Manager):
                         block_device_mapping) as resources:
                     instance.vm_state = vm_states.BUILDING
                     instance.task_state = task_states.SPAWNING
+                    instance.numa_topology = inst_claim.claimed_numa_topology
                     instance.save(expected_task_state=
                             task_states.BLOCK_DEVICE_MAPPING)
                     block_device_info = resources['block_device_info']
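
Both manager hunks follow the same pattern: bind the claim with
"as inst_claim" and copy the fitted topology onto the instance before it
is saved. Reduced to a runnable sketch, with FakeTracker as a
hypothetical stand-in for the resource tracker:

    from contextlib import contextmanager

    class FakeClaim(object):
        def __init__(self):
            self.claimed_numa_topology = 'fitted-topology'  # set during the claim

    class FakeTracker(object):
        @contextmanager
        def instance_claim(self, context, instance, limits):
            yield FakeClaim()  # abort/rollback handling omitted in this sketch

    instance = {'numa_topology': None}
    with FakeTracker().instance_claim(None, instance, {}) as inst_claim:
        instance['numa_topology'] = inst_claim.claimed_numa_topology
    print(instance)  # {'numa_topology': 'fitted-topology'}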


@@ -130,6 +130,7 @@ class ResourceTracker(object):
                 overhead=overhead, limits=limits)
 
         self._set_instance_host_and_node(context, instance_ref)
+        instance_ref['numa_topology'] = claim.claimed_numa_topology
 
         # Mark resources in-use and update stats
         self._update_usage_from_instance(context, self.compute_node,
@@ -593,9 +594,16 @@ class ResourceTracker(object):
                     instance['system_metadata'])
             if itype:
+                host_topology = resources.get('numa_topology')
+                if host_topology:
+                    host_topology = hardware.VirtNUMAHostTopology.from_json(
+                            host_topology)
                 numa_topology = (
                         hardware.VirtNUMAInstanceTopology.get_constraints(
                             itype, image_meta))
+                numa_topology = (
+                        hardware.VirtNUMAHostTopology.fit_instance_to_host(
+                            host_topology, numa_topology))
                 usage = self._get_usage_dict(
                         itype, numa_topology=numa_topology)
 
                 if self.pci_tracker:
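
The audit path now re-fits the flavor and image constraints against the
current host topology instead of recording the raw constraints. In
sketch form, with stand-in callables for the two hardware calls:

    def audit_numa_topology(host_topology, itype, image_meta,
                            get_constraints, fit_to_host):
        """Recompute the host-pinned topology for a tracked instance."""
        requested = get_constraints(itype, image_meta)  # from flavor + image
        # Re-pin the requested cells against the current host topology.
        return fit_to_host(host_topology, requested)

    print(audit_numa_topology('host-topo', 'flavor', 'image',
                              lambda f, i: 'requested',
                              lambda h, r: (h, r)))  # ('host-topo', 'requested')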


@@ -28,34 +28,28 @@ class NUMATopologyFilter(filters.BaseHostFilter):
         cpu_ratio = CONF.cpu_allocation_ratio
         request_spec = filter_properties.get('request_spec', {})
         instance = request_spec.get('instance_properties', {})
-        instance_topology = hardware.instance_topology_from_instance(instance)
+        requested_topology = hardware.instance_topology_from_instance(instance)
         host_topology, _fmt = hardware.host_topology_and_format_from_host(
                 host_state)
-        if instance_topology:
-            if host_topology:
-                if not hardware.VirtNUMAHostTopology.can_fit_instances(
-                        host_topology, [instance_topology]):
-                    return False
-
-                limit_cells = []
-                usage_after_instance = (
-                        hardware.VirtNUMAHostTopology.usage_from_instances(
-                            host_topology, [instance_topology]))
-                for cell in usage_after_instance.cells:
-                    max_cell_memory = int(cell.memory * ram_ratio)
-                    max_cell_cpu = len(cell.cpuset) * cpu_ratio
-                    if (cell.memory_usage > max_cell_memory or
-                            cell.cpu_usage > max_cell_cpu):
-                        return False
-                    limit_cells.append(
-                        hardware.VirtNUMATopologyCellLimit(
-                            cell.id, cell.cpuset, cell.memory,
-                            max_cell_cpu, max_cell_memory))
-                host_state.limits['numa_topology'] = (
-                        hardware.VirtNUMALimitTopology(
-                            cells=limit_cells).to_json())
-            return True
-        else:
-            return True
+        if requested_topology and host_topology:
+            limit_cells = []
+            for cell in host_topology.cells:
+                max_cell_memory = int(cell.memory * ram_ratio)
+                max_cell_cpu = len(cell.cpuset) * cpu_ratio
+                limit_cells.append(hardware.VirtNUMATopologyCellLimit(
+                    cell.id, cell.cpuset, cell.memory,
+                    max_cell_cpu, max_cell_memory))
+            limits = hardware.VirtNUMALimitTopology(cells=limit_cells)
+            instance_topology = (
+                    hardware.VirtNUMAHostTopology.fit_instance_to_host(
+                        host_topology, requested_topology,
+                        limits_topology=limits))
+            if not instance_topology:
+                return False
+            host_state.limits['numa_topology'] = limits.to_json()
+            instance['numa_topology'] = instance_topology.to_json()
+            return True
+        elif requested_topology:
+            return False
+        else:
+            return True
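
Condensed, the new filter logic is a three-way decision: pack and record
limits when both topologies exist, fail when only the instance requests
NUMA, and pass otherwise. A sketch with hypothetical stand-ins for the
limit builder and the fitting call:

    def host_passes(host_topology, requested_topology, make_limits, fit):
        if requested_topology and host_topology:
            limits = make_limits(host_topology)  # per-cell over-subscription caps
            fitted = fit(host_topology, requested_topology, limits)
            if not fitted:
                return False                     # instance cannot be packed
            # On success, the limits and the chosen pinning travel with
            # the request to the compute host.
            return True
        elif requested_topology:
            return False    # NUMA instance, but the host reports no topology
        else:
            return True     # no topology requested: any host will do

    print(host_passes('host-topo', None, make_limits=None, fit=None))  # True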


@@ -244,7 +244,7 @@ class ClaimTestCase(test.NoDBTestCase):
     def test_numa_topology_no_limit(self, mock_get):
         huge_instance = hardware.VirtNUMAInstanceTopology(
             cells=[hardware.VirtNUMATopologyCell(
-                1, set([1, 2, 3, 4, 5]), 2048)])
+                1, set([1, 2]), 512)])
         self._claim(numa_topology=huge_instance)
 
     def test_numa_topology_fails(self, mock_get):
@@ -264,7 +264,7 @@ class ClaimTestCase(test.NoDBTestCase):
     def test_numa_topology_passes(self, mock_get):
         huge_instance = hardware.VirtNUMAInstanceTopology(
             cells=[hardware.VirtNUMATopologyCell(
-                1, set([1, 2, 3, 4, 5]), 2048)])
+                1, set([1, 2]), 512)])
         limit_topo = hardware.VirtNUMALimitTopology(
             cells=[hardware.VirtNUMATopologyCellLimit(
                 1, [1, 2], 512, cpu_limit=5, memory_limit=4096),
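
The test instances shrink from five CPUs and 2048 MB to two CPUs and
512 MB because, under packing, each instance cell has to fit inside a
single host cell rather than merely staying under the aggregate limits.
Illustratively, with a deliberately simplified fit check:

    def fits_one_cell(inst_cpus, inst_mem, host_cpus, host_mem):
        """Simplified single-cell fit check."""
        return inst_cpus <= host_cpus and inst_mem <= host_mem

    # Host cell from the test: two CPUs, 512 MB.
    print(fits_one_cell(5, 2048, 2, 512))  # False: the old test instance
    print(fits_one_cell(2, 512, 2, 512))   # True: the updated test instance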


@@ -317,6 +317,7 @@ class BaseTestCase(test.TestCase):
         inst['updated_at'] = timeutils.utcnow()
         inst['launched_at'] = timeutils.utcnow()
         inst['security_groups'] = []
+        inst['numa_topology'] = None
         inst.update(params)
         if services:
             _create_service_entries(self.context.elevated(),


@@ -862,8 +862,8 @@ class InstanceClaimTestCase(BaseTrackerTestCase):
         memory_mb = FAKE_VIRT_MEMORY_MB * 2
         root_gb = ephemeral_gb = FAKE_VIRT_LOCAL_GB
         vcpus = FAKE_VIRT_VCPUS * 2
-        claim_topology = self._claim_topology(memory_mb)
-        instance_topology = self._instance_topology(memory_mb)
+        claim_topology = self._claim_topology(3)
+        instance_topology = self._instance_topology(3)
 
         limits = {'memory_mb': memory_mb + FAKE_VIRT_MEMORY_OVERHEAD,
                   'disk_gb': root_gb * 2,


@@ -1040,12 +1040,11 @@ def instance_topology_from_instance(instance):
             # Remove when request_spec is a proper object itself!
             dict_cells = instance_numa_topology.get('cells')
             if dict_cells:
-                cells = [objects.InstanceNUMACell(id=cell['id'],
-                                                  cpuset=set(cell['cpuset']),
-                                                  memory=cell['memory'])
+                cells = [VirtNUMATopologyCell(cell['id'],
+                                              set(cell['cpuset']),
+                                              cell['memory'])
                          for cell in dict_cells]
-                instance_numa_topology = (
-                    objects.InstanceNUMATopology(cells=cells))
+                instance_numa_topology = VirtNUMAInstanceTopology(cells=cells)
     return instance_numa_topology
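
The conversion above, reduced to a sketch with a namedtuple standing in
for VirtNUMATopologyCell: request_spec may still carry the topology as
plain dicts, which are rebuilt into cell objects before fitting:

    from collections import namedtuple

    # Stand-in for VirtNUMATopologyCell with positional (id, cpuset, memory).
    TopoCell = namedtuple('TopoCell', ['id', 'cpuset', 'memory'])

    dict_cells = [{'id': 0, 'cpuset': [0, 1], 'memory': 512},
                  {'id': 1, 'cpuset': [2, 3], 'memory': 512}]
    cells = [TopoCell(cell['id'], set(cell['cpuset']), cell['memory'])
             for cell in dict_cells]
    print(cells[0])  # TopoCell(id=0, cpuset={0, 1}, memory=512)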