scheduler: re-calculate NUMA on consume_from_instance
This patch narrows down the race window between the filter running and
the consumption of resources from the instance after the host has been
chosen.
It does so by re-calculating the fitted NUMA topology just before consuming it
from the chosen host. Thus we avoid any locking, but also make sure that
the host_state is kept as up to date as possible for concurrent
requests, as there is no opportunity for switching threads inside a
consume_from_instance.
Several things worth noting:
* Scheduler being lock free (and thus racy) does not really affect
resources other than PCI and NUMA topology this badly - this is due
to the complexity of said resources. In order for scheduler decisions to not
be based on basically guessing, in case of those two we will likely need
to introduce either locking or special heuristics.
* There is a lot of repeated code between the 'consume_from_instance'
method and the actual filters. This situation should really be fixed but
is out of scope for this bug fix (which is about preventing valid
requests failing because of races in the scheduler).
Change-Id: If0c7ad20506c9dddf4dec1eb64c9d6dd4fb75633
Closes-bug: #1438238
(cherry picked from commit d6b3156a6c)
This commit is contained in:
parent
22d7547c6b
commit
880a356e40
|
@ -47,7 +47,6 @@ class NUMATopologyFilter(filters.BaseHostFilter):
|
|||
if not instance_topology:
|
||||
return False
|
||||
host_state.limits['numa_topology'] = limits
|
||||
host_state.instance_numa_topology = instance_topology
|
||||
return True
|
||||
elif requested_topology:
|
||||
return False
|
||||
|
|
|
@ -125,8 +125,8 @@ class HostState(object):
|
|||
self.free_disk_mb = 0
|
||||
self.vcpus_total = 0
|
||||
self.vcpus_used = 0
|
||||
self.pci_stats = None
|
||||
self.numa_topology = None
|
||||
self.instance_numa_topology = None
|
||||
|
||||
# Additional host information from the compute node stats:
|
||||
self.num_instances = 0
|
||||
|
@ -212,7 +212,6 @@ class HostState(object):
|
|||
self.vcpus_used = compute.vcpus_used
|
||||
self.updated = compute.updated_at
|
||||
self.numa_topology = compute.numa_topology
|
||||
self.instance_numa_topology = None
|
||||
self.pci_stats = pci_stats.PciDeviceStats(
|
||||
compute.pci_device_pools)
|
||||
|
||||
|
@ -269,14 +268,18 @@ class HostState(object):
|
|||
# to a primitive early on, and is thus a dict here. Convert this when
|
||||
# we get an object all the way to this path.
|
||||
if pci_requests and pci_requests['requests'] and self.pci_stats:
|
||||
self.pci_stats.apply_requests(pci_requests.requests,
|
||||
instance_cells)
|
||||
pci_requests = pci_requests.requests
|
||||
self.pci_stats.apply_requests(pci_requests, instance_cells)
|
||||
|
||||
# Calculate the numa usage
|
||||
instance['numa_topology'] = self.instance_numa_topology
|
||||
updated_numa_topology = hardware.get_host_numa_usage_from_instance(
|
||||
host_numa_topology, _fmt = hardware.host_topology_and_format_from_host(
|
||||
self)
|
||||
instance['numa_topology'] = hardware.numa_fit_instance_to_host(
|
||||
host_numa_topology, instance_numa_topology,
|
||||
limits=self.limits.get('numa_topology'),
|
||||
pci_requests=pci_requests, pci_stats=self.pci_stats)
|
||||
self.numa_topology = hardware.get_host_numa_usage_from_instance(
|
||||
self, instance)
|
||||
self.numa_topology = updated_numa_topology
|
||||
|
||||
vm_state = instance.get('vm_state', vm_states.BUILDING)
|
||||
task_state = instance.get('task_state')
|
||||
|
|
|
@ -42,8 +42,6 @@ class TestNUMATopologyFilter(test.NoDBTestCase):
|
|||
{'numa_topology': fakes.NUMA_TOPOLOGY,
|
||||
'pci_stats': None})
|
||||
self.assertTrue(self.filt_cls.host_passes(host, filter_properties))
|
||||
self.assertIsInstance(host.instance_numa_topology,
|
||||
objects.InstanceNUMATopology)
|
||||
|
||||
def test_numa_topology_filter_numa_instance_no_numa_host_fail(self):
|
||||
instance_topology = objects.InstanceNUMATopology(
|
||||
|
@ -59,7 +57,6 @@ class TestNUMATopologyFilter(test.NoDBTestCase):
|
|||
obj_base.obj_to_primitive(instance))}}
|
||||
host = fakes.FakeHostState('host1', 'node1', {'pci_stats': None})
|
||||
self.assertFalse(self.filt_cls.host_passes(host, filter_properties))
|
||||
self.assertIsNone(host.instance_numa_topology)
|
||||
|
||||
def test_numa_topology_filter_numa_host_no_numa_instance_pass(self):
|
||||
instance = fake_instance.fake_instance_obj(mock.sentinel.ctx)
|
||||
|
@ -71,7 +68,6 @@ class TestNUMATopologyFilter(test.NoDBTestCase):
|
|||
host = fakes.FakeHostState('host1', 'node1',
|
||||
{'numa_topology': fakes.NUMA_TOPOLOGY})
|
||||
self.assertTrue(self.filt_cls.host_passes(host, filter_properties))
|
||||
self.assertIsNone(host.instance_numa_topology)
|
||||
|
||||
def test_numa_topology_filter_fail_fit(self):
|
||||
instance_topology = objects.InstanceNUMATopology(
|
||||
|
@ -89,7 +85,6 @@ class TestNUMATopologyFilter(test.NoDBTestCase):
|
|||
{'numa_topology': fakes.NUMA_TOPOLOGY,
|
||||
'pci_stats': None})
|
||||
self.assertFalse(self.filt_cls.host_passes(host, filter_properties))
|
||||
self.assertIsNone(host.instance_numa_topology)
|
||||
|
||||
def test_numa_topology_filter_fail_memory(self):
|
||||
self.flags(ram_allocation_ratio=1)
|
||||
|
@ -109,7 +104,6 @@ class TestNUMATopologyFilter(test.NoDBTestCase):
|
|||
{'numa_topology': fakes.NUMA_TOPOLOGY,
|
||||
'pci_stats': None})
|
||||
self.assertFalse(self.filt_cls.host_passes(host, filter_properties))
|
||||
self.assertIsNone(host.instance_numa_topology)
|
||||
|
||||
def test_numa_topology_filter_fail_cpu(self):
|
||||
self.flags(cpu_allocation_ratio=1)
|
||||
|
@ -128,7 +122,6 @@ class TestNUMATopologyFilter(test.NoDBTestCase):
|
|||
{'numa_topology': fakes.NUMA_TOPOLOGY,
|
||||
'pci_stats': None})
|
||||
self.assertFalse(self.filt_cls.host_passes(host, filter_properties))
|
||||
self.assertIsNone(host.instance_numa_topology)
|
||||
|
||||
def test_numa_topology_filter_pass_set_limit(self):
|
||||
self.flags(cpu_allocation_ratio=21)
|
||||
|
@ -148,8 +141,6 @@ class TestNUMATopologyFilter(test.NoDBTestCase):
|
|||
{'numa_topology': fakes.NUMA_TOPOLOGY,
|
||||
'pci_stats': None})
|
||||
self.assertTrue(self.filt_cls.host_passes(host, filter_properties))
|
||||
self.assertIsInstance(host.instance_numa_topology,
|
||||
objects.InstanceNUMATopology)
|
||||
limits = host.limits['numa_topology']
|
||||
self.assertEqual(limits.cpu_allocation_ratio, 21)
|
||||
self.assertEqual(limits.ram_allocation_ratio, 1.3)
|
||||
|
|
|
@ -864,27 +864,43 @@ class HostStateTestCase(test.NoDBTestCase):
|
|||
self.assertEqual(hyper_ver_int, host.hypervisor_version)
|
||||
|
||||
@mock.patch('nova.virt.hardware.get_host_numa_usage_from_instance')
|
||||
def test_stat_consumption_from_instance(self, numa_usage_mock):
|
||||
@mock.patch('nova.virt.hardware.numa_fit_instance_to_host')
|
||||
@mock.patch('nova.virt.hardware.instance_topology_from_instance')
|
||||
@mock.patch('nova.virt.hardware.host_topology_and_format_from_host')
|
||||
def test_stat_consumption_from_instance(self, host_topo_mock,
|
||||
instance_topo_mock,
|
||||
numa_fit_mock,
|
||||
numa_usage_mock):
|
||||
fake_numa_topology = mock.Mock()
|
||||
host_topo_mock.return_value = ('fake-host-topology', None)
|
||||
numa_usage_mock.return_value = 'fake-consumed-once'
|
||||
host = host_manager.HostState("fakehost", "fakenode")
|
||||
host.instance_numa_topology = 'fake-instance-topology'
|
||||
|
||||
numa_fit_mock.return_value = 'fake-fitted-once'
|
||||
instance_topo_mock.return_value = fake_numa_topology
|
||||
instance = dict(root_gb=0, ephemeral_gb=0, memory_mb=0, vcpus=0,
|
||||
project_id='12345', vm_state=vm_states.BUILDING,
|
||||
task_state=task_states.SCHEDULING, os_type='Linux',
|
||||
uuid='fake-uuid', numa_topology=None)
|
||||
uuid='fake-uuid',
|
||||
numa_topology=fake_numa_topology)
|
||||
host = host_manager.HostState("fakehost", "fakenode")
|
||||
|
||||
host.consume_from_instance(instance)
|
||||
numa_fit_mock.assert_called_once_with('fake-host-topology',
|
||||
fake_numa_topology,
|
||||
limits=None, pci_requests=None,
|
||||
pci_stats=None)
|
||||
numa_usage_mock.assert_called_once_with(host, instance)
|
||||
self.assertEqual('fake-consumed-once', host.numa_topology)
|
||||
self.assertEqual('fake-instance-topology', instance['numa_topology'])
|
||||
self.assertEqual('fake-fitted-once', instance['numa_topology'])
|
||||
|
||||
numa_usage_mock.return_value = 'fake-consumed-twice'
|
||||
instance = dict(root_gb=0, ephemeral_gb=0, memory_mb=0, vcpus=0,
|
||||
project_id='12345', vm_state=vm_states.PAUSED,
|
||||
task_state=None, os_type='Linux',
|
||||
uuid='fake-uuid', numa_topology=None)
|
||||
uuid='fake-uuid',
|
||||
numa_topology=fake_numa_topology)
|
||||
numa_usage_mock.return_value = 'fake-consumed-twice'
|
||||
numa_fit_mock.return_value = 'fake-fitted-twice'
|
||||
host.consume_from_instance(instance)
|
||||
self.assertEqual('fake-instance-topology', instance['numa_topology'])
|
||||
self.assertEqual('fake-fitted-twice', instance['numa_topology'])
|
||||
|
||||
self.assertEqual(2, host.num_instances)
|
||||
self.assertEqual(1, host.num_io_ops)
|
||||
|
@ -926,21 +942,3 @@ class HostStateTestCase(test.NoDBTestCase):
|
|||
self.assertEqual('string2', host.metrics['res2'].value)
|
||||
self.assertEqual('source2', host.metrics['res2'].source)
|
||||
self.assertIsInstance(host.numa_topology, six.string_types)
|
||||
|
||||
def test_update_from_compute_node_resets_stashed_numa(self):
|
||||
hyper_ver_int = utils.convert_version_to_int('6.0.0')
|
||||
compute = objects.ComputeNode(
|
||||
memory_mb=0, free_disk_gb=0, local_gb=0, metrics=None,
|
||||
local_gb_used=0, free_ram_mb=0, vcpus=0, vcpus_used=0,
|
||||
disk_available_least=None,
|
||||
updated_at=None, host_ip='127.0.0.1',
|
||||
hypervisor_type='htype',
|
||||
hypervisor_hostname='hostname', cpu_info='cpu_info',
|
||||
supported_hv_specs=[],
|
||||
hypervisor_version=hyper_ver_int,
|
||||
numa_topology=fakes.NUMA_TOPOLOGY._to_json(),
|
||||
stats=None, pci_device_pools=None)
|
||||
host = host_manager.HostState("fakehost", "fakenode")
|
||||
host.instance_numa_topology = 'fake-instance-topology'
|
||||
host.update_from_compute_node(compute)
|
||||
self.assertIsNone(host.instance_numa_topology)
|
||||
|
|
Loading…
Reference in New Issue