makes sure correct PCI device allocation

With this patch, on the compute node, a stats pool will be associated with a
list of devices that belongs to the pool. This makes sure that PCI devices are
allocated out of the same stats pools that are used by the nova scheduler to
satisfy the PCI requests. And therefore, stats pools on the compute nodes will
be kept in sync with their counterparts in the nova scheduler.

Change-Id: I2d97c6354215e2ac5ff659e3203c33771abe1c09
Closes-bug: #1288809
This commit is contained in:
Robert Li 2014-07-11 14:26:28 -04:00 committed by Baodong (Robert) Li
parent 6c098e6da5
commit fa2f139e22
4 changed files with 108 additions and 87 deletions

View File

@ -26,7 +26,6 @@ from nova.openstack.common import log as logging
from nova.pci import pci_device
from nova.pci import pci_request
from nova.pci import pci_stats
from nova.pci import pci_utils
LOG = logging.getLogger(__name__)
@ -74,46 +73,6 @@ class PciDevTracker(object):
elif dev['status'] == 'available':
self.stats.add_device(dev)
def _filter_devices_for_spec(self, request_spec, pci_devs):
return [p for p in pci_devs
if pci_utils.pci_device_prop_match(p, request_spec)]
def _get_free_devices_for_request(self, pci_request, pci_devs):
count = pci_request.get('count', 1)
spec = pci_request.get('spec', [])
devs = self._filter_devices_for_spec(spec, pci_devs)
if len(devs) < count:
return None
else:
return devs[:count]
@property
def free_devs(self):
return [dev for dev in self.pci_devs if dev.status == 'available']
def get_free_devices_for_requests(self, pci_requests):
"""Select free pci devices for requests
Pci_requests is a list of pci_request dictionaries. Each dictionary
has three keys:
count: number of pci devices required, default 1
spec: the pci properties that the devices should meet
alias_name: alias the pci_request is translated from, optional
If any single pci_request cannot find any free devices, then the
entire request list will fail.
"""
alloc = []
for request in pci_requests:
available = self._get_free_devices_for_request(
request,
[p for p in self.free_devs if p not in alloc])
if not available:
return []
alloc.extend(available)
return alloc
@property
def all_devs(self):
return self.pci_devs
@ -162,7 +121,7 @@ class PciDevTracker(object):
else:
# Note(yjiang5): no need to update stats if an assigned
# device is hot removed.
self.stats.consume_device(existed)
self.stats.remove_device(existed)
else:
new_value = next((dev for dev in devices if
dev['address'] == existed['address']))
@ -196,12 +155,11 @@ class PciDevTracker(object):
instance, prefix)
if not pci_requests:
return None
devs = self.get_free_devices_for_requests(pci_requests)
devs = self.stats.consume_requests(pci_requests)
if not devs:
raise exception.PciDeviceRequestFailed(pci_requests)
for dev in devs:
pci_device.claim(dev, instance)
self.stats.consume_device(dev)
return devs
def _allocate_instance(self, instance, devs):

View File

@ -17,6 +17,7 @@
import copy
from nova import exception
from nova.i18n import _LE
from nova.openstack.common import jsonutils
from nova.openstack.common import log as logging
from nova.pci import pci_utils
@ -70,8 +71,10 @@ class PciDeviceStats(object):
if not pool:
pool = dict((k, dev.get(k)) for k in self.pool_keys)
pool['count'] = 0
pool['devices'] = []
self.pools.append(pool)
pool['count'] += 1
pool['devices'].append(dev)
@staticmethod
def _decrease_pool_count(pool_list, pool, count=1):
@ -87,14 +90,56 @@ class PciDeviceStats(object):
pool_list.remove(pool)
return count
def consume_device(self, dev):
def remove_device(self, dev):
"""Remove one device from the first pool that it matches."""
pool = self._get_first_pool(dev)
if not pool:
raise exception.PciDevicePoolEmpty(
compute_node_id=dev.compute_node_id, address=dev.address)
pool['devices'].remove(dev)
self._decrease_pool_count(self.pools, pool)
def get_free_devs(self):
free_devs = []
for pool in self.pools:
free_devs.extend(pool['devices'])
return free_devs
def consume_requests(self, pci_requests):
alloc_devices = []
for request in pci_requests:
count = request.get('count', 1)
spec = request.get('spec', [])
# For now, keep the same algorithm as during scheduling:
# a spec may be able to match multiple pools.
pools = self._filter_pools_for_spec(self.pools, spec)
# Failed to allocate the required number of devices
# Return the devices already allocated back to their pools
if sum([pool['count'] for pool in pools]) < count:
LOG.error(_LE("Failed to allocate PCI devices for instance."
" Unassigning devices back to pools."
" This should not happen, since the scheduler"
" should have accurate information, and allocation"
" during claims is controlled via a hold"
" on the compute node semaphore"))
for d in range(len(alloc_devices)):
self.add_device(alloc_devices.pop())
raise exception.PciDeviceRequestFailed(requests=pci_requests)
for pool in pools:
if pool['count'] >= count:
num_alloc = count
else:
num_alloc = pool['count']
count -= num_alloc
pool['count'] -= num_alloc
for d in range(num_alloc):
pci_dev = pool['devices'].pop()
alloc_devices.append(pci_dev)
if count == 0:
break
return alloc_devices
@staticmethod
def _filter_pools_for_spec(pools, request_specs):
return [pool for pool in pools
@ -134,7 +179,12 @@ class PciDeviceStats(object):
raise exception.PciDeviceRequestFailed(requests=requests)
def __iter__(self):
return iter(self.pools)
# 'devices' shouldn't be part of stats
pools = []
for pool in self.pools:
tmp = dict((k, v) for k, v in pool.iteritems() if k != 'devices')
pools.append(tmp)
return iter(pools)
def clear(self):
"""Clear all the stats maintained."""

View File

@ -104,7 +104,8 @@ class PciDevTrackerTestCase(test.TestCase):
def test_pcidev_tracker_create(self):
self.assertEqual(len(self.tracker.pci_devs), 3)
self.assertEqual(len(self.tracker.free_devs), 3)
free_devs = self.tracker.pci_stats.get_free_devs()
self.assertEqual(len(free_devs), 3)
self.assertEqual(self.tracker.stale.keys(), [])
self.assertEqual(len(self.tracker.stats.pools), 2)
self.assertEqual(self.tracker.node_id, 1)
@ -113,23 +114,6 @@ class PciDevTrackerTestCase(test.TestCase):
self.tracker = pci_manager.PciDevTracker()
self.assertEqual(len(self.tracker.pci_devs), 0)
def test_get_free_devices_for_requests(self):
devs = self.tracker.get_free_devices_for_requests(fake_pci_requests)
self.assertEqual(len(devs), 2)
self.assertEqual(set([dev['vendor_id'] for dev in devs]),
set(['v1', 'v']))
def test_get_free_devices_for_requests_empty(self):
devs = self.tracker.get_free_devices_for_requests([])
self.assertEqual(len(devs), 0)
def test_get_free_devices_for_requests_meet_partial(self):
requests = copy.deepcopy(fake_pci_requests)
requests[1]['count'] = 2
requests[1]['spec'][0]['vendor_id'] = 'v'
devs = self.tracker.get_free_devices_for_requests(requests)
self.assertEqual(len(devs), 0)
def test_set_hvdev_new_dev(self):
fake_pci_3 = dict(fake_pci, address='0000:00:00.4', vendor_id='v2')
fake_pci_devs = [copy.deepcopy(fake_pci), copy.deepcopy(fake_pci_1),
@ -172,8 +156,9 @@ class PciDevTrackerTestCase(test.TestCase):
def test_update_pci_for_instance_active(self):
self.pci_requests = fake_pci_requests
self.tracker.update_pci_for_instance(self.inst)
self.assertEqual(len(self.tracker.free_devs), 1)
self.assertEqual(self.tracker.free_devs[0]['vendor_id'], 'v')
free_devs = self.tracker.pci_stats.get_free_devs()
self.assertEqual(len(free_devs), 1)
self.assertEqual(free_devs[0]['vendor_id'], 'v')
def test_update_pci_for_instance_fail(self):
self.pci_requests = copy.deepcopy(fake_pci_requests)
@ -185,10 +170,12 @@ class PciDevTrackerTestCase(test.TestCase):
def test_update_pci_for_instance_deleted(self):
self.pci_requests = fake_pci_requests
self.tracker.update_pci_for_instance(self.inst)
self.assertEqual(len(self.tracker.free_devs), 1)
free_devs = self.tracker.pci_stats.get_free_devs()
self.assertEqual(len(free_devs), 1)
self.inst.vm_state = vm_states.DELETED
self.tracker.update_pci_for_instance(self.inst)
self.assertEqual(len(self.tracker.free_devs), 3)
free_devs = self.tracker.pci_stats.get_free_devs()
self.assertEqual(len(free_devs), 3)
self.assertEqual(set([dev['vendor_id'] for
dev in self.tracker.pci_devs]),
set(['v', 'v1']))
@ -196,15 +183,18 @@ class PciDevTrackerTestCase(test.TestCase):
def test_update_pci_for_instance_resize_source(self):
self.pci_requests = fake_pci_requests
self.tracker.update_pci_for_instance(self.inst)
self.assertEqual(len(self.tracker.free_devs), 1)
free_devs = self.tracker.pci_stats.get_free_devs()
self.assertEqual(len(free_devs), 1)
self.inst.task_state = task_states.RESIZE_MIGRATED
self.tracker.update_pci_for_instance(self.inst)
self.assertEqual(len(self.tracker.free_devs), 3)
free_devs = self.tracker.pci_stats.get_free_devs()
self.assertEqual(len(free_devs), 3)
def test_update_pci_for_instance_resize_dest(self):
self.pci_requests = fake_pci_requests
self.tracker.update_pci_for_migration(self.inst)
self.assertEqual(len(self.tracker.free_devs), 1)
free_devs = self.tracker.pci_stats.get_free_devs()
self.assertEqual(len(free_devs), 1)
self.assertEqual(len(self.tracker.claims['fake-inst-uuid']), 2)
self.assertNotIn('fake-inst-uuid', self.tracker.allocations)
self.inst.task_state = task_states.RESIZE_FINISH
@ -215,14 +205,16 @@ class PciDevTrackerTestCase(test.TestCase):
def test_update_pci_for_migration_in(self):
self.pci_requests = fake_pci_requests
self.tracker.update_pci_for_migration(self.inst)
self.assertEqual(len(self.tracker.free_devs), 1)
self.assertEqual(self.tracker.free_devs[0]['vendor_id'], 'v')
free_devs = self.tracker.pci_stats.get_free_devs()
self.assertEqual(len(free_devs), 1)
self.assertEqual(free_devs[0]['vendor_id'], 'v')
def test_update_pci_for_migration_out(self):
self.pci_requests = fake_pci_requests
self.tracker.update_pci_for_migration(self.inst)
self.tracker.update_pci_for_migration(self.inst, sign=-1)
self.assertEqual(len(self.tracker.free_devs), 3)
free_devs = self.tracker.pci_stats.get_free_devs()
self.assertEqual(len(free_devs), 3)
self.assertEqual(set([dev['vendor_id'] for
dev in self.tracker.pci_devs]),
set(['v', 'v1']))
@ -276,13 +268,15 @@ class PciDevTrackerTestCase(test.TestCase):
self.tracker.update_pci_for_instance(self.inst)
self.pci_requests = [{'count': 1, 'spec': [{'vendor_id': 'v1'}]}]
self.tracker.update_pci_for_instance(inst_2)
self.assertEqual(len(self.tracker.free_devs), 1)
self.assertEqual(self.tracker.free_devs[0]['vendor_id'], 'v')
free_devs = self.tracker.pci_stats.get_free_devs()
self.assertEqual(len(free_devs), 1)
self.assertEqual(free_devs[0]['vendor_id'], 'v')
self.tracker.clean_usage([self.inst], [migr], [orph])
self.assertEqual(len(self.tracker.free_devs), 2)
free_devs = self.tracker.pci_stats.get_free_devs()
self.assertEqual(len(free_devs), 2)
self.assertEqual(
set([dev['vendor_id'] for dev in self.tracker.free_devs]),
set([dev['vendor_id'] for dev in free_devs]),
set(['v', 'v1']))
def test_clean_usage_claims(self):
@ -295,11 +289,13 @@ class PciDevTrackerTestCase(test.TestCase):
self.tracker.update_pci_for_instance(self.inst)
self.pci_requests = [{'count': 1, 'spec': [{'vendor_id': 'v1'}]}]
self.tracker.update_pci_for_migration(inst_2)
self.assertEqual(len(self.tracker.free_devs), 1)
free_devs = self.tracker.pci_stats.get_free_devs()
self.assertEqual(len(free_devs), 1)
self.tracker.clean_usage([self.inst], [migr], [orph])
self.assertEqual(len(self.tracker.free_devs), 2)
free_devs = self.tracker.pci_stats.get_free_devs()
self.assertEqual(len(free_devs), 2)
self.assertEqual(
set([dev['vendor_id'] for dev in self.tracker.free_devs]),
set([dev['vendor_id'] for dev in free_devs]),
set(['v', 'v1']))
def test_clean_usage_no_request_match_no_claims(self):
@ -308,11 +304,13 @@ class PciDevTrackerTestCase(test.TestCase):
# calls clean_usage.
self.pci_requests = None
self.tracker.update_pci_for_migration(instance=self.inst, sign=1)
self.assertEqual(3, len(self.tracker.free_devs))
free_devs = self.tracker.pci_stats.get_free_devs()
self.assertEqual(3, len(free_devs))
self.tracker.clean_usage([], [], [])
self.assertEqual(3, len(self.tracker.free_devs))
free_devs = self.tracker.pci_stats.get_free_devs()
self.assertEqual(3, len(free_devs))
self.assertEqual(
set([dev['address'] for dev in self.tracker.free_devs]),
set([dev['address'] for dev in free_devs]),
set(['0000:00:00.1', '0000:00:00.2', '0000:00:00.3']))

View File

@ -71,15 +71,15 @@ class PciDeviceStatsTestCase(test.NoDBTestCase):
set([1, 2]))
def test_remove_device(self):
self.pci_stats.consume_device(self.fake_dev_2)
self.pci_stats.remove_device(self.fake_dev_2)
self.assertEqual(len(self.pci_stats.pools), 1)
self.assertEqual(self.pci_stats.pools[0]['count'], 2)
self.assertEqual(self.pci_stats.pools[0]['vendor_id'], 'v1')
def test_remove_device_exception(self):
self.pci_stats.consume_device(self.fake_dev_2)
self.pci_stats.remove_device(self.fake_dev_2)
self.assertRaises(exception.PciDevicePoolEmpty,
self.pci_stats.consume_device,
self.pci_stats.remove_device,
self.fake_dev_2)
def test_json_creat(self):
@ -116,3 +116,18 @@ class PciDeviceStatsTestCase(test.NoDBTestCase):
self.assertRaises(exception.PciDeviceRequestFailed,
self.pci_stats.apply_requests,
pci_requests_multiple)
def test_consume_requests(self):
devs = self.pci_stats.consume_requests(pci_requests)
self.assertEqual(2, len(devs))
self.assertEqual(set(['v1', 'v2']),
set([dev['vendor_id'] for dev in devs]))
def test_consume_requests_empty(self):
devs = self.pci_stats.consume_requests([])
self.assertEqual(0, len(devs))
def test_consume_requests_failed(self):
self.assertRaises(exception.PciDeviceRequestFailed,
self.pci_stats.consume_requests,
pci_requests_multiple)