Granular requests to get_allocation_candidates

nova.scheduler.utils.ResourceRequest now has a to_querystring method
that produces a querystring in the format expected by the GET
/allocation_candidates API.  The report client now uses this method to
invoke said API.

Change-Id: I496e8d64907fdcb0e2da255725aed1fc529725f2
blueprint: granular-resource-requests
This commit is contained in:
Eric Fried 2017-10-30 18:57:07 -05:00
parent 549e5a2226
commit 9ef4d7662e
5 changed files with 189 additions and 64 deletions

View File

@ -25,7 +25,6 @@ import os_traits
from oslo_log import log as logging
from oslo_middleware import request_id
from oslo_utils import versionutils
from six.moves.urllib import parse
from nova.compute import provider_tree
from nova.compute import utils as compute_utils
@ -46,6 +45,7 @@ _RE_INV_IN_USE = re.compile("Inventory for (.+) on resource provider "
"(.+) in use")
WARN_EVERY = 10
PLACEMENT_CLIENT_SEMAPHORE = 'placement_client'
GRANULAR_AC_VERSION = '1.25'
POST_RPS_RETURNS_PAYLOAD_API_VERSION = '1.20'
NESTED_PROVIDER_API_VERSION = '1.14'
POST_ALLOCATIONS_API_VERSION = '1.13'
@ -333,39 +333,9 @@ class SchedulerReportClient(object):
"Candidates are in either 'foo' or 'bar', but definitely in 'baz'"
"""
# TODO(efried): For now, just use the unnumbered group to retain
# existing behavior. Once the GET /allocation_candidates API is
# prepped to accept the whole shebang, we'll join up all the resources
# and traits in the query string (via a new method on ResourceRequest).
res = resources.get_request_group(None).resources
required_traits = resources.get_request_group(None).required_traits
forbidden_traits = resources.get_request_group(None).forbidden_traits
aggregates = resources.get_request_group(None).member_of
resource_query = ",".join(
sorted("%s:%s" % (rc, amount)
for (rc, amount) in res.items()))
qs_params = [
('resources', resource_query),
('limit', CONF.scheduler.max_placement_results),
]
required_traits_params = []
if required_traits:
required_traits_params.extend(required_traits)
if forbidden_traits:
# Sorted to make testing easier to manage and for
# predictability.
required_traits_params.extend(
['!%s' % trait for trait in sorted(forbidden_traits)])
if required_traits_params:
qs_params.append(('required', ','.join(required_traits_params)))
if aggregates:
for agg_list in aggregates:
qs_params.append(('member_of', 'in:%s' % ','.join(agg_list)))
version = '1.24'
url = "/allocation_candidates?%s" % parse.urlencode(qs_params)
version = GRANULAR_AC_VERSION
qparams = resources.to_querystring()
url = "/allocation_candidates?%s" % qparams
resp = self.get(url, version=version,
global_request_id=context.global_id)
if resp.status_code == 200:

View File

@ -22,6 +22,7 @@ import sys
from oslo_log import log as logging
import oslo_messaging as messaging
from oslo_serialization import jsonutils
from six.moves.urllib import parse
from nova.api.openstack.placement import lib as placement_lib
from nova.compute import flavors
@ -56,6 +57,8 @@ class ResourceRequest(object):
def __init__(self):
# { ident: RequestGroup }
self._rg_by_id = {}
self._group_policy = None
self._limit = CONF.scheduler.max_placement_results
def __str__(self):
return ', '.join(sorted(
@ -84,7 +87,7 @@ class ResourceRequest(object):
LOG.warning(
"Resource amounts must be nonnegative integers. Received "
"'%(val)s' for key resources%(groupid)s.",
{"groupid": groupid, "val": amount})
{"groupid": groupid or '', "val": amount})
return
self.get_request_group(groupid).resources[rclass] = amount
@ -100,10 +103,19 @@ class ResourceRequest(object):
LOG.warning(
"Only (%(tvals)s) traits are supported. Received '%(val)s' "
"for key trait%(groupid)s.",
{"tvals": ', '.join(trait_vals), "groupid": groupid,
{"tvals": ', '.join(trait_vals), "groupid": groupid or '',
"val": trait_type})
return
def _add_group_policy(self, policy):
# The only valid values for group_policy are 'none' and 'isolate'.
if policy not in ('none', 'isolate'):
LOG.warning(
"Invalid group_policy '%s'. Valid values are 'none' and "
"'isolate'.", policy)
return
self._group_policy = policy
@classmethod
def from_extra_specs(cls, extra_specs, req=None):
"""Processes resources and traits in numbered groupings in extra_specs.
@ -114,18 +126,27 @@ class ResourceRequest(object):
"trait:$TRAIT_NAME": "required"
"trait$N:$TRAIT_NAME": "required"
Does *not* yet handle member_of[$N].
:param extra_specs: The flavor extra_specs dict.
:param req: the ResourceRequest object to add the requirements to or
None to create a new ResourceRequest
:return: A ResourceRequest object representing the resources and
required traits in the extra_specs.
"""
# TODO(efried): Handle member_of[$N], which will need to be reconciled
# with destination.aggregates handling in resources_from_request_spec
if req is not None:
ret = req
else:
ret = cls()
for key, val in extra_specs.items():
if key == 'group_policy':
ret._add_group_policy(val)
continue
match = cls.XS_KEYPAT.match(key)
if not match:
continue
@ -217,6 +238,51 @@ class ResourceRequest(object):
resource_dict.pop(rclass)
self._clean_empties()
def to_querystring(self):
"""Produce a querystring of the form expected by
GET /allocation_candidates.
"""
# NOTE(efried): The sorting herein is not necessary for the API; it is
# to make testing easier and logging/debugging predictable.
def to_queryparams(request_group, suffix):
res = request_group.resources
required_traits = request_group.required_traits
forbidden_traits = request_group.forbidden_traits
aggregates = request_group.member_of
resource_query = ",".join(
sorted("%s:%s" % (rc, amount)
for (rc, amount) in res.items()))
qs_params = [('resources%s' % suffix, resource_query)]
# Assemble required and forbidden traits, allowing for either/both
# to be empty.
required_val = ','.join(
sorted(required_traits) +
['!%s' % ft for ft in sorted(forbidden_traits)])
if required_val:
qs_params.append(('required%s' % suffix, required_val))
if aggregates:
aggs = []
# member_ofN is a list of lists. We need a tuple of
# ('member_ofN', 'in:uuid,uuid,...') for each inner list.
for agglist in aggregates:
aggs.append(('member_of%s' % suffix,
'in:' + ','.join(sorted(agglist))))
qs_params.extend(sorted(aggs))
return qs_params
qparams = [('limit', self._limit)]
if self._group_policy is not None:
qparams.append(('group_policy', self._group_policy))
for ident, rg in self._rg_by_id.items():
# [('resourcesN', 'rclass:amount,rclass:amount,...'),
# ('requiredN', 'trait_name,!trait_name,...'),
# ('member_ofN', 'in:uuid,uuid,...'),
# ('member_ofN', 'in:uuid,uuid,...')]
qparams.extend(to_queryparams(rg, ident or ''))
return parse.urlencode(sorted(qparams))
def build_request_spec(image, instances, instance_type=None):
"""Build a request_spec for the scheduler.

View File

@ -1451,40 +1451,63 @@ class TestProviderOperations(SchedulerReportClientTestCase):
resources = scheduler_utils.ResourceRequest.from_extra_specs({
'resources:VCPU': '1',
'resources:MEMORY_MB': '1024',
'resources1:DISK_GB': '30',
'trait:HW_CPU_X86_AVX': 'required',
'trait:CUSTOM_TRAIT1': 'required',
'trait:CUSTOM_TRAIT2': 'preferred',
'trait:CUSTOM_TRAIT3': 'forbidden',
'trait:CUSTOM_TRAIT4': 'forbidden',
'resources1:DISK_GB': '30',
'trait1:STORAGE_DISK_SSD': 'required',
'resources2:VGPU': '2',
'trait2:HW_GPU_RESOLUTION_W2560H1600': 'required',
'trait2:HW_GPU_API_VULKAN': 'required',
'resources3:SRIOV_NET_VF': '1',
'resources3:CUSTOM_NET_EGRESS_BYTES_SEC': '125000',
'group_policy': 'isolate',
# These are ignored because misspelled, bad value, etc.
'resources02:CUSTOM_WIDGET': '123',
'trait:HW_NIC_OFFLOAD_LRO': 'preferred',
'group_policy3': 'none',
})
resources.get_request_group(None).member_of = [
('agg1', 'agg2', 'agg3'), ('agg1', 'agg2')]
expected_path = '/allocation_candidates'
expected_query = {
'resources': ['MEMORY_MB:1024,VCPU:1'],
'required': ['CUSTOM_TRAIT1,!CUSTOM_TRAIT3,!CUSTOM_TRAIT4'],
'member_of': ['in:agg1,agg2,agg3', 'in:agg1,agg2'],
'limit': ['1000']
}
expected_query = [
('group_policy', 'isolate'),
('limit', '1000'),
('member_of', 'in:agg1,agg2'),
('member_of', 'in:agg1,agg2,agg3'),
('required', 'CUSTOM_TRAIT1,HW_CPU_X86_AVX,!CUSTOM_TRAIT3,'
'!CUSTOM_TRAIT4'),
('required1', 'STORAGE_DISK_SSD'),
('required2', 'HW_GPU_API_VULKAN,HW_GPU_RESOLUTION_W2560H1600'),
('resources', 'MEMORY_MB:1024,VCPU:1'),
('resources1', 'DISK_GB:30'),
('resources2', 'VGPU:2'),
('resources3', 'CUSTOM_NET_EGRESS_BYTES_SEC:125000,SRIOV_NET_VF:1')
]
resp_mock.json.return_value = json_data
self.ks_adap_mock.get.return_value = resp_mock
alloc_reqs, p_sums, allocation_request_version = \
self.client.get_allocation_candidates(self.context, resources)
alloc_reqs, p_sums, allocation_request_version = (
self.client.get_allocation_candidates(self.context, resources))
self.ks_adap_mock.get.assert_called_once_with(
mock.ANY, raise_exc=False, microversion='1.24',
headers={'X-Openstack-Request-Id': self.context.global_id})
url = self.ks_adap_mock.get.call_args[0][0]
split_url = parse.urlsplit(url)
query = parse.parse_qs(split_url.query)
query = parse.parse_qsl(split_url.query)
self.assertEqual(expected_path, split_url.path)
self.assertEqual(expected_query, query)
expected_url = '/allocation_candidates?%s' % parse.urlencode(
expected_query)
self.ks_adap_mock.get.assert_called_once_with(
expected_url, raise_exc=False, microversion='1.25',
headers={'X-Openstack-Request-Id': self.context.global_id})
self.assertEqual(mock.sentinel.alloc_reqs, alloc_reqs)
self.assertEqual(mock.sentinel.p_sums, p_sums)
def test_get_allocation_candidates_with_no_trait(self):
def test_get_ac_no_trait_bogus_group_policy_custom_limit(self):
self.flags(max_placement_results=42, group='scheduler')
resp_mock = mock.Mock(status_code=200)
json_data = {
'allocation_requests': mock.sentinel.alloc_reqs,
@ -1494,26 +1517,33 @@ class TestProviderOperations(SchedulerReportClientTestCase):
'resources:VCPU': '1',
'resources:MEMORY_MB': '1024',
'resources1:DISK_GB': '30',
'group_policy': 'bogus',
})
expected_path = '/allocation_candidates'
expected_query = {'resources': ['MEMORY_MB:1024,VCPU:1'],
'limit': ['1000']}
expected_query = [
('limit', '42'),
('resources', 'MEMORY_MB:1024,VCPU:1'),
('resources1', 'DISK_GB:30'),
]
resp_mock.json.return_value = json_data
self.ks_adap_mock.get.return_value = resp_mock
alloc_reqs, p_sums, allocation_request_version = \
self.client.get_allocation_candidates(self.context, resources)
alloc_reqs, p_sums, allocation_request_version = (
self.client.get_allocation_candidates(self.context, resources))
self.ks_adap_mock.get.assert_called_once_with(
mock.ANY, raise_exc=False, microversion='1.24',
headers={'X-Openstack-Request-Id': self.context.global_id})
url = self.ks_adap_mock.get.call_args[0][0]
split_url = parse.urlsplit(url)
query = parse.parse_qs(split_url.query)
query = parse.parse_qsl(split_url.query)
self.assertEqual(expected_path, split_url.path)
self.assertEqual(expected_query, query)
expected_url = '/allocation_candidates?%s' % parse.urlencode(
expected_query)
self.assertEqual(mock.sentinel.alloc_reqs, alloc_reqs)
self.ks_adap_mock.get.assert_called_once_with(
expected_url, raise_exc=False, microversion='1.25',
headers={'X-Openstack-Request-Id': self.context.global_id})
self.assertEqual(mock.sentinel.p_sums, p_sums)
def test_get_allocation_candidates_not_found(self):
# Ensure _get_resource_provider() just returns None when the placement
@ -1533,7 +1563,7 @@ class TestProviderOperations(SchedulerReportClientTestCase):
res = self.client.get_allocation_candidates(self.context, resources)
self.ks_adap_mock.get.assert_called_once_with(
mock.ANY, raise_exc=False, microversion='1.24',
mock.ANY, raise_exc=False, microversion='1.25',
headers={'X-Openstack-Request-Id': self.context.global_id})
url = self.ks_adap_mock.get.call_args[0][0]
split_url = parse.urlsplit(url)

View File

@ -30,6 +30,8 @@ class TestUtils(test.NoDBTestCase):
self.context = nova_context.get_admin_context()
def assertResourceRequestsEqual(self, expected, observed):
self.assertEqual(expected._limit, observed._limit)
self.assertEqual(expected._group_policy, observed._group_policy)
ex_by_id = expected._rg_by_id
ob_by_id = observed._rg_by_id
self.assertEqual(set(ex_by_id), set(ob_by_id))
@ -53,6 +55,7 @@ class TestUtils(test.NoDBTestCase):
fake_spec = objects.RequestSpec(flavor=flavor, image=image)
resources = utils.resources_from_request_spec(fake_spec)
self.assertResourceRequestsEqual(expected, resources)
return resources
def test_resources_from_request_spec_flavor_only(self):
flavor = objects.Flavor(vcpus=1,
@ -236,8 +239,10 @@ class TestUtils(test.NoDBTestCase):
'resources:IPV4_ADDRESS': '0',
'resources:CUSTOM_FOO': '0',
# Bogus values don't make it through
'resources1:MEMORY_MB': 'bogus'})
'resources1:MEMORY_MB': 'bogus',
'group_policy': 'none'})
expected_resources = utils.ResourceRequest()
expected_resources._group_policy = 'none'
expected_resources._rg_by_id[None] = plib.RequestGroup(
use_same_provider=False,
resources={
@ -270,7 +275,19 @@ class TestUtils(test.NoDBTestCase):
'SRIOV_NET_VF': 1,
}
)
self._test_resources_from_request_spec(expected_resources, flavor)
rr = self._test_resources_from_request_spec(expected_resources, flavor)
expected_querystring = (
'group_policy=none&'
'limit=1000&'
'required3=CUSTOM_GOLD%2CCUSTOM_SILVER&'
'resources=CUSTOM_THING%3A123%2CDISK_GB%3A10&'
'resources1=VGPU%3A1%2CVGPU_DISPLAY_HEAD%3A2&'
'resources24=SRIOV_NET_VF%3A2&'
'resources3=VCPU%3A2&'
'resources42=SRIOV_NET_VF%3A1'
)
self.assertEqual(expected_querystring, rr.to_querystring())
def test_resources_from_request_spec_aggregates(self):
destination = objects.Destination()
@ -384,8 +401,8 @@ class TestUtils(test.NoDBTestCase):
self.assertEqual(expected, actual)
@mock.patch('nova.compute.utils.is_volume_backed_instance',
return_value=False)
def test_resources_from_flavor_with_override(self, mock_is_bfv):
new=mock.Mock(return_value=False))
def test_resources_from_flavor_with_override(self):
flavor = objects.Flavor(
vcpus=1, memory_mb=1024, root_gb=10, ephemeral_gb=5, swap=1024,
extra_specs={
@ -404,7 +421,8 @@ class TestUtils(test.NoDBTestCase):
'resources86:MEMORY_MB': 0,
# Standard and custom zeroes don't make it through
'resources:IPV4_ADDRESS': 0,
'resources:CUSTOM_FOO': 0})
'resources:CUSTOM_FOO': 0,
'group_policy': 'none'})
instance = objects.Instance()
expected = {
'VCPU': 2,
@ -445,9 +463,11 @@ class TestUtils(test.NoDBTestCase):
'traitFoo:HW_NIC_ACCEL_SSL': 'required',
# Solo resource, no corresponding traits
'resources3:DISK_GB': '5',
'group_policy': 'isolate',
}
# Build up a ResourceRequest from the inside to compare against.
expected = utils.ResourceRequest()
expected._group_policy = 'isolate'
expected._rg_by_id[None] = plib.RequestGroup(
use_same_provider=False,
resources={
@ -489,8 +509,21 @@ class TestUtils(test.NoDBTestCase):
'DISK_GB': 5,
}
)
rr = utils.ResourceRequest.from_extra_specs(extra_specs)
self.assertResourceRequestsEqual(expected, rr)
expected_querystring = (
'group_policy=isolate&'
'limit=1000&'
'required=CUSTOM_MAGIC%2CHW_CPU_X86_AVX%2C%21CUSTOM_BRONZE&'
'required1=CUSTOM_PHYSNET_NET1%2C%21CUSTOM_PHYSNET_NET2&'
'required2=CUSTOM_PHYSNET_NET2%2CHW_NIC_ACCEL_SSL&'
'resources=MEMORY_MB%3A2048%2CVCPU%3A2&'
'resources1=IPV4_ADDRESS%3A1%2CSRIOV_NET_VF%3A1&'
'resources2=IPV4_ADDRESS%3A2%2CSRIOV_NET_VF%3A1&'
'resources3=DISK_GB%3A5'
)
self.assertEqual(expected_querystring, rr.to_querystring())
# Test stringification
self.assertEqual(

View File

@ -0,0 +1,26 @@
---
features:
- |
Added support for granular resource and traits requests to the scheduler.
A flavor extra spec is extended to support specifying numbered groupings of
resources and required/forbidden traits. A ``resources`` key with a
positive integer suffix (e.g. ``resources42:VCPU``) will be logically
associated with ``trait`` keys with the same suffix (e.g.
``trait42:HW_CPU_X86_AVX``). The resources and required/forbidden traits
in that group will be satisfied by the same resource provider on the host
selected by the scheduler. When more than one numbered grouping is
supplied, the ``group_policy`` extra spec is required to indicate how the
groups should interact. With ``group_policy=none``, separate groupings -
numbered or unnumbered - may or may not be satisfied by the same provider.
With ``group_policy=isolate``, numbered groups are guaranteed to be
satisfied by *different* providers - though there may still be overlap with
the unnumbered group.
``trait`` keys for a given group are optional. That is, you may specify
``resources42:XXX`` without a corresponding ``trait42:YYY``. However, the
reverse (specifying ``trait42:YYY`` without ``resources42:XXX``) will
result in an error.
The semantic of the (unnumbered) ``resources`` and ``trait`` keys is
unchanged: the resources and traits specified thereby may be satisfied by
any provider on the same host or associated via aggregate.