From 9ef4d7662e4a31e05d155982e25d3d35e2362e3e Mon Sep 17 00:00:00 2001 From: Eric Fried Date: Mon, 30 Oct 2017 18:57:07 -0500 Subject: [PATCH] Granular requests to get_allocation_candidates nova.scheduler.utils.ResourceRequest now has a to_querystring method that produces a querystring in the format expected by the GET /allocation_candidates API. The report client now uses this method to invoke said API. Change-Id: I496e8d64907fdcb0e2da255725aed1fc529725f2 blueprint: granular-resource-requests --- nova/scheduler/client/report.py | 38 +--------- nova/scheduler/utils.py | 70 ++++++++++++++++- .../unit/scheduler/client/test_report.py | 76 +++++++++++++------ nova/tests/unit/scheduler/test_utils.py | 43 +++++++++-- ...granular-extra-specs-50b26b8f63717942.yaml | 26 +++++++ 5 files changed, 189 insertions(+), 64 deletions(-) create mode 100644 releasenotes/notes/granular-extra-specs-50b26b8f63717942.yaml diff --git a/nova/scheduler/client/report.py b/nova/scheduler/client/report.py index 7f225af78518..8d2b6e34a60e 100644 --- a/nova/scheduler/client/report.py +++ b/nova/scheduler/client/report.py @@ -25,7 +25,6 @@ import os_traits from oslo_log import log as logging from oslo_middleware import request_id from oslo_utils import versionutils -from six.moves.urllib import parse from nova.compute import provider_tree from nova.compute import utils as compute_utils @@ -46,6 +45,7 @@ _RE_INV_IN_USE = re.compile("Inventory for (.+) on resource provider " "(.+) in use") WARN_EVERY = 10 PLACEMENT_CLIENT_SEMAPHORE = 'placement_client' +GRANULAR_AC_VERSION = '1.25' POST_RPS_RETURNS_PAYLOAD_API_VERSION = '1.20' NESTED_PROVIDER_API_VERSION = '1.14' POST_ALLOCATIONS_API_VERSION = '1.13' @@ -333,39 +333,9 @@ class SchedulerReportClient(object): "Candidates are in either 'foo' or 'bar', but definitely in 'baz'" """ - # TODO(efried): For now, just use the unnumbered group to retain - # existing behavior. 
Once the GET /allocation_candidates API is - # prepped to accept the whole shebang, we'll join up all the resources - # and traits in the query string (via a new method on ResourceRequest). - res = resources.get_request_group(None).resources - required_traits = resources.get_request_group(None).required_traits - forbidden_traits = resources.get_request_group(None).forbidden_traits - aggregates = resources.get_request_group(None).member_of - - resource_query = ",".join( - sorted("%s:%s" % (rc, amount) - for (rc, amount) in res.items())) - qs_params = [ - ('resources', resource_query), - ('limit', CONF.scheduler.max_placement_results), - ] - required_traits_params = [] - if required_traits: - required_traits_params.extend(required_traits) - if forbidden_traits: - # Sorted to make testing easier to manage and for - # predictability. - required_traits_params.extend( - ['!%s' % trait for trait in sorted(forbidden_traits)]) - if required_traits_params: - qs_params.append(('required', ','.join(required_traits_params))) - - if aggregates: - for agg_list in aggregates: - qs_params.append(('member_of', 'in:%s' % ','.join(agg_list))) - - version = '1.24' - url = "/allocation_candidates?%s" % parse.urlencode(qs_params) + version = GRANULAR_AC_VERSION + qparams = resources.to_querystring() + url = "/allocation_candidates?%s" % qparams resp = self.get(url, version=version, global_request_id=context.global_id) if resp.status_code == 200: diff --git a/nova/scheduler/utils.py b/nova/scheduler/utils.py index 4b16af76cf44..5f35242dacaa 100644 --- a/nova/scheduler/utils.py +++ b/nova/scheduler/utils.py @@ -22,6 +22,7 @@ import sys from oslo_log import log as logging import oslo_messaging as messaging from oslo_serialization import jsonutils +from six.moves.urllib import parse from nova.api.openstack.placement import lib as placement_lib from nova.compute import flavors @@ -56,6 +57,8 @@ class ResourceRequest(object): def __init__(self): # { ident: RequestGroup } self._rg_by_id = {} + 
self._group_policy = None + self._limit = CONF.scheduler.max_placement_results def __str__(self): return ', '.join(sorted( @@ -84,7 +87,7 @@ class ResourceRequest(object): LOG.warning( "Resource amounts must be nonnegative integers. Received " "'%(val)s' for key resources%(groupid)s.", - {"groupid": groupid, "val": amount}) + {"groupid": groupid or '', "val": amount}) return self.get_request_group(groupid).resources[rclass] = amount @@ -100,10 +103,19 @@ class ResourceRequest(object): LOG.warning( "Only (%(tvals)s) traits are supported. Received '%(val)s' " "for key trait%(groupid)s.", - {"tvals": ', '.join(trait_vals), "groupid": groupid, + {"tvals": ', '.join(trait_vals), "groupid": groupid or '', "val": trait_type}) return + def _add_group_policy(self, policy): + # The only valid values for group_policy are 'none' and 'isolate'. + if policy not in ('none', 'isolate'): + LOG.warning( + "Invalid group_policy '%s'. Valid values are 'none' and " + "'isolate'.", policy) + return + self._group_policy = policy + @classmethod def from_extra_specs(cls, extra_specs, req=None): """Processes resources and traits in numbered groupings in extra_specs. @@ -114,18 +126,27 @@ class ResourceRequest(object): "trait:$TRAIT_NAME": "required" "trait$N:$TRAIT_NAME": "required" + Does *not* yet handle member_of[$N]. + :param extra_specs: The flavor extra_specs dict. :param req: the ResourceRequest object to add the requirements to or None to create a new ResourceRequest :return: A ResourceRequest object representing the resources and required traits in the extra_specs. 
""" + # TODO(efried): Handle member_of[$N], which will need to be reconciled + # with destination.aggregates handling in resources_from_request_spec + if req is not None: ret = req else: ret = cls() for key, val in extra_specs.items(): + if key == 'group_policy': + ret._add_group_policy(val) + continue + match = cls.XS_KEYPAT.match(key) if not match: continue @@ -217,6 +238,51 @@ class ResourceRequest(object): resource_dict.pop(rclass) self._clean_empties() + def to_querystring(self): + """Produce a querystring of the form expected by + GET /allocation_candidates. + """ + # NOTE(efried): The sorting herein is not necessary for the API; it is + # to make testing easier and logging/debugging predictable. + def to_queryparams(request_group, suffix): + res = request_group.resources + required_traits = request_group.required_traits + forbidden_traits = request_group.forbidden_traits + aggregates = request_group.member_of + + resource_query = ",".join( + sorted("%s:%s" % (rc, amount) + for (rc, amount) in res.items())) + qs_params = [('resources%s' % suffix, resource_query)] + + # Assemble required and forbidden traits, allowing for either/both + # to be empty. + required_val = ','.join( + sorted(required_traits) + + ['!%s' % ft for ft in sorted(forbidden_traits)]) + if required_val: + qs_params.append(('required%s' % suffix, required_val)) + if aggregates: + aggs = [] + # member_ofN is a list of lists. We need a tuple of + # ('member_ofN', 'in:uuid,uuid,...') for each inner list. 
+ for agglist in aggregates: + aggs.append(('member_of%s' % suffix, + 'in:' + ','.join(sorted(agglist)))) + qs_params.extend(sorted(aggs)) + return qs_params + + qparams = [('limit', self._limit)] + if self._group_policy is not None: + qparams.append(('group_policy', self._group_policy)) + for ident, rg in self._rg_by_id.items(): + # [('resourcesN', 'rclass:amount,rclass:amount,...'), + # ('requiredN', 'trait_name,!trait_name,...'), + # ('member_ofN', 'in:uuid,uuid,...'), + # ('member_ofN', 'in:uuid,uuid,...')] + qparams.extend(to_queryparams(rg, ident or '')) + return parse.urlencode(sorted(qparams)) + def build_request_spec(image, instances, instance_type=None): """Build a request_spec for the scheduler. diff --git a/nova/tests/unit/scheduler/client/test_report.py b/nova/tests/unit/scheduler/client/test_report.py index c19b10ed0ee6..688b315e6564 100644 --- a/nova/tests/unit/scheduler/client/test_report.py +++ b/nova/tests/unit/scheduler/client/test_report.py @@ -1451,40 +1451,63 @@ class TestProviderOperations(SchedulerReportClientTestCase): resources = scheduler_utils.ResourceRequest.from_extra_specs({ 'resources:VCPU': '1', 'resources:MEMORY_MB': '1024', - 'resources1:DISK_GB': '30', + 'trait:HW_CPU_X86_AVX': 'required', 'trait:CUSTOM_TRAIT1': 'required', 'trait:CUSTOM_TRAIT2': 'preferred', 'trait:CUSTOM_TRAIT3': 'forbidden', 'trait:CUSTOM_TRAIT4': 'forbidden', + 'resources1:DISK_GB': '30', + 'trait1:STORAGE_DISK_SSD': 'required', + 'resources2:VGPU': '2', + 'trait2:HW_GPU_RESOLUTION_W2560H1600': 'required', + 'trait2:HW_GPU_API_VULKAN': 'required', + 'resources3:SRIOV_NET_VF': '1', + 'resources3:CUSTOM_NET_EGRESS_BYTES_SEC': '125000', + 'group_policy': 'isolate', + # These are ignored because misspelled, bad value, etc. 
+ 'resources02:CUSTOM_WIDGET': '123', + 'trait:HW_NIC_OFFLOAD_LRO': 'preferred', + 'group_policy3': 'none', }) resources.get_request_group(None).member_of = [ ('agg1', 'agg2', 'agg3'), ('agg1', 'agg2')] expected_path = '/allocation_candidates' - expected_query = { - 'resources': ['MEMORY_MB:1024,VCPU:1'], - 'required': ['CUSTOM_TRAIT1,!CUSTOM_TRAIT3,!CUSTOM_TRAIT4'], - 'member_of': ['in:agg1,agg2,agg3', 'in:agg1,agg2'], - 'limit': ['1000'] - } + expected_query = [ + ('group_policy', 'isolate'), + ('limit', '1000'), + ('member_of', 'in:agg1,agg2'), + ('member_of', 'in:agg1,agg2,agg3'), + ('required', 'CUSTOM_TRAIT1,HW_CPU_X86_AVX,!CUSTOM_TRAIT3,' + '!CUSTOM_TRAIT4'), + ('required1', 'STORAGE_DISK_SSD'), + ('required2', 'HW_GPU_API_VULKAN,HW_GPU_RESOLUTION_W2560H1600'), + ('resources', 'MEMORY_MB:1024,VCPU:1'), + ('resources1', 'DISK_GB:30'), + ('resources2', 'VGPU:2'), + ('resources3', 'CUSTOM_NET_EGRESS_BYTES_SEC:125000,SRIOV_NET_VF:1') + ] resp_mock.json.return_value = json_data self.ks_adap_mock.get.return_value = resp_mock - alloc_reqs, p_sums, allocation_request_version = \ - self.client.get_allocation_candidates(self.context, resources) + alloc_reqs, p_sums, allocation_request_version = ( + self.client.get_allocation_candidates(self.context, resources)) - self.ks_adap_mock.get.assert_called_once_with( - mock.ANY, raise_exc=False, microversion='1.24', - headers={'X-Openstack-Request-Id': self.context.global_id}) url = self.ks_adap_mock.get.call_args[0][0] split_url = parse.urlsplit(url) - query = parse.parse_qs(split_url.query) + query = parse.parse_qsl(split_url.query) self.assertEqual(expected_path, split_url.path) self.assertEqual(expected_query, query) + expected_url = '/allocation_candidates?%s' % parse.urlencode( + expected_query) + self.ks_adap_mock.get.assert_called_once_with( + expected_url, raise_exc=False, microversion='1.25', + headers={'X-Openstack-Request-Id': self.context.global_id}) self.assertEqual(mock.sentinel.alloc_reqs, alloc_reqs) 
self.assertEqual(mock.sentinel.p_sums, p_sums) - def test_get_allocation_candidates_with_no_trait(self): + def test_get_ac_no_trait_bogus_group_policy_custom_limit(self): + self.flags(max_placement_results=42, group='scheduler') resp_mock = mock.Mock(status_code=200) json_data = { 'allocation_requests': mock.sentinel.alloc_reqs, @@ -1494,26 +1517,33 @@ class TestProviderOperations(SchedulerReportClientTestCase): 'resources:VCPU': '1', 'resources:MEMORY_MB': '1024', 'resources1:DISK_GB': '30', + 'group_policy': 'bogus', }) expected_path = '/allocation_candidates' - expected_query = {'resources': ['MEMORY_MB:1024,VCPU:1'], - 'limit': ['1000']} + expected_query = [ + ('limit', '42'), + ('resources', 'MEMORY_MB:1024,VCPU:1'), + ('resources1', 'DISK_GB:30'), + ] resp_mock.json.return_value = json_data self.ks_adap_mock.get.return_value = resp_mock - alloc_reqs, p_sums, allocation_request_version = \ - self.client.get_allocation_candidates(self.context, resources) + alloc_reqs, p_sums, allocation_request_version = ( + self.client.get_allocation_candidates(self.context, resources)) - self.ks_adap_mock.get.assert_called_once_with( - mock.ANY, raise_exc=False, microversion='1.24', - headers={'X-Openstack-Request-Id': self.context.global_id}) url = self.ks_adap_mock.get.call_args[0][0] split_url = parse.urlsplit(url) - query = parse.parse_qs(split_url.query) + query = parse.parse_qsl(split_url.query) self.assertEqual(expected_path, split_url.path) self.assertEqual(expected_query, query) + expected_url = '/allocation_candidates?%s' % parse.urlencode( + expected_query) self.assertEqual(mock.sentinel.alloc_reqs, alloc_reqs) + self.ks_adap_mock.get.assert_called_once_with( + expected_url, raise_exc=False, microversion='1.25', + headers={'X-Openstack-Request-Id': self.context.global_id}) + self.assertEqual(mock.sentinel.p_sums, p_sums) def test_get_allocation_candidates_not_found(self): # Ensure _get_resource_provider() just returns None when the placement @@ -1533,7 +1563,7 @@ 
class TestProviderOperations(SchedulerReportClientTestCase): res = self.client.get_allocation_candidates(self.context, resources) self.ks_adap_mock.get.assert_called_once_with( - mock.ANY, raise_exc=False, microversion='1.24', + mock.ANY, raise_exc=False, microversion='1.25', headers={'X-Openstack-Request-Id': self.context.global_id}) url = self.ks_adap_mock.get.call_args[0][0] split_url = parse.urlsplit(url) diff --git a/nova/tests/unit/scheduler/test_utils.py b/nova/tests/unit/scheduler/test_utils.py index a9c321c5a9da..101c72a793cb 100644 --- a/nova/tests/unit/scheduler/test_utils.py +++ b/nova/tests/unit/scheduler/test_utils.py @@ -30,6 +30,8 @@ class TestUtils(test.NoDBTestCase): self.context = nova_context.get_admin_context() def assertResourceRequestsEqual(self, expected, observed): + self.assertEqual(expected._limit, observed._limit) + self.assertEqual(expected._group_policy, observed._group_policy) ex_by_id = expected._rg_by_id ob_by_id = observed._rg_by_id self.assertEqual(set(ex_by_id), set(ob_by_id)) @@ -53,6 +55,7 @@ class TestUtils(test.NoDBTestCase): fake_spec = objects.RequestSpec(flavor=flavor, image=image) resources = utils.resources_from_request_spec(fake_spec) self.assertResourceRequestsEqual(expected, resources) + return resources def test_resources_from_request_spec_flavor_only(self): flavor = objects.Flavor(vcpus=1, @@ -236,8 +239,10 @@ class TestUtils(test.NoDBTestCase): 'resources:IPV4_ADDRESS': '0', 'resources:CUSTOM_FOO': '0', # Bogus values don't make it through - 'resources1:MEMORY_MB': 'bogus'}) + 'resources1:MEMORY_MB': 'bogus', + 'group_policy': 'none'}) expected_resources = utils.ResourceRequest() + expected_resources._group_policy = 'none' expected_resources._rg_by_id[None] = plib.RequestGroup( use_same_provider=False, resources={ @@ -270,7 +275,19 @@ class TestUtils(test.NoDBTestCase): 'SRIOV_NET_VF': 1, } ) - self._test_resources_from_request_spec(expected_resources, flavor) + + rr = 
self._test_resources_from_request_spec(expected_resources, flavor) + expected_querystring = ( + 'group_policy=none&' + 'limit=1000&' + 'required3=CUSTOM_GOLD%2CCUSTOM_SILVER&' + 'resources=CUSTOM_THING%3A123%2CDISK_GB%3A10&' + 'resources1=VGPU%3A1%2CVGPU_DISPLAY_HEAD%3A2&' + 'resources24=SRIOV_NET_VF%3A2&' + 'resources3=VCPU%3A2&' + 'resources42=SRIOV_NET_VF%3A1' + ) + self.assertEqual(expected_querystring, rr.to_querystring()) def test_resources_from_request_spec_aggregates(self): destination = objects.Destination() @@ -384,8 +401,8 @@ class TestUtils(test.NoDBTestCase): self.assertEqual(expected, actual) @mock.patch('nova.compute.utils.is_volume_backed_instance', - return_value=False) - def test_resources_from_flavor_with_override(self, mock_is_bfv): + new=mock.Mock(return_value=False)) + def test_resources_from_flavor_with_override(self): flavor = objects.Flavor( vcpus=1, memory_mb=1024, root_gb=10, ephemeral_gb=5, swap=1024, extra_specs={ @@ -404,7 +421,8 @@ class TestUtils(test.NoDBTestCase): 'resources86:MEMORY_MB': 0, # Standard and custom zeroes don't make it through 'resources:IPV4_ADDRESS': 0, - 'resources:CUSTOM_FOO': 0}) + 'resources:CUSTOM_FOO': 0, + 'group_policy': 'none'}) instance = objects.Instance() expected = { 'VCPU': 2, @@ -445,9 +463,11 @@ class TestUtils(test.NoDBTestCase): 'traitFoo:HW_NIC_ACCEL_SSL': 'required', # Solo resource, no corresponding traits 'resources3:DISK_GB': '5', + 'group_policy': 'isolate', } # Build up a ResourceRequest from the inside to compare against. 
expected = utils.ResourceRequest() + expected._group_policy = 'isolate' expected._rg_by_id[None] = plib.RequestGroup( use_same_provider=False, resources={ @@ -489,8 +509,21 @@ class TestUtils(test.NoDBTestCase): 'DISK_GB': 5, } ) + rr = utils.ResourceRequest.from_extra_specs(extra_specs) self.assertResourceRequestsEqual(expected, rr) + expected_querystring = ( + 'group_policy=isolate&' + 'limit=1000&' + 'required=CUSTOM_MAGIC%2CHW_CPU_X86_AVX%2C%21CUSTOM_BRONZE&' + 'required1=CUSTOM_PHYSNET_NET1%2C%21CUSTOM_PHYSNET_NET2&' + 'required2=CUSTOM_PHYSNET_NET2%2CHW_NIC_ACCEL_SSL&' + 'resources=MEMORY_MB%3A2048%2CVCPU%3A2&' + 'resources1=IPV4_ADDRESS%3A1%2CSRIOV_NET_VF%3A1&' + 'resources2=IPV4_ADDRESS%3A2%2CSRIOV_NET_VF%3A1&' + 'resources3=DISK_GB%3A5' + ) + self.assertEqual(expected_querystring, rr.to_querystring()) # Test stringification self.assertEqual( diff --git a/releasenotes/notes/granular-extra-specs-50b26b8f63717942.yaml b/releasenotes/notes/granular-extra-specs-50b26b8f63717942.yaml new file mode 100644 index 000000000000..99e3b99db197 --- /dev/null +++ b/releasenotes/notes/granular-extra-specs-50b26b8f63717942.yaml @@ -0,0 +1,26 @@ +--- +features: + - | + Added support for granular resource and traits requests to the scheduler. + A flavor extra spec is extended to support specifying numbered groupings of + resources and required/forbidden traits. A ``resources`` key with a + positive integer suffix (e.g. ``resources42:VCPU``) will be logically + associated with ``trait`` keys with the same suffix (e.g. + ``trait42:HW_CPU_X86_AVX``). The resources and required/forbidden traits + in that group will be satisfied by the same resource provider on the host + selected by the scheduler. When more than one numbered grouping is + supplied, the ``group_policy`` extra spec is required to indicate how the + groups should interact. With ``group_policy=none``, separate groupings - + numbered or unnumbered - may or may not be satisfied by the same provider. 
+ With ``group_policy=isolate``, numbered groups are guaranteed to be + satisfied by *different* providers - though there may still be overlap with + the unnumbered group. + + ``trait`` keys for a given group are optional. That is, you may specify + ``resources42:XXX`` without a corresponding ``trait42:YYY``. However, the + reverse (specifying ``trait42:YYY`` without ``resources42:XXX``) will + result in an error. + + The semantics of the (unnumbered) ``resources`` and ``trait`` keys are + unchanged: the resources and traits specified thereby may be satisfied by + any provider on the same host or associated via aggregate.