# Copyright (c) 2011 OpenStack Foundation
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

"""
The FilterScheduler is for creating instances locally.
You can customize this scheduler by specifying your own Host Filters and
Weighing Functions.
"""

import random

from oslo_log import log as logging
from six.moves import range

import nova.conf
from nova import exception
from nova.i18n import _
from nova import rpc
from nova.scheduler import client
from nova.scheduler import driver

CONF = nova.conf.CONF
LOG = logging.getLogger(__name__)


class FilterScheduler(driver.Scheduler):
    """Scheduler that can be used for filtering and weighing."""
    def __init__(self, *args, **kwargs):
        super(FilterScheduler, self).__init__(*args, **kwargs)
        self.notifier = rpc.get_notifier('scheduler')
        scheduler_client = client.SchedulerClient()
        self.placement_client = scheduler_client.reportclient

    def select_destinations(self, context, spec_obj, instance_uuids,
            alloc_reqs_by_rp_uuid, provider_summaries):
        """Returns a sorted list of HostState objects that satisfy the
        supplied request_spec.

        These hosts will have already had their resources claimed in
        Placement.

        :param context: The RequestContext object
        :param spec_obj: The RequestSpec object
        :param instance_uuids: List of instance UUIDs, one for each instance
            requested by the spec object's num_instances attribute
        :param alloc_reqs_by_rp_uuid: Optional dict, keyed by resource
            provider UUID, of the allocation requests that may be used to
            claim resources against matched hosts. If None, indicates either
            the placement API wasn't reachable or that there were no
            allocation requests returned by the placement API. If the
            latter, provider_summaries will be an empty dict, not None.
        :param provider_summaries: Optional dict, keyed by resource provider
            UUID, of information that will be used by the filters/weighers
            in selecting matching hosts for a request. If None, indicates
            that the scheduler driver should grab all compute node
            information locally and that the Placement API is not used. If
            an empty dict, indicates the Placement API returned no potential
            matches for the requested resources.
        """
        self.notifier.info(
            context, 'scheduler.select_destinations.start',
            dict(request_spec=spec_obj.to_legacy_request_spec_dict()))

        num_instances = spec_obj.num_instances
        selected_hosts = self._schedule(context, spec_obj, instance_uuids,
            alloc_reqs_by_rp_uuid, provider_summaries)

        # Couldn't fulfill the request_spec
        if len(selected_hosts) < num_instances:
            # NOTE(Rui Chen): If multiple create requests fail, set the
            # updated time of the selected HostStates to None so that they
            # are refreshed from the database on the next scheduling pass,
            # releasing the resources consumed while selecting hosts.
            for host in selected_hosts:
                host.updated = None

            # Log the details but don't put those into the reason since
            # we don't want to give away too much information about our
            # actual environment.
            LOG.debug('There are %(hosts)d hosts available but '
                      '%(num_instances)d instances requested to build.',
                      {'hosts': len(selected_hosts),
                       'num_instances': num_instances})

            reason = _('There are not enough hosts available.')
            raise exception.NoValidHost(reason=reason)

        self.notifier.info(
            context, 'scheduler.select_destinations.end',
            dict(request_spec=spec_obj.to_legacy_request_spec_dict()))
        return selected_hosts

    def _schedule(self, context, spec_obj, instance_uuids,
            alloc_reqs_by_rp_uuid, provider_summaries):
        """Returns a list of hosts that meet the required specs, ordered by
        their fitness.

        These hosts will have already had their resources claimed in
        Placement.

        :param context: The RequestContext object
        :param spec_obj: The RequestSpec object
        :param instance_uuids: List of instance UUIDs to place or move.
        :param alloc_reqs_by_rp_uuid: Optional dict, keyed by resource
            provider UUID, of the allocation requests that may be used to
            claim resources against matched hosts. If None, indicates either
            the placement API wasn't reachable or that there were no
            allocation requests returned by the placement API. If the
            latter, provider_summaries will be an empty dict, not None.
        :param provider_summaries: Optional dict, keyed by resource provider
            UUID, of information that will be used by the filters/weighers
            in selecting matching hosts for a request. If None, indicates
            that the scheduler driver should grab all compute node
            information locally and that the Placement API is not used. If
            an empty dict, indicates the Placement API returned no potential
            matches for the requested resources.
        """
        elevated = context.elevated()

        # Find our local list of acceptable hosts by repeatedly
        # filtering and weighing our options. Each time we choose a
        # host, we virtually consume resources on it so subsequent
        # selections can adjust accordingly.

        # Note: remember, we are using an iterator here, so only
        # traverse this list once. This can bite you if the hosts
        # are being scanned in a filter or weighing function.
        hosts = self._get_all_host_states(elevated, spec_obj,
            provider_summaries)

        # A list of the instance UUIDs that were successfully claimed against
        # in the placement API. If we are not able to successfully claim for
        # all involved instances, we use this list to remove those allocations
        # before returning.
        claimed_instance_uuids = []

        selected_hosts = []

        # NOTE(sbauza): The RequestSpec.num_instances field contains the
        # number of instances created when the RequestSpec was used to first
        # boot some instances. This is incorrect when doing a move or resize
        # operation, so prefer the length of instance_uuids unless it is
        # None.
        num_instances = (len(instance_uuids) if instance_uuids
                         else spec_obj.num_instances)
        for num in range(num_instances):
            hosts = self._get_sorted_hosts(spec_obj, hosts, num)
            if not hosts:
                # NOTE(jaypipes): If we get here, that means not all instances
                # in instance_uuids were able to be matched to a selected
                # host. So, let's clean up any already-claimed allocations
                # here before breaking and returning.
                self._cleanup_allocations(claimed_instance_uuids)
                break

            if (instance_uuids is None or
                    not self.USES_ALLOCATION_CANDIDATES or
                    alloc_reqs_by_rp_uuid is None):
                # Unfortunately, we still need to deal with older conductors
                # that may not be passing in a list of instance_uuids. In
                # those cases, obviously we can't claim resources because we
                # don't have instance UUIDs to claim with, so we just grab
                # the first host in the list of sorted hosts. In addition to
                # older conductors, we need to support the caching scheduler,
                # which doesn't use the placement API (and has
                # USES_ALLOCATION_CANDIDATES = False) and therefore we skip
                # all the claiming logic for that scheduler driver. Finally,
                # if there was a problem communicating with the placement
                # API, alloc_reqs_by_rp_uuid will be None, so we skip
                # claiming in that case as well.
                claimed_host = hosts[0]
            else:
                instance_uuid = instance_uuids[num]

                # Attempt to claim the resources against one or more resource
                # providers, looping over the sorted list of possible hosts
                # looking for an allocation request that contains that host's
                # resource provider UUID.
                claimed_host = None
                for host in hosts:
                    cn_uuid = host.uuid
                    if cn_uuid not in alloc_reqs_by_rp_uuid:
                        LOG.debug("Found host state %s that wasn't in "
                                  "allocation requests. Skipping.", cn_uuid)
                        continue

                    alloc_reqs = alloc_reqs_by_rp_uuid[cn_uuid]
                    if self._claim_resources(elevated, spec_obj,
                                             instance_uuid, alloc_reqs):
                        claimed_host = host
                        break

                if claimed_host is None:
                    # We weren't able to claim resources in the placement API
                    # for any of the sorted hosts identified. So, clean up any
                    # successfully-claimed resources for prior instances in
                    # this request and return an empty list, which will cause
                    # select_destinations() to raise NoValidHost.
                    LOG.debug("Unable to successfully claim against any "
                              "host.")
                    self._cleanup_allocations(claimed_instance_uuids)
                    return []

                claimed_instance_uuids.append(instance_uuid)

            LOG.debug("Selected host: %(host)s", {'host': claimed_host})
            selected_hosts.append(claimed_host)

            # Now consume the resources so the filter/weights will change for
            # the next instance.
            claimed_host.consume_from_request(spec_obj)
            if spec_obj.instance_group is not None:
                spec_obj.instance_group.hosts.append(claimed_host.host)
                # hosts has to be not part of the updates when saving
                spec_obj.instance_group.obj_reset_changes(['hosts'])
        return selected_hosts

    def _cleanup_allocations(self, instance_uuids):
        """Removes allocations for the supplied instance UUIDs."""
        if not instance_uuids:
            return
        LOG.debug("Cleaning up allocations for %s", instance_uuids)
        for uuid in instance_uuids:
            self.placement_client.delete_allocation_for_instance(uuid)

    def _claim_resources(self, ctx, spec_obj, instance_uuid, alloc_reqs):
        """Given an instance UUID (representing the consumer of resources),
        the RequestSpec for the instance, and a list of allocation request
        JSON objects, attempt to claim resources for the instance in the
        placement API. Returns True if the claim process was successful,
        False otherwise.

        :param ctx: The RequestContext object
        :param spec_obj: The RequestSpec object
        :param instance_uuid: The UUID of the consuming instance
        :param alloc_reqs: A list of allocation request JSON objects that
            allocate against (at least) the compute host selected by the
            _schedule() method. These allocation requests were constructed
            from the response to a GET /allocation_candidates call to the
            placement API. Each allocation request satisfies the original
            request for resources and can be supplied as-is (along with the
            project and user ID) to the placement API's
            PUT /allocations/{consumer_uuid} call to claim resources for the
            instance.
        """
        LOG.debug("Attempting to claim resources in the placement API for "
                  "instance %s", instance_uuid)

        project_id = spec_obj.project_id

        # NOTE(jaypipes): So, the RequestSpec doesn't store the user_id,
        # only the project_id, so we need to grab the user information from
        # the context. Perhaps we should consider putting the user ID in
        # the spec object?
        user_id = ctx.user_id

        # TODO(jaypipes): Loop through all allocation requests instead of
        # just trying the first one. For now, since we'll likely want to
        # order the allocation requests in the future based on information
        # in the provider summaries, we'll just try to claim resources using
        # the first allocation request.
        alloc_req = alloc_reqs[0]

        claimed = self.placement_client.claim_resources(instance_uuid,
            alloc_req, project_id, user_id)

        if not claimed:
            return False

        LOG.debug("Successfully claimed resources for instance %s using "
                  "allocation request %s", instance_uuid, alloc_req)

        return True

    def _get_sorted_hosts(self, spec_obj, host_states, index):
        """Returns a list of HostState objects that match the required
        scheduling constraints for the request spec object and have been
        sorted according to the weighers.
        """
        filtered_hosts = self.host_manager.get_filtered_hosts(host_states,
            spec_obj, index)

        LOG.debug("Filtered %(hosts)s", {'hosts': filtered_hosts})

        if not filtered_hosts:
            return []

        weighed_hosts = self.host_manager.get_weighed_hosts(filtered_hosts,
            spec_obj)
        # Strip off the WeighedHost wrapper class...
        weighed_hosts = [h.obj for h in weighed_hosts]

        LOG.debug("Weighed %(hosts)s", {'hosts': weighed_hosts})

        # We randomize the first element in the returned list to alleviate
        # congestion where the same host is consistently selected among
        # numerous potential hosts for similar request specs.
        host_subset_size = CONF.filter_scheduler.host_subset_size
        if host_subset_size < len(weighed_hosts):
            weighed_subset = weighed_hosts[0:host_subset_size]
        else:
            weighed_subset = weighed_hosts
        chosen_host = random.choice(weighed_subset)
        weighed_hosts.remove(chosen_host)
        return [chosen_host] + weighed_hosts

    def _get_all_host_states(self, context, spec_obj, provider_summaries):
        """Template method, so a subclass can implement caching."""
        # NOTE(jaypipes): None is treated differently from an empty dict. We
        # pass None when we want to grab all compute nodes (for instance,
        # when using the caching scheduler). We pass an empty dict when the
        # Placement API found no providers that match the requested
        # constraints.
        compute_uuids = None
        if provider_summaries is not None:
            compute_uuids = list(provider_summaries.keys())
        return self.host_manager.get_host_states_by_uuids(context,
                                                          compute_uuids,
                                                          spec_obj)