From 3051a0d5d5639b451d6da5e9f490332399b18c54 Mon Sep 17 00:00:00 2001 From: melanie witt Date: Fri, 2 Feb 2018 05:41:20 +0000 Subject: [PATCH] Make scheduler.utils.setup_instance_group query all cells To check affinity and anti-affinity policies for scheduling instances, we use the RequestSpec.instance_group.hosts field to check the hosts that have group members on them. Access of the 'hosts' field calls InstanceGroup.get_hosts during a lazy-load and get_hosts does a query for all instances that are members of the group and returns their hosts after removing duplicates. The InstanceList query isn't targeting any cells, so it will return [] in a multi-cell environment in both the instance create case and the instance move case. In the move case, we do have a cell-targeted RequestContext when setup_instance_group is called *but* the RequestSpec.instance_group object is queried early in compute/api before we're targeted to a cell, so a call of RequestSpec.instance_group.get_hosts() will result in [] still, even for move operations. This makes setup_instance_group query all cells for instances that are members of the instance group if the RequestContext is untargeted, else it queries the targeted cell for the instances. Closes-Bug: #1746863 Change-Id: Ia5f5a0d75953b1154a8de3e1eaa15f8042e32d77 (cherry picked from commit 14f4c502f92b10b669044e5069ac3b3555a42ee0) --- nova/objects/instance_group.py | 8 +++ nova/scheduler/utils.py | 54 ++++++++++++++++++- nova/tests/functional/test_server_group.py | 11 ++-- .../tests/unit/objects/test_instance_group.py | 3 +- .../unit/scheduler/test_scheduler_utils.py | 3 +- 5 files changed, 68 insertions(+), 11 deletions(-) diff --git a/nova/objects/instance_group.py b/nova/objects/instance_group.py index 1ae57068a915..04c567a8c2fb 100644 --- a/nova/objects/instance_group.py +++ b/nova/objects/instance_group.py @@ -15,6 +15,7 @@ import copy from oslo_db import exception as db_exc +from oslo_log import log as logging from oslo_serialization import jsonutils from oslo_utils import uuidutils from oslo_utils import versionutils @@ -31,6 +32,7 @@ from nova.objects import fields LAZY_LOAD_FIELDS = ['hosts'] +LOG = logging.getLogger(__name__) def _instance_group_get_query(context, id_field=None, id=None): @@ -341,6 +343,12 @@ class InstanceGroup(base.NovaPersistentObject, base.NovaObject, raise exception.ObjectActionError( action='obj_load_attr', reason='unable to load %s' % attrname) + LOG.debug("Lazy-loading '%(attr)s' on %(name)s uuid %(uuid)s", + {'attr': attrname, + 'name': self.obj_name(), + 'uuid': self.uuid, + }) + self.hosts = self.get_hosts() self.obj_reset_changes(['hosts']) diff --git a/nova/scheduler/utils.py b/nova/scheduler/utils.py index 26f3b54a06ac..3c6e65a573ff 100644 --- a/nova/scheduler/utils.py +++ b/nova/scheduler/utils.py @@ -28,6 +28,7 @@ from nova.api.openstack.placement import lib as placement_lib from nova.compute import flavors from nova.compute import utils as compute_utils import nova.conf +from nova import context as nova_context from nova import exception from nova.i18n import _ from nova import objects @@ -813,12 +814,44 @@ def _get_group_details(context, instance_uuid, user_group_hosts=None): msg = _("ServerGroupSoftAntiAffinityWeigher not configured") LOG.error(msg) raise exception.UnsupportedPolicyException(reason=msg) - group_hosts = set(group.get_hosts()) + # NOTE(melwitt): If the context is already targeted to a cell (during a + # move operation), we don't need to scatter-gather. + if context.db_connection: + # We don't need to target the group object's context because it was + # retrieved with the targeted context earlier in this method. + group_hosts = set(group.get_hosts()) + else: + group_hosts = set(_get_instance_group_hosts_all_cells(context, + group)) user_hosts = set(user_group_hosts) if user_group_hosts else set() return GroupDetails(hosts=user_hosts | group_hosts, policy=group.policy, members=group.members) +def _get_instance_group_hosts_all_cells(context, instance_group): + def get_hosts_in_cell(cell_context): + # NOTE(melwitt): The obj_alternate_context is going to mutate the + # cell_instance_group._context and to do this in a scatter-gather + # with multiple parallel greenthreads, we need the instance groups + # to be separate object copies. + cell_instance_group = instance_group.obj_clone() + with cell_instance_group.obj_alternate_context(cell_context): + return cell_instance_group.get_hosts() + + results = nova_context.scatter_gather_skip_cell0(context, + get_hosts_in_cell) + hosts = [] + for result in results.values(): + # TODO(melwitt): We will need to handle scenarios where an exception + # is raised while targeting a cell and when a cell does not respond + # as part of the "handling of a down cell" spec: + # https://blueprints.launchpad.net/nova/+spec/handling-down-cell + if result not in (nova_context.did_not_respond_sentinel, + nova_context.raised_exception_sentinel): + hosts.extend(result) + return hosts + + def setup_instance_group(context, request_spec): """Add group_hosts and group_policies fields to filter_properties dict based on instance uuids provided in request_spec, if those instances are @@ -826,11 +859,30 @@ def setup_instance_group(context, request_spec): :param request_spec: Request spec """ + # NOTE(melwitt): Proactively query for the instance group hosts instead of + # relying on a lazy-load via the 'hosts' field of the InstanceGroup object. + if (request_spec.instance_group and + 'hosts' not in request_spec.instance_group): + group = request_spec.instance_group + # If the context is already targeted to a cell (during a move + # operation), we don't need to scatter-gather. We do need to use + # obj_alternate_context here because the RequestSpec is queried at the + # start of a move operation in compute/api, before the context has been + # targeted. + if context.db_connection: + with group.obj_alternate_context(context): + group.hosts = group.get_hosts() + else: + group.hosts = _get_instance_group_hosts_all_cells(context, group) + if request_spec.instance_group and request_spec.instance_group.hosts: group_hosts = request_spec.instance_group.hosts else: group_hosts = None instance_uuid = request_spec.instance_uuid + # This queries the group details for the group where the instance is a + # member. The group_hosts passed in are the hosts that contain members of + # the requested instance group. group_info = _get_group_details(context, instance_uuid, group_hosts) if group_info is not None: request_spec.instance_group.hosts = list(group_info.hosts) diff --git a/nova/tests/functional/test_server_group.py b/nova/tests/functional/test_server_group.py index 799d834b1dd5..a9267d4382e8 100644 --- a/nova/tests/functional/test_server_group.py +++ b/nova/tests/functional/test_server_group.py @@ -966,12 +966,7 @@ class ServerGroupTestMultiCell(ServerGroupTestBase): # cell1. So boot the server to cell2 where the group member cannot be # found as a result of the default setting. self._boot_a_server_to_group(created_group, az='cell2') - # TODO(melwitt): Uncomment this when the bug is fixed. - # self._boot_a_server_to_group(created_group, az='cell1', - # expected_status='ERROR') - # TODO(melwitt): Remove this when the bug is fixed. # Boot a server to cell1 with the affinity policy. This should fail - # because a group member has landed in cell2 already. But it passes - # because of the bug -- the query for group members doesn't find any - # members in cell2 because the query isn't multi-cell enabled. - self._boot_a_server_to_group(created_group, az='cell1') + # because group members found in cell2 should violate the policy. + self._boot_a_server_to_group(created_group, az='cell1', + expected_status='ERROR') diff --git a/nova/tests/unit/objects/test_instance_group.py b/nova/tests/unit/objects/test_instance_group.py index 1d2e585955ee..482190e114d4 100644 --- a/nova/tests/unit/objects/test_instance_group.py +++ b/nova/tests/unit/objects/test_instance_group.py @@ -327,7 +327,8 @@ class _TestInstanceGroupObject(object): mock_get_by_filt.return_value = [objects.Instance(host='host1'), objects.Instance(host='host2')] - obj = objects.InstanceGroup(self.context, members=['uuid1']) + obj = objects.InstanceGroup(self.context, members=['uuid1'], + uuid=uuids.group) self.assertEqual(2, len(obj.hosts)) self.assertIn('host1', obj.hosts) self.assertIn('host2', obj.hosts) diff --git a/nova/tests/unit/scheduler/test_scheduler_utils.py b/nova/tests/unit/scheduler/test_scheduler_utils.py index 9b5e14e61565..dfdc5dc9c13d 100644 --- a/nova/tests/unit/scheduler/test_scheduler_utils.py +++ b/nova/tests/unit/scheduler/test_scheduler_utils.py @@ -21,6 +21,7 @@ import six from nova.compute import flavors from nova.compute import utils as compute_utils +from nova import context as nova_context from nova import exception from nova import objects from nova.scheduler import utils as scheduler_utils @@ -34,7 +35,7 @@ class SchedulerUtilsTestCase(test.NoDBTestCase): """Test case for scheduler utils methods.""" def setUp(self): super(SchedulerUtilsTestCase, self).setUp() - self.context = 'fake-context' + self.context = nova_context.get_context() def test_build_request_spec_without_image(self): instance = {'uuid': uuids.instance}