Validate server group affinity policy

In git commit a79ecbe, Russell Bryant submitted a partial fix for a race
condition that occurs when booting an instance as part of a server group
with an "anti-affinity" scheduler policy.

It's possible to hit a similar race condition for server groups with
the "affinity" policy. Suppose we create a new group and then create two
instances simultaneously. The scheduler sees an empty group for each
request, so it may assign the instances to different compute nodes,
violating the policy.

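To guard against this, we extend _validate_instance_group_policy()
to cover the "affinity" case as well as "anti-affinity": if other group
members already have hosts and this compute host is not among them, the
validation raises RescheduledException so the instance can be rescheduled.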

Partial-Bug: #1423648
Change-Id: Icf95390a128e2062293e1f5b7b78fe79747f5f27
This commit is contained in:
Chris Friesen 2015-03-16 09:35:16 -06:00
parent 9e4bb495e8
commit 36a7035162
2 changed files with 34 additions and 22 deletions

View File

@ -1466,10 +1466,12 @@ class ComputeManager(manager.Manager):
filter_properties):
# NOTE(russellb) Instance group policy is enforced by the scheduler.
# However, there is a race condition with the enforcement of
# anti-affinity. Since more than one instance may be scheduled at the
# the policy. Since more than one instance may be scheduled at the
# same time, it's possible that more than one instance with an
# anti-affinity policy may end up here. This is a validation step to
# make sure that starting the instance here doesn't violate the policy.
# anti-affinity policy may end up here. It's also possible that
# multiple instances with an affinity policy could end up on different
# hosts. This is a validation step to make sure that starting the
# instance here doesn't violate the policy.
scheduler_hints = filter_properties.get('scheduler_hints') or {}
group_hint = scheduler_hints.get('group')
@ -1479,15 +1481,21 @@ class ComputeManager(manager.Manager):
@utils.synchronized(group_hint)
def _do_validation(context, instance, group_hint):
group = objects.InstanceGroup.get_by_hint(context, group_hint)
if 'anti-affinity' not in group.policies:
return
group_hosts = group.get_hosts(exclude=[instance.uuid])
if self.host in group_hosts:
msg = _("Anti-affinity instance group policy was violated.")
raise exception.RescheduledException(
instance_uuid=instance.uuid,
reason=msg)
if 'anti-affinity' in group.policies:
group_hosts = group.get_hosts(exclude=[instance.uuid])
if self.host in group_hosts:
msg = _("Anti-affinity instance group policy "
"was violated.")
raise exception.RescheduledException(
instance_uuid=instance.uuid,
reason=msg)
elif 'affinity' in group.policies:
group_hosts = group.get_hosts(exclude=[instance.uuid])
if group_hosts and self.host not in group_hosts:
msg = _("Affinity instance group policy was violated.")
raise exception.RescheduledException(
instance_uuid=instance.uuid,
reason=msg)
_do_validation(context, instance, group_hint)

View File

@ -3894,9 +3894,9 @@ class ComputeTestCase(BaseTestCase):
self.compute.run_instance(self.context, instance, {}, {}, None, None,
None, True, None, False)
def _create_server_group(self):
def _create_server_group(self, policies, instance_host):
group_instance = self._create_fake_instance_obj(
params=dict(host=self.compute.host))
params=dict(host=instance_host))
instance_group = objects.InstanceGroup(self.context)
instance_group.user_id = self.user_id
@ -3904,7 +3904,7 @@ class ComputeTestCase(BaseTestCase):
instance_group.name = 'messi'
instance_group.uuid = str(uuid.uuid4())
instance_group.members = [group_instance.uuid]
instance_group.policies = ['anti-affinity']
instance_group.policies = policies
fake_notifier.NOTIFICATIONS = []
instance_group.create()
self.assertEqual(1, len(fake_notifier.NOTIFICATIONS))
@ -3917,7 +3917,7 @@ class ComputeTestCase(BaseTestCase):
self.assertEqual('servergroup.create', msg.event_type)
return instance_group
def _run_instance_reschedules_on_anti_affinity_violation(self, group,
def _run_instance_reschedules_on_policy_violation(self, group,
hint):
instance = self._create_fake_instance_obj()
filter_properties = {'scheduler_hints': {'group': hint}}
@ -3928,14 +3928,18 @@ class ComputeTestCase(BaseTestCase):
None, False)
def test_run_instance_reschedules_on_anti_affinity_violation_by_name(self):
group = self._create_server_group()
self._run_instance_reschedules_on_anti_affinity_violation(group,
group.name)
group = self._create_server_group(['anti-affinity'], self.compute.host)
self._run_instance_reschedules_on_policy_violation(group, group.name)
def test_run_instance_reschedules_on_anti_affinity_violation_by_uuid(self):
group = self._create_server_group()
self._run_instance_reschedules_on_anti_affinity_violation(group,
group.uuid)
group = self._create_server_group(['anti-affinity'], self.compute.host)
self._run_instance_reschedules_on_policy_violation(group, group.uuid)
def test_run_instance_reschedules_on_affinity_violation_by_uuid(self):
# Put the fake instance already in the group on a different host
hostname = self.compute.host + '.1'
group = self._create_server_group(['affinity'], hostname)
self._run_instance_reschedules_on_policy_violation(group, group.uuid)
def test_instance_set_to_error_on_uncaught_exception(self):
# Test that instance is set to error state when exception is raised.