Don't try to delete build request during a reschedule

If populate_retry failed because of MaxRetriesExceeded,
don't try to delete build requests because they should
already be gone from the initial create attempt, plus
we should assume the cell conductor can't reach the API
database anyway.

Similar for hitting NoValidHost during a reschedule. We
can tell if we're doing a reschedule by the num_attempts
value in filter_properties, populated via populate_retry,
which will be >1 during a reschedule.

Change-Id: I0b3ec6bb098ca32ffd32a61d4f9dcf426c3faf46
Closes-Bug: #1736946
(cherry picked from commit cf88a27c62)
This commit is contained in:
Matt Riedemann 2017-12-18 17:41:26 -05:00
parent ae7aef15f6
commit 96acf3db0b
2 changed files with 60 additions and 6 deletions

View File

@ -552,17 +552,25 @@ class ComputeTaskManager(base.Base):
hosts = self._schedule_instances(
context, spec_obj, instance_uuids)
except Exception as exc:
num_attempts = filter_properties.get(
'retry', {}).get('num_attempts', 1)
updates = {'vm_state': vm_states.ERROR, 'task_state': None}
for instance in instances:
self._set_vm_state_and_notify(
context, instance.uuid, 'build_instances', updates,
exc, request_spec)
try:
# If the BuildRequest stays around then instance show/lists
# will pull from it rather than the errored instance.
self._destroy_build_request(context, instance)
except exception.BuildRequestNotFound:
pass
# If num_attempts > 1, we're in a reschedule and probably
# either hit NoValidHost or MaxRetriesExceeded. Either way,
# the build request should already be gone and we probably
# can't reach the API DB from the cell conductor.
if num_attempts <= 1:
try:
# If the BuildRequest stays around then instance
# show/lists will pull from it rather than the errored
# instance.
self._destroy_build_request(context, instance)
except exception.BuildRequestNotFound:
pass
self._cleanup_allocated_networks(
context, instance, requested_networks)
return

View File

@ -2528,6 +2528,52 @@ class ConductorTaskTestCase(_BaseTaskTestCase, test_compute.BaseTestCase):
block_device_mapping=mock.ANY,
node='node2', limits=[])
@mock.patch('nova.objects.Instance.save')
def test_build_instances_max_retries_exceeded(self, mock_save):
"""Tests that when populate_retry raises MaxRetriesExceeded in
build_instances, we don't attempt to cleanup the build request.
"""
instance = fake_instance.fake_instance_obj(self.context)
image = {'id': uuids.image_id}
filter_props = {
'retry': {
'num_attempts': CONF.scheduler.max_attempts
}
}
requested_networks = objects.NetworkRequestList()
with mock.patch.object(self.conductor, '_destroy_build_request',
new_callable=mock.NonCallableMock):
self.conductor.build_instances(
self.context, [instance], image, filter_props,
mock.sentinel.admin_pass, mock.sentinel.files,
requested_networks, mock.sentinel.secgroups)
mock_save.assert_called_once_with()
@mock.patch('nova.objects.Instance.save')
def test_build_instances_reschedule_no_valid_host(self, mock_save):
"""Tests that when select_destinations raises NoValidHost in
build_instances, we don't attempt to cleanup the build request if
we're rescheduling (num_attempts>1).
"""
instance = fake_instance.fake_instance_obj(self.context)
image = {'id': uuids.image_id}
filter_props = {
'retry': {
'num_attempts': 1 # populate_retry will increment this
}
}
requested_networks = objects.NetworkRequestList()
with mock.patch.object(self.conductor, '_destroy_build_request',
new_callable=mock.NonCallableMock):
with mock.patch.object(
self.conductor.scheduler_client, 'select_destinations',
side_effect=exc.NoValidHost(reason='oops')):
self.conductor.build_instances(
self.context, [instance], image, filter_props,
mock.sentinel.admin_pass, mock.sentinel.files,
requested_networks, mock.sentinel.secgroups)
mock_save.assert_called_once_with()
def test_cleanup_allocated_networks_none_requested(self):
# Tests that we don't deallocate networks if 'none' were specifically
# requested.