From 96acf3db0bc9ecaddb04fc9effc4b9ca1553103d Mon Sep 17 00:00:00 2001 From: Matt Riedemann Date: Mon, 18 Dec 2017 17:41:26 -0500 Subject: [PATCH] Don't try to delete build request during a reschedule If populate_retry failed because of MaxRetriesExceeded, don't try to delete build requests because they should already be gone from the initial create attempt, plus we should assume the cell conductor can't reach the API database anyway. Similar for hitting NoValidHost during a reschedule. We can tell if we're doing a reschedule by the num_attempts value in filter_properties, populated via populate_retry, which will be >1 during a reschedule. Change-Id: I0b3ec6bb098ca32ffd32a61d4f9dcf426c3faf46 Closes-Bug: #1736946 (cherry picked from commit cf88a27c6250043859306d47ae82a63535ad507f) --- nova/conductor/manager.py | 20 ++++++--- nova/tests/unit/conductor/test_conductor.py | 46 +++++++++++++++++++++ 2 files changed, 60 insertions(+), 6 deletions(-) diff --git a/nova/conductor/manager.py b/nova/conductor/manager.py index 27cb5d22e70c..8afe39be7690 100644 --- a/nova/conductor/manager.py +++ b/nova/conductor/manager.py @@ -552,17 +552,25 @@ class ComputeTaskManager(base.Base): hosts = self._schedule_instances( context, spec_obj, instance_uuids) except Exception as exc: + num_attempts = filter_properties.get( + 'retry', {}).get('num_attempts', 1) updates = {'vm_state': vm_states.ERROR, 'task_state': None} for instance in instances: self._set_vm_state_and_notify( context, instance.uuid, 'build_instances', updates, exc, request_spec) - try: - # If the BuildRequest stays around then instance show/lists - # will pull from it rather than the errored instance. - self._destroy_build_request(context, instance) - except exception.BuildRequestNotFound: - pass + # If num_attempts > 1, we're in a reschedule and probably + # either hit NoValidHost or MaxRetriesExceeded. Either way, + # the build request should already be gone and we probably + # can't reach the API DB from the cell conductor. + if num_attempts <= 1: + try: + # If the BuildRequest stays around then instance + # show/lists will pull from it rather than the errored + # instance. + self._destroy_build_request(context, instance) + except exception.BuildRequestNotFound: + pass self._cleanup_allocated_networks( context, instance, requested_networks) return diff --git a/nova/tests/unit/conductor/test_conductor.py b/nova/tests/unit/conductor/test_conductor.py index c011d9a3cd6d..2e885e4bc282 100644 --- a/nova/tests/unit/conductor/test_conductor.py +++ b/nova/tests/unit/conductor/test_conductor.py @@ -2528,6 +2528,52 @@ class ConductorTaskTestCase(_BaseTaskTestCase, test_compute.BaseTestCase): block_device_mapping=mock.ANY, node='node2', limits=[]) + @mock.patch('nova.objects.Instance.save') + def test_build_instances_max_retries_exceeded(self, mock_save): + """Tests that when populate_retry raises MaxRetriesExceeded in + build_instances, we don't attempt to cleanup the build request. + """ + instance = fake_instance.fake_instance_obj(self.context) + image = {'id': uuids.image_id} + filter_props = { + 'retry': { + 'num_attempts': CONF.scheduler.max_attempts + } + } + requested_networks = objects.NetworkRequestList() + with mock.patch.object(self.conductor, '_destroy_build_request', + new_callable=mock.NonCallableMock): + self.conductor.build_instances( + self.context, [instance], image, filter_props, + mock.sentinel.admin_pass, mock.sentinel.files, + requested_networks, mock.sentinel.secgroups) + mock_save.assert_called_once_with() + + @mock.patch('nova.objects.Instance.save') + def test_build_instances_reschedule_no_valid_host(self, mock_save): + """Tests that when select_destinations raises NoValidHost in + build_instances, we don't attempt to cleanup the build request if + we're rescheduling (num_attempts>1). + """ + instance = fake_instance.fake_instance_obj(self.context) + image = {'id': uuids.image_id} + filter_props = { + 'retry': { + 'num_attempts': 1 # populate_retry will increment this + } + } + requested_networks = objects.NetworkRequestList() + with mock.patch.object(self.conductor, '_destroy_build_request', + new_callable=mock.NonCallableMock): + with mock.patch.object( + self.conductor.scheduler_client, 'select_destinations', + side_effect=exc.NoValidHost(reason='oops')): + self.conductor.build_instances( + self.context, [instance], image, filter_props, + mock.sentinel.admin_pass, mock.sentinel.files, + requested_networks, mock.sentinel.secgroups) + mock_save.assert_called_once_with() + def test_cleanup_allocated_networks_none_requested(self): # Tests that we don't deallocate networks if 'none' were specifically # requested.