From f70119c842958e730d64db44d6684c1501ea3049 Mon Sep 17 00:00:00 2001 From: Matt Riedemann Date: Mon, 18 Dec 2017 17:41:26 -0500 Subject: [PATCH] Don't try to delete build request during a reschedule If populate_retry failed because of MaxRetriesExceeded, don't try to delete build requests because they should already be gone from the initial create attempt, plus we should assume the cell conductor can't reach the API database anyway. Similar for hitting NoValidHost during a reschedule. We can tell if we're doing a reschedule by the num_attempts value in filter_properties, populated via populate_retry, which will be >1 during a reschedule. Change-Id: I0b3ec6bb098ca32ffd32a61d4f9dcf426c3faf46 Closes-Bug: #1736946 (cherry picked from commit cf88a27c6250043859306d47ae82a63535ad507f) (cherry picked from commit 96acf3db0bc9ecaddb04fc9effc4b9ca1553103d) --- nova/conductor/manager.py | 20 ++++++--- nova/tests/unit/conductor/test_conductor.py | 46 +++++++++++++++++++++ 2 files changed, 60 insertions(+), 6 deletions(-) diff --git a/nova/conductor/manager.py b/nova/conductor/manager.py index 6ad52f9454be..d4f9f417a9db 100644 --- a/nova/conductor/manager.py +++ b/nova/conductor/manager.py @@ -527,17 +527,25 @@ class ComputeTaskManager(base.Base): hosts = self._schedule_instances( context, request_spec, filter_properties) except Exception as exc: + num_attempts = filter_properties.get( + 'retry', {}).get('num_attempts', 1) updates = {'vm_state': vm_states.ERROR, 'task_state': None} for instance in instances: self._set_vm_state_and_notify( context, instance.uuid, 'build_instances', updates, exc, request_spec) - try: - # If the BuildRequest stays around then instance show/lists - # will pull from it rather than the errored instance. - self._destroy_build_request(context, instance) - except exception.BuildRequestNotFound: - pass + # If num_attempts > 1, we're in a reschedule and probably + # either hit NoValidHost or MaxRetriesExceeded. Either way, + # the build request should already be gone and we probably + # can't reach the API DB from the cell conductor. + if num_attempts <= 1: + try: + # If the BuildRequest stays around then instance + # show/lists will pull from it rather than the errored + # instance. + self._destroy_build_request(context, instance) + except exception.BuildRequestNotFound: + pass self._cleanup_allocated_networks( context, instance, requested_networks) return diff --git a/nova/tests/unit/conductor/test_conductor.py b/nova/tests/unit/conductor/test_conductor.py index b18009778f82..9e917bb8b3f4 100644 --- a/nova/tests/unit/conductor/test_conductor.py +++ b/nova/tests/unit/conductor/test_conductor.py @@ -2334,6 +2334,52 @@ class ConductorTaskTestCase(_BaseTaskTestCase, test_compute.BaseTestCase): block_device_mapping=mock.ANY, node='node2', limits=[]) + @mock.patch('nova.objects.Instance.save') + def test_build_instances_max_retries_exceeded(self, mock_save): + """Tests that when populate_retry raises MaxRetriesExceeded in + build_instances, we don't attempt to cleanup the build request. + """ + instance = fake_instance.fake_instance_obj(self.context) + image = {'id': uuids.image_id} + filter_props = { + 'retry': { + 'num_attempts': CONF.scheduler.max_attempts + } + } + requested_networks = objects.NetworkRequestList() + with mock.patch.object(self.conductor, '_destroy_build_request', + new_callable=mock.NonCallableMock): + self.conductor.build_instances( + self.context, [instance], image, filter_props, + mock.sentinel.admin_pass, mock.sentinel.files, + requested_networks, mock.sentinel.secgroups) + mock_save.assert_called_once_with() + + @mock.patch('nova.objects.Instance.save') + def test_build_instances_reschedule_no_valid_host(self, mock_save): + """Tests that when select_destinations raises NoValidHost in + build_instances, we don't attempt to cleanup the build request if + we're rescheduling (num_attempts>1). + """ + instance = fake_instance.fake_instance_obj(self.context) + image = {'id': uuids.image_id} + filter_props = { + 'retry': { + 'num_attempts': 1 # populate_retry will increment this + } + } + requested_networks = objects.NetworkRequestList() + with mock.patch.object(self.conductor, '_destroy_build_request', + new_callable=mock.NonCallableMock): + with mock.patch.object( + self.conductor.scheduler_client, 'select_destinations', + side_effect=exc.NoValidHost(reason='oops')): + self.conductor.build_instances( + self.context, [instance], image, filter_props, + mock.sentinel.admin_pass, mock.sentinel.files, + requested_networks, mock.sentinel.secgroups) + mock_save.assert_called_once_with() + def test_cleanup_allocated_networks_none_requested(self): # Tests that we don't deallocate networks if 'none' were specifically # requested.