Don't try to delete build request during a reschedule

If populate_retry failed because of MaxRetriesExceeded, don't try to delete build requests, because they should already be gone from the initial create attempt; in addition, we should assume the cell conductor can't reach the API database anyway.

The same applies when hitting NoValidHost during a reschedule.

We can tell whether we're doing a reschedule from the num_attempts value in filter_properties, populated via populate_retry, which will be >1 during a reschedule.

Change-Id: I0b3ec6bb098ca32ffd32a61d4f9dcf426c3faf46
Closes-Bug: #1736946
(cherry picked from commit cf88a27c62)
2017-12-18 17:41:26 -05:00 · 2017-12-18 17:41:26 -05:00 · 96acf3db0b
parent ae7aef15f6
commit 96acf3db0b
2 changed files with 60 additions and 6 deletions
--- a/nova/conductor/manager.py
+++ b/nova/conductor/manager.py
@@ -552,17 +552,25 @@ class ComputeTaskManager(base.Base):
            hosts = self._schedule_instances(
                    context, spec_obj, instance_uuids)
        except Exception as exc:
+            num_attempts = filter_properties.get(
+                'retry', {}).get('num_attempts', 1)
            updates = {'vm_state': vm_states.ERROR, 'task_state': None}
            for instance in instances:
                self._set_vm_state_and_notify(
                    context, instance.uuid, 'build_instances', updates,
                    exc, request_spec)
-                try:
-                    # If the BuildRequest stays around then instance show/lists
-                    # will pull from it rather than the errored instance.
-                    self._destroy_build_request(context, instance)
-                except exception.BuildRequestNotFound:
-                    pass
+                # If num_attempts > 1, we're in a reschedule and probably
+                # either hit NoValidHost or MaxRetriesExceeded. Either way,
+                # the build request should already be gone and we probably
+                # can't reach the API DB from the cell conductor.
+                if num_attempts <= 1:
+                    try:
+                        # If the BuildRequest stays around then instance
+                        # show/lists will pull from it rather than the errored
+                        # instance.
+                        self._destroy_build_request(context, instance)
+                    except exception.BuildRequestNotFound:
+                        pass
                self._cleanup_allocated_networks(
                    context, instance, requested_networks)
            return
--- a/nova/tests/unit/conductor/test_conductor.py
+++ b/nova/tests/unit/conductor/test_conductor.py
@@ -2528,6 +2528,52 @@ class ConductorTaskTestCase(_BaseTaskTestCase, test_compute.BaseTestCase):
                    block_device_mapping=mock.ANY,
                    node='node2', limits=[])

+    @mock.patch('nova.objects.Instance.save')
+    def test_build_instances_max_retries_exceeded(self, mock_save):
+        """Tests that when populate_retry raises MaxRetriesExceeded in
+        build_instances, we don't attempt to cleanup the build request.
+        """
+        instance = fake_instance.fake_instance_obj(self.context)
+        image = {'id': uuids.image_id}
+        filter_props = {
+            'retry': {
+                'num_attempts': CONF.scheduler.max_attempts
+            }
+        }
+        requested_networks = objects.NetworkRequestList()
+        with mock.patch.object(self.conductor, '_destroy_build_request',
+                               new_callable=mock.NonCallableMock):
+            self.conductor.build_instances(
+                self.context, [instance], image, filter_props,
+                mock.sentinel.admin_pass, mock.sentinel.files,
+                requested_networks, mock.sentinel.secgroups)
+            mock_save.assert_called_once_with()
+
+    @mock.patch('nova.objects.Instance.save')
+    def test_build_instances_reschedule_no_valid_host(self, mock_save):
+        """Tests that when select_destinations raises NoValidHost in
+        build_instances, we don't attempt to cleanup the build request if
+        we're rescheduling (num_attempts>1).
+        """
+        instance = fake_instance.fake_instance_obj(self.context)
+        image = {'id': uuids.image_id}
+        filter_props = {
+            'retry': {
+                'num_attempts': 1   # populate_retry will increment this
+            }
+        }
+        requested_networks = objects.NetworkRequestList()
+        with mock.patch.object(self.conductor, '_destroy_build_request',
+                               new_callable=mock.NonCallableMock):
+            with mock.patch.object(
+                    self.conductor.scheduler_client, 'select_destinations',
+                    side_effect=exc.NoValidHost(reason='oops')):
+                self.conductor.build_instances(
+                    self.context, [instance], image, filter_props,
+                    mock.sentinel.admin_pass, mock.sentinel.files,
+                    requested_networks, mock.sentinel.secgroups)
+                mock_save.assert_called_once_with()
+
    def test_cleanup_allocated_networks_none_requested(self):
        # Tests that we don't deallocate networks if 'none' were specifically
        # requested.