Clean up network resources when reschedule fails

During the instance boot (spawn/run) process, neutron ports are
allocated for the instance if necessary. If the instance fails
to spawn (say, as a result of a compute host failure), the default
behaviour is to reschedule the instance and leave its networking
resources intact for potential reuse on the rescheduled host.
All is well if the instance is successfully rescheduled, but if
the reschedule fails (say, no more applicable hosts), the allocated
ports are left as-is and effectively orphaned.

This commit adds code to clean up allocated network resources
when the reschedule fails.

Change-Id: Ic670dd4dc192603c2faecf18e14ef59ebca9e420
Closes-Bug: #1510979
commit 08d24b733e
parent 523e8ed5c0
Author:    Wen Zhi Yu
Committer: Wen Zhi Yu
Date:      2015-11-10 17:16:36 +08:00

2 changed files with 62 additions and 32 deletions
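To make the failure mode concrete, here is a minimal, self-contained sketch
of the flow described in the commit message. Every name in it (FakeNetworkAPI,
build_and_maybe_reschedule, and so on) is an illustrative stand-in rather than
one of nova's real interfaces; only the deallocate_for_instance() call mirrors
what this change actually adds.

    # Illustrative stand-ins only; not nova's real code.

    class NoValidHost(Exception):
        """Scheduler could not find a host for the instance."""


    class FakeNetworkAPI(object):
        """Stand-in for nova.network.API()."""

        def deallocate_for_instance(self, context, instance,
                                    requested_networks=None):
            print('released ports for %s' % instance)


    def build_and_maybe_reschedule(network_api, context, instance,
                                   requested_networks, hosts):
        """Try each candidate host; release ports once no host remains."""
        for host in hosts:
            try:
                raise NoValidHost()  # pretend every spawn attempt fails
            except NoValidHost:
                continue  # reschedule: ports stay allocated for reuse
        # Reschedule exhausted: without this call, the ports allocated
        # during the first boot attempt would be orphaned in neutron.
        network_api.deallocate_for_instance(
            context, instance, requested_networks=requested_networks)


    build_and_maybe_reschedule(FakeNetworkAPI(), {}, 'instance-1',
                               None, ['host1'])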

nova/conductor/manager.py

@@ -35,6 +35,7 @@ from nova import exception
 from nova.i18n import _, _LE, _LI, _LW
 from nova import image
 from nova import manager
+from nova import network
 from nova import objects
 from nova.objects import base as nova_object
 from nova import rpc
@@ -150,6 +151,7 @@ class ComputeTaskManager(base.Base):
         super(ComputeTaskManager, self).__init__()
         self.compute_rpcapi = compute_rpcapi.ComputeAPI()
         self.image_api = image.API()
+        self.network_api = network.API()
         self.servicegroup_api = servicegroup.API()
         self.scheduler_client = scheduler_client.SchedulerClient()
         self.notifier = rpc.get_notifier('compute', CONF.host)
@@ -251,6 +253,11 @@ class ComputeTaskManager(base.Base):
             context, instance_uuid, 'compute_task', method, updates,
             ex, request_spec, self.db)
 
+    def _cleanup_allocated_networks(
+            self, context, instance, requested_networks):
+        self.network_api.deallocate_for_instance(
+            context, instance, requested_networks=requested_networks)
+
     def _live_migrate(self, context, instance, scheduler_hint,
                       block_migration, disk_over_commit):
         destination = scheduler_hint.get("host")
@@ -370,6 +377,8 @@ class ComputeTaskManager(base.Base):
                 self._set_vm_state_and_notify(
                     context, instance.uuid, 'build_instances', updates,
                     exc, request_spec)
+                self._cleanup_allocated_networks(
+                    context, instance, requested_networks)
             return
 
         for (instance, host) in itertools.izip(instances, hosts):
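Exercised in isolation, the new helper is a thin delegation to the network
API. A rough sketch of poking it directly with a mocked-out network API,
assuming an importable and configured nova tree (the string placeholders for
the context and instance are purely illustrative), might look like:

    # Rough sketch, not part of the change: check that the new helper
    # simply delegates to network_api.deallocate_for_instance().
    import mock

    from nova.conductor import manager as conductor_manager

    mgr = conductor_manager.ComputeTaskManager()
    mgr.network_api = mock.Mock()  # avoid talking to a real neutron

    mgr._cleanup_allocated_networks('fake-context', 'fake-instance',
                                    requested_networks=None)
    mgr.network_api.deallocate_for_instance.assert_called_once_with(
        'fake-context', 'fake-instance', requested_networks=None)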

nova/tests/unit/conductor/test_conductor.py

@@ -488,42 +488,52 @@ class _BaseTaskTestCase(object):
             block_device_mapping='block_device_mapping',
             legacy_bdm=False)
 
-    def test_build_instances_scheduler_failure(self):
+    @mock.patch.object(scheduler_utils, 'build_request_spec')
+    @mock.patch.object(scheduler_utils, 'setup_instance_group')
+    @mock.patch.object(scheduler_utils, 'set_vm_state_and_notify')
+    @mock.patch.object(scheduler_client.SchedulerClient,
+                       'select_destinations')
+    @mock.patch.object(conductor_manager.ComputeTaskManager,
+                       '_cleanup_allocated_networks')
+    def test_build_instances_scheduler_failure(
+            self, cleanup_mock, sd_mock, state_mock,
+            sig_mock, bs_mock):
         instances = [fake_instance.fake_instance_obj(self.context)
-                for i in range(2)]
+                     for i in range(2)]
         image = {'fake-data': 'should_pass_silently'}
         spec = {'fake': 'specs',
                 'instance_properties': instances[0]}
         exception = exc.NoValidHost(reason='fake-reason')
-        self.mox.StubOutWithMock(scheduler_utils, 'build_request_spec')
-        self.mox.StubOutWithMock(self.conductor_manager, '_schedule_instances')
-        self.mox.StubOutWithMock(scheduler_utils, 'set_vm_state_and_notify')
-        scheduler_utils.build_request_spec(self.context, image,
-                mox.IgnoreArg()).AndReturn(spec)
-        filter_properties = {'retry': {'num_attempts': 1, 'hosts': []}}
-        self.conductor_manager._schedule_instances(self.context,
-                spec, filter_properties).AndRaise(exception)
+
+        bs_mock.return_value = spec
+        sd_mock.side_effect = exception
+
         updates = {'vm_state': vm_states.ERROR, 'task_state': None}
-        for instance in instances:
-            scheduler_utils.set_vm_state_and_notify(
-                self.context, instance.uuid, 'compute_task', 'build_instances',
-                updates, exception, spec, self.conductor_manager.db)
-        self.mox.ReplayAll()
 
         # build_instances() is a cast, we need to wait for it to complete
         self.useFixture(cast_as_call.CastAsCall(self.stubs))
 
-        self.conductor.build_instances(self.context,
-                instances=instances,
-                image=image,
-                filter_properties={},
-                admin_password='admin_password',
-                injected_files='injected_files',
-                requested_networks=None,
-                security_groups='security_groups',
-                block_device_mapping='block_device_mapping',
-                legacy_bdm=False)
+        self.conductor.build_instances(
+            self.context,
+            instances=instances,
+            image=image,
+            filter_properties={},
+            admin_password='admin_password',
+            injected_files='injected_files',
+            requested_networks=None,
+            security_groups='security_groups',
+            block_device_mapping='block_device_mapping',
+            legacy_bdm=False)
+
+        set_state_calls = []
+        cleanup_network_calls = []
+        for instance in instances:
+            set_state_calls.append(mock.call(
+                self.context, instance.uuid, 'compute_task', 'build_instances',
+                updates, exception, spec, self.conductor_manager.db))
+            cleanup_network_calls.append(mock.call(
+                self.context, mock.ANY, None))
+        state_mock.assert_has_calls(set_state_calls)
+        cleanup_mock.assert_has_calls(cleanup_network_calls)
 
     def test_build_instances_retry_exceeded(self):
         instances = [fake_instance.fake_instance_obj(self.context)]
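A subtlety worth noting in the rewritten test above: mock.patch.object
decorators are applied bottom-up, so the decorator nearest the function
supplies the first mock argument (here cleanup_mock) and the topmost supplies
the last (bs_mock). A standalone illustration of just that ordering rule,
using the same external mock library nova's tests import (the class and
names are made up for the example):

    # Standalone illustration of mock.patch.object decorator ordering.
    import mock


    class Thing(object):
        def first(self):
            pass

        def second(self):
            pass


    @mock.patch.object(Thing, 'first')   # topmost decorator -> last argument
    @mock.patch.object(Thing, 'second')  # nearest decorator -> first argument
    def check(second_mock, first_mock):
        assert isinstance(second_mock, mock.MagicMock)
        assert isinstance(first_mock, mock.MagicMock)


    check()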
@@ -531,9 +541,12 @@ class _BaseTaskTestCase(object):
         filter_properties = {'retry': {'num_attempts': 10, 'hosts': []}}
         updates = {'vm_state': vm_states.ERROR, 'task_state': None}
 
+        @mock.patch.object(conductor_manager.ComputeTaskManager,
+                           '_cleanup_allocated_networks')
         @mock.patch.object(scheduler_utils, 'set_vm_state_and_notify')
         @mock.patch.object(scheduler_utils, 'populate_retry')
-        def _test(populate_retry, set_vm_state_and_notify):
+        def _test(populate_retry,
+                  set_vm_state_and_notify, cleanup_mock):
             # build_instances() is a cast, we need to wait for it to
             # complete
             self.useFixture(cast_as_call.CastAsCall(self.stubs))
@@ -559,6 +572,7 @@ class _BaseTaskTestCase(object):
                 self.context, instances[0].uuid, 'compute_task',
                 'build_instances', updates, mock.ANY, {},
                 self.conductor_manager.db)
+            cleanup_mock.assert_called_once_with(self.context, mock.ANY, None)
 
         _test()
@@ -566,8 +580,10 @@ class _BaseTaskTestCase(object):
     @mock.patch.object(scheduler_utils, 'setup_instance_group')
     @mock.patch.object(conductor_manager.ComputeTaskManager,
                        '_set_vm_state_and_notify')
-    def test_build_instances_scheduler_group_failure(self, state_mock,
-                                                     sig_mock, bs_mock):
+    @mock.patch.object(conductor_manager.ComputeTaskManager,
+                       '_cleanup_allocated_networks')
+    def test_build_instances_scheduler_group_failure(
+            self, cleanup_mock, state_mock, sig_mock, bs_mock):
         instances = [fake_instance.fake_instance_obj(self.context)
                      for i in range(2)]
         image = {'fake-data': 'should_pass_silently'}
@@ -594,11 +610,16 @@ class _BaseTaskTestCase(object):
             security_groups='security_groups',
             block_device_mapping='block_device_mapping',
             legacy_bdm=False)
-        calls = []
+        set_state_calls = []
+        cleanup_network_calls = []
         for instance in instances:
-            calls.append(mock.call(self.context, instance.uuid,
-                                   'build_instances', updates, exception, spec))
-        state_mock.assert_has_calls(calls)
+            set_state_calls.append(mock.call(
+                self.context, instance.uuid, 'build_instances', updates,
+                exception, spec))
+            cleanup_network_calls.append(mock.call(
+                self.context, mock.ANY, None))
+        state_mock.assert_has_calls(set_state_calls)
+        cleanup_mock.assert_has_calls(cleanup_network_calls)
 
     def test_unshelve_instance_on_host(self):
         instance = self._create_fake_instance_obj()