Clean up network resources when reschedule fails

During the instance boot (spawn/run) process, neutron ports are
allocated for the instance if necessary. If the instance fails
to spawn (say, as a result of a compute host failure), the default
behaviour is to reschedule the instance and leave its networking
resources intact for potential reuse on the rescheduled host.
All is well if the instance is successfully rescheduled, but if
the reschedule fails (say, no more applicable hosts), the allocated
ports are left as-is and effectively orphaned.

This commit adds code to clean up allocated network resources
when the reschedule fails.

Change-Id: Ic670dd4dc192603c2faecf18e14ef59ebca9e420
Closes-Bug: #1510979
commit 08d24b733e
parent 523e8ed5c0
Author:    Wen Zhi Yu
Committer: Wen Zhi Yu
Date:      2015-11-10 17:16:36 +08:00

2 changed files with 62 additions and 32 deletions
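To make the failure mode concrete, here is a minimal, self-contained sketch
of the flow described in the commit message. Every name in it (FakeNetworkAPI,
build_and_maybe_reschedule, and so on) is an illustrative stand-in rather than
one of nova's real interfaces; only the deallocate_for_instance() call mirrors
what this change actually adds.

    # Illustrative stand-ins only; not nova's real code.

    class NoValidHost(Exception):
        """Scheduler could not find a host for the instance."""


    class FakeNetworkAPI(object):
        """Stand-in for nova.network.API()."""

        def deallocate_for_instance(self, context, instance,
                                    requested_networks=None):
            print('released ports for %s' % instance)


    def build_and_maybe_reschedule(network_api, context, instance,
                                   requested_networks, hosts):
        """Try each candidate host; release ports once no host remains."""
        for host in hosts:
            try:
                raise NoValidHost()  # pretend every spawn attempt fails
            except NoValidHost:
                continue  # reschedule: ports stay allocated for reuse
        # Reschedule exhausted: without this call, the ports allocated
        # during the first boot attempt would be orphaned in neutron.
        network_api.deallocate_for_instance(
            context, instance, requested_networks=requested_networks)


    build_and_maybe_reschedule(FakeNetworkAPI(), {}, 'instance-1',
                               None, ['host1'])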

nova/conductor/manager.py

@@ -35,6 +35,7 @@ from nova import exception
 from nova.i18n import _, _LE, _LI, _LW
 from nova import image
 from nova import manager
+from nova import network
 from nova import objects
 from nova.objects import base as nova_object
 from nova import rpc
@@ -150,6 +151,7 @@ class ComputeTaskManager(base.Base):
         super(ComputeTaskManager, self).__init__()
         self.compute_rpcapi = compute_rpcapi.ComputeAPI()
         self.image_api = image.API()
+        self.network_api = network.API()
         self.servicegroup_api = servicegroup.API()
         self.scheduler_client = scheduler_client.SchedulerClient()
         self.notifier = rpc.get_notifier('compute', CONF.host)
@@ -251,6 +253,11 @@ class ComputeTaskManager(base.Base):
             context, instance_uuid, 'compute_task', method, updates,
             ex, request_spec, self.db)
 
+    def _cleanup_allocated_networks(
+            self, context, instance, requested_networks):
+        self.network_api.deallocate_for_instance(
+            context, instance, requested_networks=requested_networks)
+
     def _live_migrate(self, context, instance, scheduler_hint,
                       block_migration, disk_over_commit):
         destination = scheduler_hint.get("host")
@@ -370,6 +377,8 @@ class ComputeTaskManager(base.Base):
                 self._set_vm_state_and_notify(
                     context, instance.uuid, 'build_instances', updates,
                     exc, request_spec)
+                self._cleanup_allocated_networks(
+                    context, instance, requested_networks)
             return
 
         for (instance, host) in itertools.izip(instances, hosts):
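Exercised in isolation, the new helper is a thin delegation to the network
API. A rough sketch of poking it directly with a mocked-out network API,
assuming an importable and configured nova tree (the string placeholders for
the context and instance are purely illustrative), might look like:

    # Rough sketch, not part of the change: check that the new helper
    # simply delegates to network_api.deallocate_for_instance().
    import mock

    from nova.conductor import manager as conductor_manager

    mgr = conductor_manager.ComputeTaskManager()
    mgr.network_api = mock.Mock()  # avoid talking to a real neutron

    mgr._cleanup_allocated_networks('fake-context', 'fake-instance',
                                    requested_networks=None)
    mgr.network_api.deallocate_for_instance.assert_called_once_with(
        'fake-context', 'fake-instance', requested_networks=None)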

nova/tests/unit/conductor/test_conductor.py

@@ -488,42 +488,52 @@ class _BaseTaskTestCase(object):
             block_device_mapping='block_device_mapping',
             legacy_bdm=False)
 
-    def test_build_instances_scheduler_failure(self):
+    @mock.patch.object(scheduler_utils, 'build_request_spec')
+    @mock.patch.object(scheduler_utils, 'setup_instance_group')
+    @mock.patch.object(scheduler_utils, 'set_vm_state_and_notify')
+    @mock.patch.object(scheduler_client.SchedulerClient,
+                       'select_destinations')
+    @mock.patch.object(conductor_manager.ComputeTaskManager,
+                       '_cleanup_allocated_networks')
+    def test_build_instances_scheduler_failure(
+            self, cleanup_mock, sd_mock, state_mock,
+            sig_mock, bs_mock):
         instances = [fake_instance.fake_instance_obj(self.context)
-                for i in range(2)]
+                     for i in range(2)]
         image = {'fake-data': 'should_pass_silently'}
         spec = {'fake': 'specs',
                 'instance_properties': instances[0]}
         exception = exc.NoValidHost(reason='fake-reason')
-        self.mox.StubOutWithMock(scheduler_utils, 'build_request_spec')
-        self.mox.StubOutWithMock(self.conductor_manager, '_schedule_instances')
-        self.mox.StubOutWithMock(scheduler_utils, 'set_vm_state_and_notify')
-        scheduler_utils.build_request_spec(self.context, image,
-                mox.IgnoreArg()).AndReturn(spec)
-        filter_properties = {'retry': {'num_attempts': 1, 'hosts': []}}
-        self.conductor_manager._schedule_instances(self.context,
-                spec, filter_properties).AndRaise(exception)
+
+        bs_mock.return_value = spec
+        sd_mock.side_effect = exception
+
         updates = {'vm_state': vm_states.ERROR, 'task_state': None}
-        for instance in instances:
-            scheduler_utils.set_vm_state_and_notify(
-                self.context, instance.uuid, 'compute_task', 'build_instances',
-                updates, exception, spec, self.conductor_manager.db)
-        self.mox.ReplayAll()
 
         # build_instances() is a cast, we need to wait for it to complete
         self.useFixture(cast_as_call.CastAsCall(self.stubs))
 
-        self.conductor.build_instances(self.context,
-                instances=instances,
-                image=image,
-                filter_properties={},
-                admin_password='admin_password',
-                injected_files='injected_files',
-                requested_networks=None,
-                security_groups='security_groups',
-                block_device_mapping='block_device_mapping',
-                legacy_bdm=False)
+        self.conductor.build_instances(
+            self.context,
+            instances=instances,
+            image=image,
+            filter_properties={},
+            admin_password='admin_password',
+            injected_files='injected_files',
+            requested_networks=None,
+            security_groups='security_groups',
+            block_device_mapping='block_device_mapping',
+            legacy_bdm=False)
+
+        set_state_calls = []
+        cleanup_network_calls = []
+        for instance in instances:
+            set_state_calls.append(mock.call(
+                self.context, instance.uuid, 'compute_task', 'build_instances',
+                updates, exception, spec, self.conductor_manager.db))
+            cleanup_network_calls.append(mock.call(
+                self.context, mock.ANY, None))
+        state_mock.assert_has_calls(set_state_calls)
+        cleanup_mock.assert_has_calls(cleanup_network_calls)
 
     def test_build_instances_retry_exceeded(self):
         instances = [fake_instance.fake_instance_obj(self.context)]
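A subtlety worth noting in the rewritten test above: mock.patch.object
decorators are applied bottom-up, so the decorator nearest the function
supplies the first mock argument (here cleanup_mock) and the topmost supplies
the last (bs_mock). A standalone illustration of just that ordering rule,
using the same external mock library nova's tests import (the class and
names are made up for the example):

    # Standalone illustration of mock.patch.object decorator ordering.
    import mock


    class Thing(object):
        def first(self):
            pass

        def second(self):
            pass


    @mock.patch.object(Thing, 'first')   # topmost decorator -> last argument
    @mock.patch.object(Thing, 'second')  # nearest decorator -> first argument
    def check(second_mock, first_mock):
        assert isinstance(second_mock, mock.MagicMock)
        assert isinstance(first_mock, mock.MagicMock)


    check()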
@@ -531,9 +541,12 @@ class _BaseTaskTestCase(object):
         filter_properties = {'retry': {'num_attempts': 10, 'hosts': []}}
         updates = {'vm_state': vm_states.ERROR, 'task_state': None}
 
+        @mock.patch.object(conductor_manager.ComputeTaskManager,
+                           '_cleanup_allocated_networks')
         @mock.patch.object(scheduler_utils, 'set_vm_state_and_notify')
         @mock.patch.object(scheduler_utils, 'populate_retry')
-        def _test(populate_retry, set_vm_state_and_notify):
+        def _test(populate_retry,
+                  set_vm_state_and_notify, cleanup_mock):
             # build_instances() is a cast, we need to wait for it to
             # complete
             self.useFixture(cast_as_call.CastAsCall(self.stubs))
@@ -559,6 +572,7 @@ class _BaseTaskTestCase(object):
                 self.context, instances[0].uuid, 'compute_task',
                 'build_instances', updates, mock.ANY, {},
                 self.conductor_manager.db)
+            cleanup_mock.assert_called_once_with(self.context, mock.ANY, None)
 
         _test()
@@ -566,8 +580,10 @@ class _BaseTaskTestCase(object):
     @mock.patch.object(scheduler_utils, 'setup_instance_group')
     @mock.patch.object(conductor_manager.ComputeTaskManager,
                        '_set_vm_state_and_notify')
-    def test_build_instances_scheduler_group_failure(self, state_mock,
-                                                     sig_mock, bs_mock):
+    @mock.patch.object(conductor_manager.ComputeTaskManager,
+                       '_cleanup_allocated_networks')
+    def test_build_instances_scheduler_group_failure(
+            self, cleanup_mock, state_mock, sig_mock, bs_mock):
         instances = [fake_instance.fake_instance_obj(self.context)
                      for i in range(2)]
         image = {'fake-data': 'should_pass_silently'}
@@ -594,11 +610,16 @@ class _BaseTaskTestCase(object):
             security_groups='security_groups',
             block_device_mapping='block_device_mapping',
             legacy_bdm=False)
-        calls = []
+        set_state_calls = []
+        cleanup_network_calls = []
         for instance in instances:
-            calls.append(mock.call(self.context, instance.uuid,
-                                   'build_instances', updates, exception, spec))
-        state_mock.assert_has_calls(calls)
+            set_state_calls.append(mock.call(
+                self.context, instance.uuid, 'build_instances', updates,
+                exception, spec))
+            cleanup_network_calls.append(mock.call(
+                self.context, mock.ANY, None))
+        state_mock.assert_has_calls(set_state_calls)
+        cleanup_mock.assert_has_calls(cleanup_network_calls)
 
     def test_unshelve_instance_on_host(self):
         instance = self._create_fake_instance_obj()