Continue running on_end() when instance deletion fails

A user can delete their instances while the on_end function is
executing. This can result in novaclient raising a NotFound exception,
preventing on_end() from completing successfully. The host allocations
are deleted at this point, but the hosts are left in the reservation's
aggregate, preventing future leases from starting.

This patch catches any exception raised while deleting an instance,
logs it, and continues running on_end().

Change-Id: Ib495a26f9ec2e7eae64e1eaf69a6cf635c3aadcd
Closes-Bug: #1786073
This commit is contained in:
Pierre Riteau 2018-08-08 18:34:50 +02:00
parent 3cdd01a3cd
commit 8c46b5c73e
2 changed files with 14 additions and 1 deletion

View File

@ -16,6 +16,7 @@
import datetime
from novaclient import exceptions as nova_exceptions
from oslo_config import cfg
from oslo_log import log as logging
from oslo_utils import strutils
@ -198,7 +199,13 @@ class PhysicalHostPlugin(base.BasePlugin, nova.NovaClientWrapper):
for host in pool.get_computehosts(host_reservation['aggregate_id']):
for server in self.nova.servers.list(
search_opts={"host": host, "all_tenants": 1}):
self.nova.servers.delete(server=server)
try:
self.nova.servers.delete(server=server)
except nova_exceptions.NotFound:
LOG.info('Could not find server %s, may have been deleted '
'concurrently.', server)
except Exception as e:
LOG.exception('Failed to delete %s: %s.', server, str(e))
try:
pool.delete(host_reservation['aggregate_id'])
except manager_ex.AggregateNotFound:

View File

@ -17,6 +17,7 @@ import datetime
import mock
from novaclient import client as nova_client
from novaclient import exceptions as nova_exceptions
from oslo_config import cfg
from oslo_config import fixture as conf_fixture
import testtools
@ -1404,6 +1405,11 @@ class PhysicalHostPluginTestCase(tests.TestCase):
list_servers = self.patch(self.ServerManager, 'list')
list_servers.return_value = ['server1', 'server2']
delete_server = self.patch(self.ServerManager, 'delete')
# Mock delete_server so the first call fails to find the instance.
# This can happen when the user is deleting instances concurrently.
delete_server.side_effect = mock.Mock(
side_effect=[nova_exceptions.NotFound(
404, 'Instance server1 could not be found.'), None])
delete_pool = self.patch(self.nova.ReservationPool, 'delete')
self.fake_phys_plugin.on_end(u'04de74e8-193a-49d2-9ab8-cba7b49e45e8')
host_reservation_update.assert_called_with(