Handle racy teardowns in DHCP agent

Capture port not found exceptions from port updates of DHCP ports
that no longer exist. The DHCP agent already checks the return
value for None in case any of the other things went missing
(e.g. Subnet, Network), so checking for ports disappearing makes
sense. The corresponding agent-side log message for this has also
been downgraded to debug since this is a normal occurrence.

This also cleans up log noise from calling reload_allocations on
networks that have already been torn down due to all of the subnets
being removed.

Closes-Bug: #1621650
Change-Id: I495401d225c664b8f1cf7b3d51747f3b47c24fc0
This commit is contained in:
Kevin Benton 2016-09-08 15:19:06 -07:00
parent 493be3e11c
commit 9d24490da8
6 changed files with 64 additions and 16 deletions

View File

@ -116,11 +116,10 @@ class DhcpAgent(manager.Manager):
except exceptions.Conflict:
# No need to resync here, the agent will receive the event related
# to a status update for the network
LOG.warning(_LW('Unable to %(action)s dhcp for %(net_id)s: there '
'is a conflict with its current state; please '
'check that the network and/or its subnet(s) '
'still exist.'),
{'net_id': network.id, 'action': action})
LOG.debug('Unable to %(action)s dhcp for %(net_id)s: there '
'is a conflict with its current state; please '
'check that the network and/or its subnet(s) '
'still exist.', {'net_id': network.id, 'action': action})
except Exception as e:
if getattr(e, 'exc_type', '') != 'IpAddressGenerationFailure':
# Don't resync if port could not be created because of an IP
@ -385,6 +384,7 @@ class DhcpAgent(manager.Manager):
if self._is_port_on_this_agent(updated_port):
orig = self.cache.get_port_by_id(updated_port['id'])
# assume IP change if not in cache
orig = orig or {'fixed_ips': []}
old_ips = {i['ip_address'] for i in orig['fixed_ips'] or []}
new_ips = {i['ip_address'] for i in updated_port['fixed_ips']}
if old_ips != new_ips:

View File

@ -475,6 +475,12 @@ class Dnsmasq(DhcpLocalProcess):
LOG.debug('Killing dnsmasq for network since all subnets have '
'turned off DHCP: %s', self.network.id)
return
if not self.interface_name:
# we land here if above has been called and we receive port
# delete notifications for the network
LOG.debug('Agent does not have an interface on this network '
'anymore, skipping reload: %s', self.network.id)
return
self._release_unused_leases()
self._spawn_or_reload_process(reload_with_HUP=True)

View File

@ -262,16 +262,23 @@ class DhcpRpcCallback(object):
port['id'] = kwargs.get('port_id')
port['port'][portbindings.HOST_ID] = host
plugin = manager.NeutronManager.get_plugin()
old_port = plugin.get_port(context, port['id'])
if (old_port['device_id'] != n_const.DEVICE_ID_RESERVED_DHCP_PORT
and old_port['device_id'] !=
utils.get_dhcp_agent_device_id(port['port']['network_id'], host)):
raise n_exc.DhcpPortInUse(port_id=port['id'])
LOG.debug('Update dhcp port %(port)s '
'from %(host)s.',
{'port': port,
'host': host})
return self._port_action(plugin, context, port, 'update_port')
try:
old_port = plugin.get_port(context, port['id'])
if (old_port['device_id'] != n_const.DEVICE_ID_RESERVED_DHCP_PORT
and old_port['device_id'] !=
utils.get_dhcp_agent_device_id(port['port']['network_id'],
host)):
raise n_exc.DhcpPortInUse(port_id=port['id'])
LOG.debug('Update dhcp port %(port)s '
'from %(host)s.',
{'port': port,
'host': host})
return self._port_action(plugin, context, port, 'update_port')
except exceptions.PortNotFound:
LOG.debug('Host %(host)s tried to update port '
'%(port_id)s which no longer exists.',
{'host': host, 'port_id': port['id']})
return None
@db_api.retry_db_errors
def dhcp_ready_on_ports(self, context, port_ids):

View File

@ -1052,6 +1052,18 @@ class TestDhcpAgentEventHandler(base.BaseTestCase):
self.call_driver.assert_has_calls(
[mock.call.call_driver('restart', fake_network)])
def test_port_update_change_ip_on_dhcp_agents_port_cache_miss(self):
    """Port update for the agent's own port restarts DHCP on a cache miss.

    The port cache lookup is mocked to return None while the updated port
    carries this agent's DHCP device id and a fixed IP; the test expects
    a 'restart' driver call for the network.
    """
    self.cache.get_network_by_id.return_value = fake_network
    # Simulate the port being absent from the agent's cache.
    self.cache.get_port_by_id.return_value = None

    updated_port = copy.deepcopy(fake_port1)
    updated_port['fixed_ips'][0]['ip_address'] = '172.9.9.99'
    # Mark the port as belonging to this agent on this network.
    updated_port['device_id'] = utils.get_dhcp_agent_device_id(
        updated_port['network_id'], self.dhcp.conf.host)

    self.dhcp.port_update_end(None, {'port': updated_port})

    expected_calls = [mock.call.call_driver('restart', fake_network)]
    self.call_driver.assert_has_calls(expected_calls)
def test_port_update_on_dhcp_agents_port_no_ip_change(self):
self.cache.get_network_by_id.return_value = fake_network
self.cache.get_port_by_id.return_value = fake_port1

View File

@ -1608,6 +1608,15 @@ class TestDnsmasq(TestBase):
exp_addn_name, exp_addn_data,
exp_opt_name, exp_opt_data,)
def test_reload_allocations_no_interface(self):
    """reload_allocations registers no process when no interface is set.

    The interface file fixture is opened without any contents
    (presumably yielding an empty interface name -- confirm against
    OpenFixture), and the process monitor must not see a register call.
    """
    network = FakeDualNetwork()
    interface_path = '/dhcp/%s/interface' % network.id
    self.useFixture(tools.OpenFixture(interface_path))

    process_monitor = mock.Mock()
    dnsmasq = self._get_dnsmasq(network, process_monitor)
    dnsmasq.reload_allocations()

    self.assertFalse(process_monitor.register.called)
def test_reload_allocations(self):
(exp_host_name, exp_host_data,
exp_addn_name, exp_addn_data,
@ -1617,7 +1626,7 @@ class TestDnsmasq(TestBase):
hpath = '/dhcp/%s/host' % net.id
ipath = '/dhcp/%s/interface' % net.id
self.useFixture(tools.OpenFixture(hpath))
self.useFixture(tools.OpenFixture(ipath))
self.useFixture(tools.OpenFixture(ipath, 'tapdancingmice'))
test_pm = mock.Mock()
dm = self._get_dnsmasq(net, test_pm)
dm.reload_allocations()

View File

@ -162,6 +162,20 @@ class TestDhcpRpcCallback(base.BaseTestCase):
exc=n_exc.InvalidInput(error_message='sorry'),
action='create_port')
def test_update_port_missing_port_on_get(self):
    """update_dhcp_port returns None if get_port raises PortNotFound."""
    self.plugin.get_port.side_effect = n_exc.PortNotFound(port_id='66')

    result = self.callbacks.update_dhcp_port(
        context='ctx', host='host', port_id='66',
        port={'port': {'network_id': 'a'}})

    self.assertIsNone(result)
def test_update_port_missing_port_on_update(self):
    """update_dhcp_port returns None if update_port raises PortNotFound.

    get_port succeeds with a reserved DHCP port device id, so the
    failure is only hit on the subsequent update_port call.
    """
    reserved_port = {'device_id': n_const.DEVICE_ID_RESERVED_DHCP_PORT}
    self.plugin.get_port.return_value = reserved_port
    self.plugin.update_port.side_effect = n_exc.PortNotFound(port_id='66')

    result = self.callbacks.update_dhcp_port(
        context='ctx', host='host', port_id='66',
        port={'port': {'network_id': 'a'}})

    self.assertIsNone(result)
def test_get_network_info_return_none_on_not_found(self):
self.plugin.get_network.side_effect = n_exc.NetworkNotFound(net_id='a')
retval = self.callbacks.get_network_info(mock.Mock(), network_id='a')