Call release_dhcp via RPC to ensure correct host

When deleting an instance in a nova-network environment, the network
manager calls release_dhcp() on the local host. The linux_net driver
then executes dhcp_release, a binary that comes with dnsmasq that
releases a DHCP lease on the local host. Upon lease release, dnsmasq
calls its dhcp-script, nova-dhcpbridge. The latter calls
release_fixed_ip() and the instance's fixed IP is returned to the
pool. This is fine if an instance has never been live-migrated.

If an instance has been live-migrated, the dnsmasq on its new
host fails with 'unknown lease' because it's not the same dnsmasq that
originally handed out the lease. Having failed, dnsmasq doesn't call
nova-dhcpbridge and release_fixed_ip() is never called. The fixed IP
is not returned to the pool and a new instance cannot be booted with
that IP.

This patch adds a release_dhcp RPC call that calls release_dhcp on
the instance's "original" host, thus ensuring that the correct dnsmasq
handles the lease release and that nova-dhcpbridge and
release_fixed_ip() are called.

Change-Id: I0eec8c995dd8cff50c37af83018697fc686fe727
Closes-bug: 1585601
This commit is contained in:
Artom Lifshitz 2016-06-03 15:08:08 +00:00
parent caeb8ecb85
commit f9e9b30b93
5 changed files with 136 additions and 3 deletions

View File

@ -1301,6 +1301,10 @@ class MigrationSchedulerRPCError(MigrationError):
msg_fmt = _("Migration select destinations error: %(reason)s")
class RPCPinnedToOldVersion(NovaException):
    """Raised when an RPC method cannot be sent at the pinned version.

    The network RPC API raises this from release_dhcp() when its client
    reports that message version 1.17 cannot be sent.
    """

    msg_fmt = _("RPC is pinned to old version")
class MalformedRequestBody(NovaException):
msg_fmt = _("Malformed message body: %(reason)s")

View File

@ -167,7 +167,7 @@ class NetworkManager(manager.Manager):
The one at a time part is to flatten the layout to help scale
"""
target = messaging.Target(version='1.16')
target = messaging.Target(version='1.17')
# If True, this manager requires VIF to create a bridge.
SHOULD_CREATE_BRIDGE = False
@ -983,6 +983,13 @@ class NetworkManager(manager.Manager):
# release_fixed_ip callback will
# get called by nova-dhcpbridge.
try:
self.network_rpcapi.release_dhcp(context,
instance.launched_on,
dev, address,
vif.address)
except exception.RPCPinnedToOldVersion:
# Fall back on previous behaviour of calling
# release_dhcp on the local driver
self.driver.release_dhcp(dev, address, vif.address)
except exception.NetworkDhcpReleaseFailed:
LOG.error(_LE("Error releasing DHCP for IP %(address)s"
@ -1018,6 +1025,9 @@ class NetworkManager(manager.Manager):
# Commit the reservations
quotas.commit()
def release_dhcp(self, context, dev, address, vif_address):
    """Release a DHCP lease through this manager's network driver.

    This is the manager-side handler for the ``release_dhcp`` RPC
    method, so the release is performed by the driver of whichever
    network host receives the call.

    :param context: request context; not used by this method itself
    :param dev: network device handed to the driver
    :param address: IP address whose lease is being released
    :param vif_address: address of the virtual interface holding the lease
    """
    driver = self.driver
    driver.release_dhcp(dev, address, vif_address)
def lease_fixed_ip(self, context, address):
"""Called by dhcp-bridge when IP is leased."""
LOG.debug('Leased IP |%s|', address, context=context)

View File

@ -20,6 +20,7 @@ import oslo_messaging as messaging
from oslo_serialization import jsonutils
import nova.conf
from nova import exception
from nova.objects import base as objects_base
from nova import rpc
@ -103,6 +104,8 @@ class NetworkAPI(object):
... Liberty supports message version 1.16. So, any changes to
existing methods in 1.x after that point should be done such that they
can handle the version_cap being set to 1.16.
* 1.17 - Add method release_dhcp()
'''
VERSION_ALIASES = {
@ -173,6 +176,14 @@ class NetworkAPI(object):
cctxt = cctxt.prepare(server=instance.host, version=version)
return cctxt.call(ctxt, 'deallocate_for_instance', **kwargs)
def release_dhcp(self, ctxt, host, dev, address, vif_address):
    """Ask the network service on ``host`` to release a DHCP lease.

    :param ctxt: request context forwarded with the RPC call
    :param host: server the RPC call is directed to
    :param dev: network device passed through to the remote method
    :param address: IP address whose lease should be released
    :param vif_address: address of the virtual interface holding the lease
    :returns: whatever the remote ``release_dhcp`` call returns
    :raises exception.RPCPinnedToOldVersion: if the client cannot send
        message version 1.17, the version that introduced this method
    """
    required_version = '1.17'

    # Refuse up front when pinned below 1.17; the caller is expected to
    # handle this and choose its own fallback.
    if not self.client.can_send_version(required_version):
        raise exception.RPCPinnedToOldVersion()

    cctxt = self.client.prepare(version=required_version, server=host)
    return cctxt.call(ctxt, 'release_dhcp', dev=dev, address=address,
                      vif_address=vif_address)
def add_fixed_ip_to_instance(self, ctxt, instance_id, rxtx_factor,
host, network_id):
cctxt = self.client.prepare(version='1.9')

View File

@ -38,6 +38,7 @@ from nova.network import floating_ips
from nova.network import linux_net
from nova.network import manager as network_manager
from nova.network import model as net_model
from nova.network import rpcapi as network_rpcapi
from nova import objects
from nova.objects import network as network_obj
from nova.objects import virtual_interface as vif_obj
@ -1705,7 +1706,62 @@ class VlanNetworkTestCase(test.TestCase):
def vif_get(_context, _vif_id):
return vifs[0]
def release_dhcp(self, context, instance, dev, address, vif_address):
linux_net.release_dhcp(dev, address, vif_address)
self.stub_out('nova.db.virtual_interface_get', vif_get)
self.stub_out('nova.network.rpcapi.NetworkAPI.release_dhcp',
release_dhcp)
context1 = context.RequestContext('user', fakes.FAKE_PROJECT_ID)
instance = db.instance_create(context1,
{'project_id': fakes.FAKE_PROJECT_ID})
elevated = context1.elevated()
fix_addr = db.fixed_ip_associate_pool(elevated, 1, instance['uuid'])
fixed_get.return_value = dict(test_fixed_ip.fake_fixed_ip,
address=fix_addr.address,
instance_uuid=instance.uuid,
allocated=True,
virtual_interface_id=3,
network=dict(test_network.fake_network,
**networks[1]))
self.flags(force_dhcp_release=True)
self.mox.StubOutWithMock(linux_net, 'release_dhcp')
linux_net.release_dhcp(networks[1]['bridge'], fix_addr.address,
'DE:AD:BE:EF:00:00')
self.mox.ReplayAll()
self.network.deallocate_fixed_ip(context1, fix_addr.address, 'fake')
fixed_update.assert_called_once_with(context1, fix_addr.address,
{'allocated': False})
@mock.patch('nova.db.fixed_ip_get_by_address')
@mock.patch('nova.db.network_get')
@mock.patch('nova.db.fixed_ip_update')
def test_deallocate_fixed_rpc_pinned(self, fixed_update, net_get,
fixed_get):
"""Ensure that if the RPC call to release_dhcp raises a
RPCPinnedToOldVersion, we fall back to the previous behaviour of
calling release_dhcp in the local linux_net driver. In the previous
test, release_dhcp was mocked to call the driver, since this is what
happens on a successful RPC call. In this test, we mock it to raise,
but the expected behaviour is exactly the same - namely that
release_dhcp is called in the linux_net driver, which is why the two
tests are otherwise identical.
"""
net_get.return_value = dict(test_network.fake_network,
**networks[1])
def vif_get(_context, _vif_id):
return vifs[0]
def release_dhcp(self, context, instance, dev, address, vif_address):
raise exception.RPCPinnedToOldVersion()
self.stub_out('nova.db.virtual_interface_get', vif_get)
self.stub_out('nova.network.rpcapi.NetworkAPI.release_dhcp',
release_dhcp)
context1 = context.RequestContext('user', fakes.FAKE_PROJECT_ID)
instance = db.instance_create(context1,
@ -1741,12 +1797,17 @@ class VlanNetworkTestCase(test.TestCase):
def vif_get(_context, _vif_id):
return vifs[0]
def release_dhcp(self, context, instance, dev, address, vif_address):
linux_net.release_dhcp(dev, address, vif_address)
with test.nested(
mock.patch.object(network_rpcapi.NetworkAPI, 'release_dhcp',
release_dhcp),
mock.patch.object(db, 'virtual_interface_get', vif_get),
mock.patch.object(
utils, 'execute',
side_effect=processutils.ProcessExecutionError()),
) as (_vif_get, _execute):
) as (release_dhcp, _vif_get, _execute):
context1 = context.RequestContext('user', fakes.FAKE_PROJECT_ID)
instance = db.instance_create(context1,
@ -3602,13 +3663,15 @@ class NetworkManagerNoDBTestCase(test.NoDBTestCase):
self.assertFalse(mock_disassociate.called,
str(mock_disassociate.mock_calls))
@mock.patch.object(network_rpcapi.NetworkAPI, 'release_dhcp')
@mock.patch.object(objects.FixedIP, 'get_by_address')
@mock.patch.object(objects.VirtualInterface, 'get_by_id')
@mock.patch.object(objects.Quotas, 'reserve')
def test_deallocate_fixed_ip_explicit_disassociate(self,
mock_quota_reserve,
mock_vif_get_by_id,
mock_fip_get_by_addr):
mock_fip_get_by_addr,
mock_release_dhcp):
# Tests that we explicitly call FixedIP.disassociate when the fixed IP
# is not leased and has an associated instance (race with dnsmasq).
self.flags(force_dhcp_release=True)

View File

@ -22,6 +22,7 @@ import mock
from oslo_config import cfg
from nova import context
from nova import exception
from nova.network import rpcapi as network_rpcapi
from nova.objects import base as objects_base
from nova import test
@ -146,6 +147,50 @@ class NetworkRpcAPITestCase(test.NoDBTestCase):
self._test_network_api('deallocate_for_instance', rpc_method='call',
instance=instance, requested_networks={}, version='1.11')
def test_release_dhcp(self):
    """release_dhcp() prepares a 1.17 call aimed at the given host.

    With can_send_version patched to return True, the API must prepare
    the client for version 1.17 and the target server, then invoke the
    remote 'release_dhcp' method with the lease details as keywords.
    """
    host = 'fake-host'
    dev = 'eth0'
    address = '192.168.65.158'
    vif_address = '00:0c:29:2c:b2:64'
    ctxt = context.RequestContext('fake_user', 'fake_project')
    rpcapi = network_rpcapi.NetworkAPI()

    # Stand-in for the prepared call context returned by prepare().
    rpc_call = mock.Mock()
    prepared_ctxt = mock.Mock(call=rpc_call)

    can_send_patch = mock.patch.object(rpcapi.client, 'can_send_version',
                                       return_value=True)
    prepare_patch = mock.patch.object(rpcapi.client, 'prepare',
                                      return_value=prepared_ctxt)

    with test.nested(can_send_patch, prepare_patch) as (can_send_mock,
                                                        prepare_mock):
        rpcapi.release_dhcp(ctxt, host, dev, address, vif_address)

    can_send_mock.assert_called_once_with('1.17')
    prepare_mock.assert_called_once_with(server=host, version='1.17')
    rpc_call.assert_called_once_with(ctxt, 'release_dhcp', dev=dev,
                                     address=address,
                                     vif_address=vif_address)
def test_release_dhcp_v116(self):
    """release_dhcp() raises RPCPinnedToOldVersion when 1.17 is unusable.

    can_send_version is patched to return False, so the API must raise
    after checking for version '1.17' exactly once.
    """
    ctxt = context.RequestContext('fake_user', 'fake_project')
    rpcapi = network_rpcapi.NetworkAPI()

    # Positional arguments for release_dhcp:
    # (ctxt, host, dev, address, vif_address).
    release_args = (ctxt, 'fake-host', 'eth0', '192.168.65.158',
                    '00:0c:29:2c:b2:64')

    pinned = mock.patch.object(rpcapi.client, 'can_send_version',
                               return_value=False)
    with pinned as can_send_mock:
        self.assertRaises(exception.RPCPinnedToOldVersion,
                          rpcapi.release_dhcp, *release_args)
        can_send_mock.assert_called_once_with('1.17')
def test_add_fixed_ip_to_instance(self):
self._test_network_api('add_fixed_ip_to_instance', rpc_method='call',
instance_id='fake_id', rxtx_factor='fake_factor',