Make network_cache more robust with neutron

Currently, nova treats neutron as the source of truth for which ports are
attached to an instance which is a false assumption. Because of this
if someone creates a port in neutron with a device_id that matches one
of their existing instance_ids that port will eventually show up in
nova list (through the periodic heal task).

This problem usually manifests it's self when nova-compute
calls to neutron to create a port and the request times out (though
the port is actually created in neutron). When this occurs the instance
can be rescheduled on another compute node which it will call out to
neutron again to create a port. In this case two ports will show
up in the network_cache table (since they have the same instance_id) though
only one port is attached to the instance.

This patch addresses this issue by only adding ports to network_cache
if nova successfully allocated the port (or it was passed in). This
way these ghost ports are avoided. A follow up patch will come later
that garbage collects these ports.

Closes-bug: #1258620
Closes-bug: #1272195

Change-Id: I961c224d95291727c8614174de07805a0d0a9e46
This commit is contained in:
Aaron Rosen 2013-12-05 17:28:17 -08:00
parent a79ecbeb7d
commit 46922068ac
2 changed files with 258 additions and 137 deletions

View File

@ -290,6 +290,7 @@ class API(base.Base):
touched_port_ids = []
created_port_ids = []
ports_in_requested_order = []
for network in nets:
# If security groups are requested on an instance then the
# network must has a subnet associated with it. Some plugins
@ -317,11 +318,14 @@ class API(base.Base):
if port:
port_client.update_port(port['id'], port_req_body)
touched_port_ids.append(port['id'])
ports_in_requested_order.append(port['id'])
else:
created_port_ids.append(self._create_port(
created_port = self._create_port(
port_client, instance, network_id,
port_req_body, fixed_ips.get(network_id),
security_group_ids, available_macs, dhcp_opts))
security_group_ids, available_macs, dhcp_opts)
created_port_ids.append(created_port)
ports_in_requested_order.append(created_port)
except Exception:
with excutils.save_and_reraise_exception():
for port_id in touched_port_ids:
@ -346,7 +350,8 @@ class API(base.Base):
msg = _("Failed to delete port %s")
LOG.exception(msg, port_id)
nw_info = self.get_instance_nw_info(context, instance, networks=nets)
nw_info = self.get_instance_nw_info(context, instance, networks=nets,
port_ids=ports_in_requested_order)
# NOTE(danms): Only return info about ports we created in this run.
# In the initial allocation case, this will be everything we created,
# and in later runs will only be what was created that time. Thus,
@ -440,23 +445,67 @@ class API(base.Base):
@refresh_cache
def get_instance_nw_info(self, context, instance, networks=None,
use_slave=False):
port_ids=None, use_slave=False):
"""Return network information for specified instance
and update cache.
"""
# NOTE(geekinutah): It would be nice if use_slave had us call
# special APIs that pummeled slaves instead of
# the master. For now we just ignore this arg.
result = self._get_instance_nw_info(context, instance, networks)
result = self._get_instance_nw_info(context, instance, networks,
port_ids)
return result
def _get_instance_nw_info(self, context, instance, networks=None):
def _get_instance_nw_info(self, context, instance, networks=None,
port_ids=None):
# keep this caching-free version of the get_instance_nw_info method
# because it is used by the caching logic itself.
LOG.debug(_('get_instance_nw_info() for %s'), instance['display_name'])
nw_info = self._build_network_info_model(context, instance, networks)
nw_info = self._build_network_info_model(context, instance, networks,
port_ids)
return network_model.NetworkInfo.hydrate(nw_info)
def _gather_port_ids_and_networks(self, context, instance, networks=None,
port_ids=None):
"""Return an instance's complete list of port_ids and networks."""
if ((networks is None and port_ids is not None) or
(port_ids is None and networks is not None)):
message = ("This method needs to be called with either "
"networks=None and port_ids=None or port_ids and "
" networks as not none.")
raise exception.NovaException(message=message)
# Unfortunately, this is sometimes in unicode and sometimes not
if isinstance(instance['info_cache']['network_info'], six.text_type):
ifaces = jsonutils.loads(instance['info_cache']['network_info'])
else:
ifaces = instance['info_cache']['network_info']
# This code path is only done when refreshing the network_cache
if port_ids is None:
port_ids = [iface['id'] for iface in ifaces]
net_ids = [iface['network']['id'] for iface in ifaces]
if networks is None:
networks = self._get_available_networks(context,
instance['project_id'],
net_ids)
# an interface was added/removed from instance.
else:
# Since networks does not contain the existing networks on the
# instance we use their values from the cache and add it.
networks = networks + [
{'id': iface['network']['id'],
'name': iface['network']['label'],
'tenant_id': iface['network']['meta']['tenant_id']}
for iface in ifaces]
# Include existing interfaces so they are not removed from the db.
port_ids = [iface['id'] for iface in ifaces] + port_ids
return networks, port_ids
@refresh_cache
def add_fixed_ip_to_instance(self, context, instance, network_id):
"""Add a fixed ip to the instance from specified network."""
@ -1011,67 +1060,54 @@ class API(base.Base):
network['should_create_bridge'] = should_create_bridge
return network, ovs_interfaceid
def _build_network_info_model(self, context, instance, networks=None):
# Note(arosen): on interface-attach networks only contains the
# network that the interface is being attached to.
def _build_network_info_model(self, context, instance, networks=None,
port_ids=None):
"""Return list of ordered VIFs attached to instance.
:param context - request context.
:param instance - instance we are returning network info for.
:param networks - List of networks being attached to an instance.
If value is None this value will be populated
from the existing cached value.
:param port_ids - List of port_ids that are being attached to an
instance in order of attachment. If value is None
this value will be populated from the existing
cached value.
"""
search_opts = {'tenant_id': instance['project_id'],
'device_id': instance['uuid'], }
client = neutronv2.get_client(context, admin=True)
data = client.list_ports(**search_opts)
ports = data.get('ports', [])
current_neutron_ports = data.get('ports', [])
networks, port_ids = self._gather_port_ids_and_networks(
context, instance, networks, port_ids)
nw_info = network_model.NetworkInfo()
# Unfortunately, this is sometimes in unicode and sometimes not
if isinstance(instance['info_cache']['network_info'], six.text_type):
ifaces = jsonutils.loads(instance['info_cache']['network_info'])
else:
ifaces = instance['info_cache']['network_info']
for current_neutron_port in current_neutron_ports:
if current_neutron_port['id'] in port_ids:
network_IPs = self._nw_info_get_ips(client,
current_neutron_port)
subnets = self._nw_info_get_subnets(context,
current_neutron_port,
network_IPs)
if networks is None:
net_ids = [iface['network']['id'] for iface in ifaces]
networks = self._get_available_networks(context,
instance['project_id'],
net_ids)
devname = "tap" + current_neutron_port['id']
devname = devname[:network_model.NIC_NAME_LEN]
network, ovs_interfaceid = (
self._nw_info_build_network(current_neutron_port,
networks, subnets))
# ensure ports are in preferred network order, and filter out
# those not attached to one of the provided list of networks
else:
# Include existing interfaces so they are not removed from the db.
# Needed when interfaces are added to existing instances.
for iface in ifaces:
nw_info.append(network_model.VIF(
id=iface['id'],
address=iface['address'],
network=iface['network'],
type=iface['type'],
ovs_interfaceid=iface['ovs_interfaceid'],
devname=iface['devname']))
id=current_neutron_port['id'],
address=current_neutron_port['mac_address'],
network=network,
type=current_neutron_port.get('binding:vif_type'),
ovs_interfaceid=ovs_interfaceid,
devname=devname))
net_ids = [n['id'] for n in networks]
ports = [port for port in ports if port['network_id'] in net_ids]
_ensure_requested_network_ordering(lambda x: x['network_id'],
ports, net_ids)
for port in ports:
network_IPs = self._nw_info_get_ips(client, port)
subnets = self._nw_info_get_subnets(context, port, network_IPs)
devname = "tap" + port['id']
devname = devname[:network_model.NIC_NAME_LEN]
network, ovs_interfaceid = self._nw_info_build_network(port,
networks,
subnets)
nw_info.append(network_model.VIF(
id=port['id'],
address=port['mac_address'],
network=network,
type=port.get('binding:vif_type'),
ovs_interfaceid=ovs_interfaceid,
devname=devname))
return nw_info
def _get_subnets_from_port(self, context, port):

View File

@ -336,6 +336,8 @@ class TestNeutronv2Base(test.TestCase):
mox_list_params = {'shared': True}
self.moxed_client.list_networks(
**mox_list_params).AndReturn({'networks': []})
ports_in_requested_net_order = []
for net_id in expected_network_order:
port_req_body = {
'port': {
@ -363,6 +365,7 @@ class TestNeutronv2Base(test.TestCase):
MyComparator(port_req_body)
).AndReturn(
{'port': port})
ports_in_requested_net_order.append(port_id)
else:
fixed_ip = fixed_ips.get(net_id)
if fixed_ip:
@ -385,11 +388,13 @@ class TestNeutronv2Base(test.TestCase):
return api
self.moxed_client.create_port(
MyComparator(port_req_body)).AndReturn(res_port)
ports_in_requested_net_order.append(res_port['port']['id'])
api.get_instance_nw_info(mox.IgnoreArg(),
self.instance,
networks=nets).AndReturn(
self._returned_nw_info)
networks=nets,
port_ids=ports_in_requested_net_order
).AndReturn(self._returned_nw_info)
self.mox.ReplayAll()
return api
@ -418,7 +423,9 @@ class TestNeutronv2Base(test.TestCase):
nets = number == 1 and self.nets1 or self.nets2
net_info_cache = []
for port in port_data:
net_info_cache.append({"network": {"id": port['network_id']}})
net_info_cache.append({"network": {"id": port['network_id']},
"id": port['id']})
instance = copy.copy(self.instance)
# This line here does not wrap net_info_cache in jsonutils.dumps()
# intentionally to test the other code path when it's not unicode.
@ -480,91 +487,150 @@ class TestNeutronv2(TestNeutronv2Base):
self.moxed_client)
self._get_instance_nw_info(2)
def test_get_instance_nw_info_with_nets(self):
# Test get instance_nw_info with networks passed in.
api = neutronapi.API()
self.mox.StubOutWithMock(api.db, 'instance_info_cache_update')
api.db.instance_info_cache_update(
mox.IgnoreArg(),
self.instance['uuid'], mox.IgnoreArg())
self.moxed_client.list_ports(
tenant_id=self.instance['project_id'],
device_id=self.instance['uuid']).AndReturn(
{'ports': self.port_data1})
port_data = self.port_data1
for ip in port_data[0]['fixed_ips']:
self.moxed_client.list_floatingips(
fixed_ip_address=ip['ip_address'],
port_id=port_data[0]['id']).AndReturn(
{'floatingips': self.float_data1})
self.moxed_client.list_subnets(
id=mox.SameElementsAs(['my_subid1'])).AndReturn(
{'subnets': self.subnet_data1})
self.moxed_client.list_ports(
network_id='my_netid1',
device_owner='network:dhcp').AndReturn(
{'ports': self.dhcp_port_data1})
neutronv2.get_client(mox.IgnoreArg(),
admin=True).MultipleTimes().AndReturn(
self.moxed_client)
self.mox.ReplayAll()
self.instance['info_cache'] = {'network_info': []}
nw_inf = api.get_instance_nw_info(self.context,
self.instance,
networks=self.nets1)
self._verify_nw_info(nw_inf, 0)
def test_get_instance_nw_info_with_nets_and_info_cache(self):
def test_get_instance_nw_info_with_nets_add_interface(self):
# This tests that adding an interface to an instance does not
# remove the first instance from the instance.
api = neutronapi.API()
self.mox.StubOutWithMock(api.db, 'instance_info_cache_update')
api.db.instance_info_cache_update(
mox.IgnoreArg(),
self.instance['uuid'], mox.IgnoreArg())
self.moxed_client.list_ports(
tenant_id=self.instance['project_id'],
device_id=self.instance['uuid']).AndReturn(
{'ports': self.port_data1})
port_data = self.port_data1
for ip in port_data[0]['fixed_ips']:
self.moxed_client.list_floatingips(
fixed_ip_address=ip['ip_address'],
port_id=port_data[0]['id']).AndReturn(
{'floatingips': self.float_data1})
self.moxed_client.list_subnets(
id=mox.SameElementsAs(['my_subid1'])).AndReturn(
{'subnets': self.subnet_data1})
self.moxed_client.list_ports(
network_id='my_netid1',
device_owner='network:dhcp').AndReturn(
{'ports': self.dhcp_port_data1})
neutronv2.get_client(mox.IgnoreArg(),
admin=True).MultipleTimes().AndReturn(
self.moxed_client)
self.mox.ReplayAll()
network_model = model.Network(id='network_id',
bridge='br-int',
injected='injected',
label='fake_network',
tenant_name='fake_tenant')
self.instance['info_cache'] = {
'network_info': [{'id': 'port_id',
tenant_id='fake_tenant')
network_cache = {'info_cache': {
'network_info': [{'id': self.port_data2[0]['id'],
'address': 'mac_address',
'network': network_model,
'type': 'ovs',
'ovs_interfaceid': 'ovs_interfaceid',
'devname': 'devname'}]}
'devname': 'devname'}]}}
self._fake_get_instance_nw_info_helper(network_cache,
self.port_data2,
self.nets2,
[self.port_data2[1]['id']])
def test_get_instance_nw_info_remove_ports_from_neutron(self):
# This tests that when a port is removed in neutron it
# is also removed from the nova.
network_model = model.Network(id=self.port_data2[0]['network_id'],
bridge='br-int',
injected='injected',
label='fake_network',
tenant_id='fake_tenant')
network_cache = {'info_cache': {
'network_info': [{'id': 'network_id',
'address': 'mac_address',
'network': network_model,
'type': 'ovs',
'ovs_interfaceid': 'ovs_interfaceid',
'devname': 'devname'}]}}
self._fake_get_instance_nw_info_helper(network_cache,
self.port_data2,
None,
None)
def test_get_instance_nw_info_ignores_neturon_ports(self):
# Tests that only ports in the network_cache are updated
# and ports returned from neutron that match the same
# instance_id/device_id are ignored.
port_data2 = copy.copy(self.port_data2)
# set device_id on the ports to be the same.
port_data2[1]['device_id'] = port_data2[0]['device_id']
network_model = model.Network(id='network_id',
bridge='br-int',
injected='injected',
label='fake_network',
tenant_id='fake_tenant')
network_cache = {'info_cache': {
'network_info': [{'id': 'network_id',
'address': 'mac_address',
'network': network_model,
'type': 'ovs',
'ovs_interfaceid': 'ovs_interfaceid',
'devname': 'devname'}]}}
self._fake_get_instance_nw_info_helper(network_cache,
port_data2,
None,
None)
def _fake_get_instance_nw_info_helper(self, network_cache,
current_neutron_ports,
networks=None, port_ids=None):
"""Helper function to test get_instance_nw_info.
:param network_cache - data already in the nova network cache.
:param current_neutron_ports - updated list of ports from neutron.
:param networks - networks of ports being added to instance.
:param port_ids - new ports being added to instance.
"""
# keep a copy of the original ports/networks to pass to
# get_instance_nw_info() as the code below changes them.
original_port_ids = copy.copy(port_ids)
original_networks = copy.copy(networks)
api = neutronapi.API()
self.mox.StubOutWithMock(api.db, 'instance_info_cache_update')
api.db.instance_info_cache_update(
mox.IgnoreArg(),
self.instance['uuid'], mox.IgnoreArg())
neutronv2.get_client(mox.IgnoreArg(),
admin=True).MultipleTimes().AndReturn(
self.moxed_client)
self.moxed_client.list_ports(
tenant_id=self.instance['project_id'],
device_id=self.instance['uuid']).AndReturn(
{'ports': current_neutron_ports})
ifaces = network_cache['info_cache']['network_info']
if port_ids is None:
port_ids = [iface['id'] for iface in ifaces]
net_ids = [iface['network']['id'] for iface in ifaces]
nets = [{'id': iface['network']['id'],
'name': iface['network']['label'],
'tenant_id': iface['network']['meta']['tenant_id']}
for iface in ifaces]
if networks is None:
self.moxed_client.list_networks(
id=net_ids).AndReturn({'networks': nets})
else:
networks = networks + [
dict(id=iface['network']['id'],
name=iface['network']['label'],
tenant_id=iface['network']['meta']['tenant_id'])
for iface in ifaces]
port_ids = [iface['id'] for iface in ifaces] + port_ids
index = 0
for current_neutron_port in current_neutron_ports:
if current_neutron_port['id'] in port_ids:
for ip in current_neutron_port['fixed_ips']:
self.moxed_client.list_floatingips(
fixed_ip_address=ip['ip_address'],
port_id=current_neutron_port['id']).AndReturn(
{'floatingips': [self.float_data2[index]]})
self.moxed_client.list_subnets(
id=mox.SameElementsAs([ip['subnet_id']])
).AndReturn(
{'subnets': [self.subnet_data_n[index]]})
self.moxed_client.list_ports(
network_id=current_neutron_port['network_id'],
device_owner='network:dhcp').AndReturn(
{'ports': self.dhcp_port_data1})
index += 1
self.mox.ReplayAll()
self.instance['info_cache'] = network_cache
instance = copy.copy(self.instance)
instance['info_cache'] = network_cache['info_cache']
nw_inf = api.get_instance_nw_info(self.context,
self.instance,
networks=self.nets1)
self.assertEqual(2, len(nw_inf))
for k, v in self.instance['info_cache']['network_info'][0].iteritems():
self.assertEqual(nw_inf[0][k], v)
# remove first inf and verify that the second interface is correct
del nw_inf[0]
self._verify_nw_info(nw_inf, 0)
instance,
networks=original_networks,
port_ids=original_port_ids)
self.assertEqual(index, len(nw_inf))
def test_get_instance_nw_info_without_subnet(self):
# Test get instance_nw_info for a port without subnet.
@ -589,7 +655,8 @@ class TestNeutronv2(TestNeutronv2Base):
net_info_cache = []
for port in self.port_data3:
net_info_cache.append({"network": {"id": port['network_id']}})
net_info_cache.append({"network": {"id": port['network_id']},
"id": port['id']})
instance = copy.copy(self.instance)
instance['info_cache'] = {'network_info':
six.text_type(
@ -927,7 +994,8 @@ class TestNeutronv2(TestNeutronv2Base):
net_info_cache = []
for port in port_data:
net_info_cache.append({"network": {"id": port['network_id']}})
net_info_cache.append({"network": {"id": port['network_id']},
"id": port['id']})
instance = copy.copy(self.instance)
instance['info_cache'] = {'network_info':
six.text_type(
@ -1832,7 +1900,8 @@ class TestNeutronv2(TestNeutronv2Base):
self.mox.ReplayAll()
neutronv2.get_client('fake')
nw_info = api._build_network_info_model(self.context, fake_inst,
fake_nets)
fake_nets,
[fake_ports[0]['id']])
self.assertEqual(len(nw_info), 1)
self.assertEqual(nw_info[0]['id'], 'port0')
self.assertEqual(nw_info[0]['address'], 'de:ad:be:ef:00:01')
@ -1850,6 +1919,22 @@ class TestNeutronv2(TestNeutronv2Base):
class TestNeutronv2ModuleMethods(test.TestCase):
def test_gather_port_ids_and_networks_wrong_params(self):
api = neutronapi.API()
# Test with networks not None and port_ids is None
self.assertRaises(exception.NovaException,
api._gather_port_ids_and_networks,
'fake_context', 'fake_instance',
[{'network': {'name': 'foo'}}], None)
# Test with networks is None and port_ids not None
self.assertRaises(exception.NovaException,
api._gather_port_ids_and_networks,
'fake_context', 'fake_instance',
None, ['list', 'of', 'port_ids'])
def test_ensure_requested_network_ordering_no_preference_ids(self):
l = [1, 2, 3]