Process OVS trunk bridges associated to VM deletes

If a VM is deleted while the OVS agent is down, upon
restart the OVS trunk bridge is deleted successfully
but the patch ports integration bridge side are left
behind.

This patch makes sure the bridge as well as its peers
are cleaned up.

Closes-bug: #1623708

Change-Id: I3f010755fdb6501753d20357ad2cd0d6c02cf22a
This commit is contained in:
Armando Migliaccio 2016-09-22 13:15:11 -07:00 committed by Brian Haley
parent 08f2af18f9
commit 4148102f73
6 changed files with 108 additions and 53 deletions

View File

@ -69,8 +69,7 @@ class OVSTrunkSkeleton(agent.TrunkSkeleton):
LOG.error(_LE(
"Error on event %(event)s for subports "
"%(subports)s: %(err)s"),
{'event': event_type, 'subports': subports,
'trunk_id': trunk_id, 'err': e})
{'event': event_type, 'subports': subports, 'err': e})
def init_handler(resource, event, trigger, agent=None):

View File

@ -40,7 +40,7 @@ from neutron.services.trunk.rpc import agent
LOG = logging.getLogger(__name__)
WAIT_FOR_PORT_TIMEOUT = 60
DEFAULT_WAIT_FOR_PORT_TIMEOUT = 60
def lock_on_bridge_name(required_parameter):
@ -114,6 +114,7 @@ class OVSDBHandler(object):
"""
def __init__(self, trunk_manager):
self.timeout = DEFAULT_WAIT_FOR_PORT_TIMEOUT
self._context = n_context.get_admin_context_without_session()
self.trunk_manager = trunk_manager
self.trunk_rpc = agent.TrunkStub()
@ -155,11 +156,7 @@ class OVSDBHandler(object):
:param bridge_name: Name of the created trunk bridge.
"""
# Wait for the VM's port, i.e. the trunk parent port, to show up.
# If the VM fails to show up, i.e. this fails with a timeout,
# then we clean the dangling bridge.
bridge = ovs_lib.OVSBridge(bridge_name)
# Handle condition when there was bridge in both added and removed
# events and handle_trunk_remove greenthread was executed before
# handle_trunk_add
@ -168,19 +165,15 @@ class OVSDBHandler(object):
bridge_name)
return
bridge_has_port_predicate = functools.partial(
bridge_has_instance_port, bridge)
try:
common_utils.wait_until_true(
bridge_has_port_predicate,
timeout=WAIT_FOR_PORT_TIMEOUT)
except eventlet.TimeoutError:
LOG.error(
_LE('No port appeared on trunk bridge %(br_name)s '
'in %(timeout)d seconds. Cleaning up the bridge'),
{'br_name': bridge.br_name,
'timeout': WAIT_FOR_PORT_TIMEOUT})
bridge.destroy()
# Determine the state of the trunk bridge by looking for the VM's port,
# i.e. the trunk parent port and/or patch ports to be present. If the
# VM is absent, then we clean the dangling bridge. If the VM is present
# the value of 'rewire' tells us whether or not the bridge was dealt
# with in a previous added event, and thus it has active patch ports.
if not self._is_vm_connected(bridge):
LOG.debug("No instance port associated to bridge %s could be "
"found. Deleting bridge and its resources.", bridge_name)
self.trunk_manager.dispose_trunk(bridge)
return
# Check if the trunk was provisioned in a previous run. This can happen
@ -455,3 +448,20 @@ class OVSDBHandler(object):
return constants.DEGRADED_STATUS
else:
return constants.ACTIVE_STATUS
def _is_vm_connected(self, bridge):
"""True if an instance is connected to bridge, False otherwise."""
bridge_has_port_predicate = functools.partial(
bridge_has_instance_port, bridge)
try:
common_utils.wait_until_true(
bridge_has_port_predicate,
timeout=self.timeout)
return True
except eventlet.TimeoutError:
LOG.error(
_LE('No port present on trunk bridge %(br_name)s '
'in %(timeout)d seconds.'),
{'br_name': bridge.br_name,
'timeout': self.timeout})
return False

View File

@ -16,7 +16,7 @@ from neutron_lib import constants
from neutron_lib import exceptions
from oslo_log import log as logging
from neutron._i18n import _
from neutron._i18n import _, _LE
from neutron.agent.common import ovs_lib
from neutron.services.trunk.drivers.openvswitch.agent import exceptions as exc
from neutron.services.trunk.drivers.openvswitch import utils
@ -253,6 +253,28 @@ class TrunkManager(object):
except RuntimeError as e:
raise TrunkManagerError(error=e)
def dispose_trunk(self, trunk_bridge):
"""Clean up all the OVS resources associated to trunk_bridge."""
ovsdb = trunk_bridge.ovsdb
patch_peers = []
try:
patch_peers = trunk_bridge.get_ports_attributes(
'Interface', columns=['options'])
with trunk_bridge.ovsdb.transaction() as txn:
for patch_peer in patch_peers:
peer_name = patch_peer['options'].get('peer')
if peer_name:
txn.add(ovsdb.del_port(peer_name, self.br_int.br_name))
txn.add(ovsdb.del_br(trunk_bridge.br_name))
LOG.debug("Deleted bridge '%s' and patch peers '%s'.",
trunk_bridge.br_name, patch_peers)
except RuntimeError as e:
LOG.error(_LE("Could not delete '%(peers)s' associated to "
"trunk bridge %(name)s. Reason: %(reason)s."),
{'peers': patch_peers,
'name': trunk_bridge.br_name,
'reason': e})
def add_sub_port(self, trunk_id, port_id, port_mac, segmentation_id):
"""Create a sub_port.

View File

@ -121,6 +121,18 @@ class OVSDBHandlerTestCase(base.OVSAgentTestFramework):
mock.patch.object(polling_manager, 'get_events',
side_effect=filter_events).start()
def _fill_trunk_dict(self, num=3):
ports = self.create_test_ports(amount=num)
self.trunk_dict['port_id'] = ports[0]['id']
self.trunk_dict['sub_ports'] = [trunk_obj.SubPort(
id=uuidutils.generate_uuid(),
port_id=ports[i]['id'],
mac_address=ports[i]['mac_address'],
segmentation_id=i,
trunk_id=self.trunk_dict['id'])
for i in range(1, num)]
return ports
def _test_trunk_creation_helper(self, ports):
self.setup_agent_and_ports(port_dicts=ports)
self.wait_until_ports_state(self.ports, up=True)
@ -130,15 +142,7 @@ class OVSDBHandlerTestCase(base.OVSAgentTestFramework):
not self.trunk_br.bridge_exists(self.trunk_br.br_name))
def test_trunk_creation_with_subports(self):
ports = self.create_test_ports(amount=3)
self.trunk_dict['port_id'] = ports[0]['id']
self.trunk_dict['sub_ports'] = [trunk_obj.SubPort(
id=uuidutils.generate_uuid(),
port_id=ports[i]['id'],
mac_address=ports[i]['mac_address'],
segmentation_id=i,
trunk_id=self.trunk_dict['id'])
for i in range(1, 3)]
ports = self._fill_trunk_dict()
self._test_trunk_creation_helper(ports[:1])
def test_trunk_creation_with_no_subports(self):
@ -147,32 +151,14 @@ class OVSDBHandlerTestCase(base.OVSAgentTestFramework):
self._test_trunk_creation_helper(ports)
def test_resync(self):
ports = self.create_test_ports(amount=3)
self.trunk_dict['port_id'] = ports[0]['id']
self.trunk_dict['sub_ports'] = [trunk_obj.SubPort(
id=uuidutils.generate_uuid(),
port_id=ports[i]['id'],
mac_address=ports[i]['mac_address'],
segmentation_id=i,
trunk_id=self.trunk_dict['id'])
for i in range(1, 3)]
ports = self._fill_trunk_dict()
self.setup_agent_and_ports(port_dicts=ports)
self.wait_until_ports_state(self.ports, up=True)
self.agent.fullsync = True
self.wait_until_ports_state(self.ports, up=True)
def test_restart(self):
ports = self.create_test_ports(amount=3)
self.trunk_dict['port_id'] = ports[0]['id']
self.trunk_dict['sub_ports'] = [trunk_obj.SubPort(
id=uuidutils.generate_uuid(),
port_id=ports[i]['id'],
mac_address=ports[i]['mac_address'],
segmentation_id=i,
trunk_id=self.trunk_dict['id'])
for i in range(1, 3)]
def test_restart_subport_events(self):
ports = self._fill_trunk_dict()
self.setup_agent_and_ports(port_dicts=ports)
self.wait_until_ports_state(self.ports, up=True)
@ -190,3 +176,19 @@ class OVSDBHandlerTestCase(base.OVSAgentTestFramework):
common_utils.wait_until_true(
lambda: (deleted_sp.patch_port_trunk_name not in
self.trunk_br.get_port_name_list()))
def test_cleanup_on_vm_delete(self):
with mock.patch.object(self.ovsdb_handler, 'handle_trunk_remove'):
br_int = ovs_lib.OVSBridge(self.br_int)
ports = self._fill_trunk_dict()
self.setup_agent_and_ports(port_dicts=ports[:1])
self.wait_until_ports_state(self.ports, up=True)
self.trunk_br.delete_port(self.trunk_port_name)
# We do not expect any instance port to show up on the trunk
# bridge so we can set a much more aggressive timeout and
# fail fast(er).
self.ovsdb_handler.timeout = 1
self.ovsdb_handler.handle_trunk_add(self.trunk_br.br_name)
# Check no resources are left behind.
self.assertFalse(self.trunk_br.exists())
self.assertFalse(ovsdb_handler.bridge_has_service_port(br_int))

View File

@ -225,3 +225,26 @@ class TrunkManagerTestCase(base.BaseSudoTestCase):
self.trunk_manager.remove_trunk(self.trunk.trunk_id,
self.trunk.port_id)
self.tester.wait_for_no_connection(self.tester.INGRESS)
class TrunkManagerDisposeTrunkTestCase(base.BaseSudoTestCase):
def setUp(self):
super(TrunkManagerDisposeTrunkTestCase, self).setUp()
trunk_id = uuidutils.generate_uuid()
self.trunk = trunk_manager.TrunkParentPort(
trunk_id, uuidutils.generate_uuid())
self.trunk.bridge = self.useFixture(
net_helpers.OVSTrunkBridgeFixture(
self.trunk.bridge.br_name)).bridge
self.br_int = self.useFixture(net_helpers.OVSBridgeFixture()).bridge
self.trunk_manager = trunk_manager.TrunkManager(
self.br_int)
def test_dispose_trunk(self):
self.trunk.plug(self.br_int)
self.trunk_manager.dispose_trunk(self.trunk.bridge)
self.assertFalse(
self.trunk.bridge.bridge_exists(self.trunk.bridge.br_name))
self.assertNotIn(self.trunk.patch_port_int_name,
self.br_int.get_port_name_list())

View File

@ -143,9 +143,8 @@ class TestOVSDBHandler(base.BaseTestCase):
@mock.patch('neutron.common.utils.wait_until_true',
side_effect=eventlet.TimeoutError)
def test_handle_trunk_add_interface_wont_appear(self, wut, br):
mock_br = br.return_value
self.ovsdb_handler.handle_trunk_add('foo')
self.assertTrue(mock_br.destroy.called)
self.assertTrue(self.trunk_manager.dispose_trunk.called)
@mock.patch('neutron.agent.common.ovs_lib.OVSBridge')
def test_handle_trunk_add_rpc_failure(self, br):