Connectivity tests for OVS agent failures/restarts
Adding two tests:
* A test that verifies, for the native OpenFlow interface (i.e. not the
ovs-ofctl CLI), that stopping the ovs-neutron-agent does not disrupt
network traffic. Stopping the agent also stops the OVS bridge controller,
so OVS may decide to take over management of the OpenFlow rules and clear
them, thereby disrupting network traffic.
* A test that creates two ports in a single network, then starts
pinging one from the other while restarting OVS agents. The test verifies
that no packet is lost during OVS agent restarts.
Change-Id: I2cd1195fc0622c8c8d614f00e9dd6884ad388d69
Related-Bug: 1514056
Related-Bug: 1607787
(cherry picked from commit 27aee4a9c5)
This commit is contained in:
parent
d963792542
commit
81bb6aa348
|
@ -127,9 +127,10 @@ def assert_async_ping(src_namespace, dst_ip, timeout=1, count=1, interval=1):
|
|||
|
||||
|
||||
@contextlib.contextmanager
|
||||
def async_ping(namespace, ips):
|
||||
def async_ping(namespace, ips, timeout=1, count=10):
|
||||
with futures.ThreadPoolExecutor(max_workers=len(ips)) as executor:
|
||||
fs = [executor.submit(assert_async_ping, namespace, ip, count=10)
|
||||
fs = [executor.submit(assert_async_ping, namespace, ip, count=count,
|
||||
timeout=timeout)
|
||||
for ip in ips]
|
||||
yield lambda: all(f.done() for f in fs)
|
||||
futures.wait(fs)
|
||||
|
|
|
@ -61,12 +61,22 @@ class ClientFixture(fixtures.Fixture):
|
|||
|
||||
return self._create_resource(resource_type, spec)
|
||||
|
||||
def create_network(self, tenant_id, name=None, external=False):
|
||||
def create_network(self, tenant_id, name=None, external=False,
|
||||
network_type=None, segmentation_id=None,
|
||||
physical_network=None):
|
||||
resource_type = 'network'
|
||||
|
||||
name = name or utils.get_rand_name(prefix=resource_type)
|
||||
spec = {'tenant_id': tenant_id, 'name': name}
|
||||
spec['router:external'] = external
|
||||
|
||||
if segmentation_id is not None:
|
||||
spec['provider:segmentation_id'] = segmentation_id
|
||||
if network_type is not None:
|
||||
spec['provider:network_type'] = network_type
|
||||
if physical_network is not None:
|
||||
spec['provider:physical_network'] = physical_network
|
||||
|
||||
return self._create_resource(resource_type, spec)
|
||||
|
||||
def create_subnet(self, tenant_id, network_id,
|
||||
|
|
|
@ -25,6 +25,8 @@ from neutron.tests.common import config_fixtures
|
|||
from neutron.tests.common.exclusive_resources import port
|
||||
from neutron.tests.common import helpers as c_helpers
|
||||
|
||||
PHYSICAL_NETWORK_NAME = "physnet1"
|
||||
|
||||
|
||||
class ConfigFixture(fixtures.Fixture):
|
||||
"""A fixture that holds an actual Neutron configuration.
|
||||
|
@ -129,7 +131,7 @@ class ML2ConfigFixture(ConfigFixture):
|
|||
'mechanism_drivers': mechanism_drivers,
|
||||
},
|
||||
'ml2_type_vlan': {
|
||||
'network_vlan_ranges': 'physnet1:1000:2999',
|
||||
'network_vlan_ranges': PHYSICAL_NETWORK_NAME + ':1000:2999',
|
||||
},
|
||||
'ml2_type_gre': {
|
||||
'tunnel_id_ranges': '1:1000',
|
||||
|
@ -192,7 +194,8 @@ class OVSConfigFixture(ConfigFixture):
|
|||
super(OVSConfigFixture, self)._setUp()
|
||||
|
||||
def _generate_bridge_mappings(self):
|
||||
return 'physnet1:%s' % utils.get_rand_device_name(prefix='br-eth')
|
||||
return '%s:%s' % (PHYSICAL_NETWORK_NAME,
|
||||
utils.get_rand_device_name(prefix='br-eth'))
|
||||
|
||||
def _generate_integration_bridge(self):
|
||||
return utils.get_rand_device_name(prefix='br-int')
|
||||
|
@ -259,7 +262,7 @@ class LinuxBridgeConfigFixture(ConfigFixture):
|
|||
})
|
||||
|
||||
def _generate_bridge_mappings(self, device_name):
|
||||
return 'physnet1:%s' % device_name
|
||||
return '%s:%s' % (PHYSICAL_NETWORK_NAME, device_name)
|
||||
|
||||
|
||||
class L3ConfigFixture(ConfigFixture):
|
||||
|
|
|
@ -297,6 +297,13 @@ class Host(fixtures.Fixture):
|
|||
def linuxbridge_agent(self, agent):
|
||||
self.agents['linuxbridge'] = agent
|
||||
|
||||
@property
def l2_agent(self):
    """Return the agent fixture matching the host's configured L2 agent type.

    Looks at ``self.host_desc.l2_agent_type`` and returns
    ``self.linuxbridge_agent`` or ``self.ovs_agent`` accordingly.
    For any other agent type the result is ``None``.
    """
    agent_type = self.host_desc.l2_agent_type
    if agent_type == constants.AGENT_TYPE_LINUXBRIDGE:
        return self.linuxbridge_agent
    if agent_type == constants.AGENT_TYPE_OVS:
        return self.ovs_agent
    # Unknown agent type: same implicit result as falling off the end.
    return None
|
||||
|
||||
|
||||
class Environment(fixtures.Fixture):
|
||||
"""Represents a deployment topology.
|
||||
|
|
|
@ -21,6 +21,7 @@ import signal
|
|||
import fixtures
|
||||
from neutronclient.common import exceptions as nc_exc
|
||||
from neutronclient.v2_0 import client
|
||||
from oslo_log import log as logging
|
||||
from oslo_utils import fileutils
|
||||
|
||||
from neutron.agent.linux import async_process
|
||||
|
@ -31,6 +32,8 @@ from neutron.tests import base
|
|||
from neutron.tests.common import net_helpers
|
||||
from neutron.tests.fullstack import base as fullstack_base
|
||||
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ProcessFixture(fixtures.Fixture):
|
||||
def __init__(self, test_name, process_name, exec_name, config_filenames,
|
||||
|
@ -66,13 +69,28 @@ class ProcessFixture(fixtures.Fixture):
|
|||
cmd, run_as_root=run_as_root, namespace=self.namespace
|
||||
)
|
||||
self.process.start(block=True)
|
||||
LOG.debug("Process started: %s", self.process_name)
|
||||
|
||||
def stop(self):
|
||||
def stop(self, kill_signal=None):
|
||||
kill_signal = kill_signal or self.kill_signal
|
||||
try:
|
||||
self.process.stop(block=True, kill_signal=self.kill_signal)
|
||||
self.process.stop(block=True, kill_signal=kill_signal)
|
||||
except async_process.AsyncProcessException as e:
|
||||
if "Process is not running" not in str(e):
|
||||
raise
|
||||
LOG.debug("Process stopped: %s", self.process_name)
|
||||
|
||||
def restart(self, executor=None):
    """Stop the process and start it again.

    :param executor: optional object exposing ``submit()``. When given,
        the stop/start sequence is handed to ``executor.submit()`` and
        whatever that call returns is returned to the caller. When
        omitted, the sequence runs synchronously in the calling thread.
    :returns: the result of ``executor.submit()`` or ``None``.
    """
    def _stop_then_start():
        self.stop()
        self.start()

    LOG.debug("Restarting process: %s", self.process_name)

    if executor is not None:
        return executor.submit(_stop_then_start)

    _stop_then_start()
    return None
|
||||
|
||||
|
||||
class RabbitmqEnvironmentFixture(fixtures.Fixture):
|
||||
|
@ -101,7 +119,18 @@ class RabbitmqEnvironmentFixture(fixtures.Fixture):
|
|||
utils.execute(cmd, run_as_root=True)
|
||||
|
||||
|
||||
class NeutronServerFixture(fixtures.Fixture):
|
||||
class ServiceFixture(fixtures.Fixture):
    """Fixture base that forwards lifecycle calls to ``self.process_fixture``.

    ``process_fixture`` is not assigned in this class; it has to be set
    before any of the methods below is used.
    NOTE(review): presumably subclasses assign it during their setup --
    confirm against the concrete fixtures.
    """

    def start(self):
        """Start the wrapped process and return the delegate's result."""
        process = self.process_fixture
        return process.start()

    def stop(self, kill_signal=None):
        """Stop the wrapped process, passing ``kill_signal`` through as is."""
        process = self.process_fixture
        return process.stop(kill_signal=kill_signal)

    def restart(self, executor=None):
        """Restart the wrapped process, passing ``executor`` through as is."""
        process = self.process_fixture
        return process.restart(executor=executor)
|
||||
|
||||
|
||||
class NeutronServerFixture(ServiceFixture):
|
||||
|
||||
NEUTRON_SERVER = "neutron-server"
|
||||
|
||||
|
@ -141,7 +170,7 @@ class NeutronServerFixture(fixtures.Fixture):
|
|||
return client.Client(auth_strategy="noauth", endpoint_url=url)
|
||||
|
||||
|
||||
class OVSAgentFixture(fixtures.Fixture):
|
||||
class OVSAgentFixture(ServiceFixture):
|
||||
|
||||
NEUTRON_OVS_AGENT = "neutron-openvswitch-agent"
|
||||
|
||||
|
@ -174,7 +203,7 @@ class OVSAgentFixture(fixtures.Fixture):
|
|||
kill_signal=signal.SIGTERM))
|
||||
|
||||
|
||||
class LinuxBridgeAgentFixture(fixtures.Fixture):
|
||||
class LinuxBridgeAgentFixture(ServiceFixture):
|
||||
|
||||
NEUTRON_LINUXBRIDGE_AGENT = "neutron-linuxbridge-agent"
|
||||
|
||||
|
@ -206,7 +235,7 @@ class LinuxBridgeAgentFixture(fixtures.Fixture):
|
|||
)
|
||||
|
||||
|
||||
class L3AgentFixture(fixtures.Fixture):
|
||||
class L3AgentFixture(ServiceFixture):
|
||||
|
||||
NEUTRON_L3_AGENT = "neutron-l3-agent"
|
||||
|
||||
|
|
|
@ -12,11 +12,18 @@
|
|||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
from concurrent import futures
|
||||
import signal
|
||||
|
||||
from neutron_lib import constants
|
||||
from oslo_log import log as logging
|
||||
from oslo_utils import uuidutils
|
||||
import testscenarios
|
||||
|
||||
from neutron.common import utils as common_utils
|
||||
from neutron.tests.common import net_helpers
|
||||
from neutron.tests.fullstack import base
|
||||
from neutron.tests.fullstack.resources import config
|
||||
from neutron.tests.fullstack.resources import environment
|
||||
from neutron.tests.fullstack.resources import machine
|
||||
from neutron.tests.fullstack import utils
|
||||
|
@ -24,6 +31,10 @@ from neutron.tests.unit import testlib_api
|
|||
|
||||
load_tests = testlib_api.module_load_tests
|
||||
|
||||
SEGMENTATION_ID = 1234
|
||||
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class BaseConnectivitySameNetworkTest(base.BaseFullStackTestCase):
|
||||
|
||||
|
@ -31,6 +42,8 @@ class BaseConnectivitySameNetworkTest(base.BaseFullStackTestCase):
|
|||
ovsdb_interface = None
|
||||
arp_responder = False
|
||||
|
||||
num_hosts = 3
|
||||
|
||||
def setUp(self):
|
||||
host_descriptions = [
|
||||
# There's value in enabling L3 agents registration when l2pop
|
||||
|
@ -40,7 +53,8 @@ class BaseConnectivitySameNetworkTest(base.BaseFullStackTestCase):
|
|||
l3_agent=self.l2_pop,
|
||||
of_interface=self.of_interface,
|
||||
ovsdb_interface=self.ovsdb_interface,
|
||||
l2_agent_type=self.l2_agent_type) for _ in range(3)]
|
||||
l2_agent_type=self.l2_agent_type)
|
||||
for _ in range(self.num_hosts)]
|
||||
env = environment.Environment(
|
||||
environment.EnvironmentDescription(
|
||||
network_type=self.network_type,
|
||||
|
@ -49,23 +63,39 @@ class BaseConnectivitySameNetworkTest(base.BaseFullStackTestCase):
|
|||
host_descriptions)
|
||||
super(BaseConnectivitySameNetworkTest, self).setUp(env)
|
||||
|
||||
def _test_connectivity(self):
|
||||
tenant_uuid = uuidutils.generate_uuid()
|
||||
def _prepare_network(self, tenant_uuid):
|
||||
net_args = {'network_type': self.network_type}
|
||||
if self.network_type in ['flat', 'vlan']:
|
||||
net_args['physical_network'] = config.PHYSICAL_NETWORK_NAME
|
||||
if self.network_type in ['vlan', 'gre', 'vxlan']:
|
||||
net_args['segmentation_id'] = SEGMENTATION_ID
|
||||
|
||||
network = self.safe_client.create_network(tenant_uuid)
|
||||
network = self.safe_client.create_network(tenant_uuid, **net_args)
|
||||
self.safe_client.create_subnet(
|
||||
tenant_uuid, network['id'], '20.0.0.0/24')
|
||||
|
||||
vms = machine.FakeFullstackMachinesList([
|
||||
return network
|
||||
|
||||
def _prepare_vms_in_net(self, tenant_uuid, network):
|
||||
vms = machine.FakeFullstackMachinesList(
|
||||
self.useFixture(
|
||||
machine.FakeFullstackMachine(
|
||||
self.environment.hosts[i],
|
||||
host,
|
||||
network['id'],
|
||||
tenant_uuid,
|
||||
self.safe_client))
|
||||
for i in range(3)])
|
||||
for host in self.environment.hosts)
|
||||
|
||||
vms.block_until_all_boot()
|
||||
return vms
|
||||
|
||||
def _prepare_vms_in_single_network(self):
    """Create a network for a fresh tenant and put VMs into it.

    A new tenant UUID is generated, ``_prepare_network()`` is called for
    it, and the result of ``_prepare_vms_in_net()`` for that tenant and
    network is returned.
    """
    tenant_id = uuidutils.generate_uuid()
    return self._prepare_vms_in_net(
        tenant_id, self._prepare_network(tenant_id))
|
||||
|
||||
def _test_connectivity(self):
    """Prepare VMs in a single network and run ``ping_all()`` on them."""
    self._prepare_vms_in_single_network().ping_all()
|
||||
|
||||
|
||||
|
@ -87,6 +117,61 @@ class TestOvsConnectivitySameNetwork(BaseConnectivitySameNetworkTest):
|
|||
self._test_connectivity()
|
||||
|
||||
|
||||
class TestOvsConnectivitySameNetworkOnOvsBridgeControllerStop(
        BaseConnectivitySameNetworkTest):
    """Check that pings keep working after the OVS agents are stopped.

    Two hosts are used. The L2 agent on every host is stopped (with the
    fixture's default signal or with an explicit one) and a long ping is
    then run from the first VM to the second.
    """

    num_hosts = 2

    l2_agent_type = constants.AGENT_TYPE_OVS
    network_scenarios = [
        ('VXLAN', {'network_type': 'vxlan', 'l2_pop': False}),
        ('GRE and l2pop', {'network_type': 'gre', 'l2_pop': True}),
        ('VLANs', {'network_type': 'vlan', 'l2_pop': False}),
    ]

    # Do not test for CLI ofctl interface as controller is irrelevant for CLI
    scenarios = testscenarios.multiply_scenarios(
        network_scenarios,
        [(m, v) for (m, v) in utils.get_ovs_interface_scenarios()
         if v['of_interface'] != 'ovs-ofctl'])

    def _test_controller_timeout_does_not_break_connectivity(self,
                                                             kill_signal=None):
        """Stop every L2 agent, then require a 25-packet ping to succeed.

        :param kill_signal: signal forwarded to each agent's ``stop()``.
            When ``None``, ``stop()`` is called without arguments.
        """
        # Environment preparation is effectively the same as connectivity test
        vms = self._prepare_vms_in_single_network()

        src_namespace = vms[0].namespace
        dst_ip = vms[1].ip

        LOG.debug("Stopping agents (hence also OVS bridge controllers)")
        # Only pass kill_signal along when the caller actually supplied one.
        stop_kwargs = {}
        if kill_signal is not None:
            stop_kwargs['kill_signal'] = kill_signal
        for host in self.environment.hosts:
            host.l2_agent.stop(**stop_kwargs)

        # Ping long enough that 3 x 5 seconds is exceeded even under high
        # load. The duration was chosen to match three times the
        # inactivity_probe time, which is the time after which OVS vswitchd
        # treats the controller as dead and starts managing the bridge by
        # itself when the fail mode setting is not set to secure (see the
        # ovs-vsctl man page for further details).
        interrupted = RuntimeError("Networking interrupted after "
                                   "controllers have vanished")
        with net_helpers.async_ping(src_namespace, [dst_ip],
                                    timeout=2, count=25) as done:
            common_utils.wait_until_true(done, exception=interrupted)

    def test_controller_timeout_does_not_break_connectivity_sigterm(self):
        """Run the check with the agent fixture's default stop signal."""
        self._test_controller_timeout_does_not_break_connectivity()

    def test_controller_timeout_does_not_break_connectivity_sigkill(self):
        """Run the check while stopping the agents with SIGKILL."""
        self._test_controller_timeout_does_not_break_connectivity(
            signal.SIGKILL)
|
||||
|
||||
|
||||
class TestLinuxBridgeConnectivitySameNetwork(BaseConnectivitySameNetworkTest):
|
||||
|
||||
l2_agent_type = constants.AGENT_TYPE_LINUXBRIDGE
|
||||
|
@ -101,3 +186,59 @@ class TestLinuxBridgeConnectivitySameNetwork(BaseConnectivitySameNetworkTest):
|
|||
|
||||
def test_connectivity(self):
|
||||
self._test_connectivity()
|
||||
|
||||
|
||||
class TestUninterruptedConnectivityOnL2AgentRestart(
        BaseConnectivitySameNetworkTest):
    """Check that pings between two VMs survive an L2 agent restart.

    Scenarios combine the OVS agent (with every OVS interface scenario)
    and the Linux bridge agent with flat, VLAN and VXLAN networks.
    """

    num_hosts = 2

    ovs_agent_scenario = [('OVS',
                           {'l2_agent_type': constants.AGENT_TYPE_OVS})]
    lb_agent_scenario = [('LB',
                          {'l2_agent_type': constants.AGENT_TYPE_LINUXBRIDGE})]

    network_scenarios = [
        ('Flat network', {'network_type': 'flat',
                          'l2_pop': False}),
        ('VLANs', {'network_type': 'vlan',
                   'l2_pop': False}),
        ('VXLAN', {'network_type': 'vxlan',
                   'l2_pop': False}),
    ]
    scenarios = (
        testscenarios.multiply_scenarios(ovs_agent_scenario, network_scenarios,
                                         utils.get_ovs_interface_scenarios()) +
        testscenarios.multiply_scenarios(lb_agent_scenario, network_scenarios)
    )

    def test_l2_agent_restart(self, agent_restart_timeout=20):
        """Restart all L2 agents at once while pinging across hosts.

        :param agent_restart_timeout: used both as the number of ping
            packets to send and as the number of seconds to wait for all
            restarts to finish.
        :raises RuntimeError: if the ping does not complete.
        """
        # Environment preparation is effectively the same as connectivity test
        vms = self._prepare_vms_in_single_network()

        ns0 = vms[0].namespace
        ip1 = vms[1].ip
        agents = [host.l2_agent for host in self.environment.hosts]

        # Restart agents on all nodes simultaneously while pinging across
        # the hosts. The ping has to cross int and phys bridges and travels
        # via central bridge as the vms are on separate hosts.
        with net_helpers.async_ping(ns0, [ip1], timeout=2,
                                    count=agent_restart_timeout) as done:
            LOG.debug("Restarting agents")
            executor = futures.ThreadPoolExecutor(max_workers=len(agents))
            try:
                restarts = [agent.restart(executor=executor)
                            for agent in agents]

                futures.wait(restarts, timeout=agent_restart_timeout)

                self.assertTrue(all(r.done() for r in restarts))
            finally:
                # Always release the pool. wait=False keeps the timeout
                # above meaningful: a restart that is still running is not
                # waited for here, it is merely allowed to finish.
                executor.shutdown(wait=False)
            LOG.debug("Restarting agents - done")

            # It is necessary to give agents time to initialize
            # because some crucial steps (e.g. setting up bridge flows)
            # happen only after RPC is established
            common_utils.wait_until_true(
                done,
                exception=RuntimeError("Could not ping the other VM, L2 agent "
                                       "restart leads to network disruption"))
|
||||
|
|
Loading…
Reference in New Issue