diff --git a/etc/vpn_agent.ini b/etc/vpn_agent.ini index 3b095e7de..932bac73a 100644 --- a/etc/vpn_agent.ini +++ b/etc/vpn_agent.ini @@ -33,7 +33,7 @@ # Default is for ubuntu use, /etc/strongswan.d # default_config_area=/etc/strongswan.d -[libreswan] +[pluto] # Initial interval in seconds for checking if pluto daemon is shutdown # shutdown_check_timeout=1 # diff --git a/neutron_vpnaas/services/vpn/device_drivers/ipsec.py b/neutron_vpnaas/services/vpn/device_drivers/ipsec.py index 5f4900265..74e83743f 100644 --- a/neutron_vpnaas/services/vpn/device_drivers/ipsec.py +++ b/neutron_vpnaas/services/vpn/device_drivers/ipsec.py @@ -20,6 +20,7 @@ import shutil import six import socket +import eventlet import jinja2 import netaddr from neutron.agent.linux import ip_lib @@ -28,7 +29,7 @@ from neutron.api.v2 import attributes from neutron.common import rpc as n_rpc from neutron.common import utils as n_utils from neutron import context -from neutron.i18n import _LE +from neutron.i18n import _LE, _LI, _LW from neutron.plugins.common import constants from neutron.plugins.common import utils as plugin_utils from oslo_concurrency import lockutils @@ -77,6 +78,26 @@ openswan_opts = [ cfg.CONF.register_opts(openswan_opts, 'openswan') +pluto_opts = [ + cfg.IntOpt('shutdown_check_timeout', + default=1, + help=_('Initial interval in seconds for checking if pluto ' + 'daemon is shutdown'), + deprecated_group='libreswan'), + cfg.IntOpt('shutdown_check_retries', + default=5, + help=_('The maximum number of retries for checking for ' + 'pluto daemon shutdown'), + deprecated_group='libreswan'), + cfg.FloatOpt('shutdown_check_back_off', + default=1.5, + help=_('A factor to increase the retry interval for ' + 'each retry'), + deprecated_group='libreswan') +] + +cfg.CONF.register_opts(pluto_opts, 'pluto') + JINJA_ENV = None IPSEC_CONNS = 'ipsec_site_connections' @@ -333,6 +354,7 @@ class OpenSwanProcess(BaseSwanProcess): self.etc_dir, 'ipsec.conf') self.pid_path = os.path.join( self.config_dir, 'var', 'run', 'pluto') + self.pid_file = '%s.pid' % self.pid_path def _execute(self, cmd, check_exit_code=True, extra_ok_codes=None): """Execute command on namespace.""" @@ -357,6 +379,61 @@ class OpenSwanProcess(BaseSwanProcess): self.vpnservice, 0o600) + def _process_running(self): + """Checks if process is still running.""" + + # If no PID file, we assume the process is not running. + if not os.path.exists(self.pid_file): + return False + + try: + # We take an ask-forgiveness-not-permission approach and rely + # on throwing to tell us something. If the pid file exists, + # delve into the process information and check if it matches + # our expected command line. + with open(self.pid_file, 'r') as f: + pid = f.readline().strip() + with open('/proc/%s/cmdline' % pid) as cmd_line_file: + cmd_line = cmd_line_file.readline() + if self.pid_path in cmd_line and 'pluto' in cmd_line: + # Okay the process is probably a pluto process + # and it contains the pid_path in the command + # line... could be a race. Log to error and return + # that it is *NOT* okay to clean up files. We are + # logging to error instead of debug because it + # indicates something bad has happened and this is + # valuable information for figuring it out. + LOG.error(_LE('Process %(pid)s exists with command ' + 'line %(cmd_line)s.') % + {'pid': pid, 'cmd_line': cmd_line}) + return True + + except IOError as e: + # This is logged as "info" instead of error because it simply + # means that we couldn't find the files to check on them. + LOG.info(_LI('Unable to find control files on startup for ' + 'router %(router)s: %(msg)s'), + {'router': self.id, 'msg': e}) + return False + + def _cleanup_control_files(self): + try: + ctl_file = '%s.ctl' % self.pid_path + LOG.debug('Removing %(pidfile)s and %(ctlfile)s', + {'pidfile': self.pid_file, + 'ctlfile': ctl_file}) + + if os.path.exists(self.pid_file): + os.remove(self.pid_file) + + if os.path.exists(ctl_file): + os.remove(ctl_file) + + except OSError as e: + LOG.error(_LE('Unable to remove pluto control ' + 'files for router %(router)s. %(msg)s'), + {'router': self.id, 'msg': e}) + def get_status(self): return self._execute([self.binary, 'whack', @@ -366,7 +443,21 @@ class OpenSwanProcess(BaseSwanProcess): def restart(self): """Restart the process.""" + # stop() followed immediately by a start() runs the risk that the + # current pluto daemon has not had a chance to shutdown. We check + # the current process information to see if the daemon is still + # running and if so, wait a short interval and retry. self.stop() + wait_interval = cfg.CONF.pluto.shutdown_check_timeout + for i in range(cfg.CONF.pluto.shutdown_check_retries): + if not self._process_running(): + self._cleanup_control_files() + break + eventlet.sleep(wait_interval) + wait_interval *= cfg.CONF.pluto.shutdown_check_back_off + else: + LOG.warning(_LW('Server appears to still be running, restart ' + 'of router %s may fail'), self.id) self.start() return @@ -419,6 +510,22 @@ class OpenSwanProcess(BaseSwanProcess): """ if not self.namespace: return + + # NOTE: The restart operation calls the parent's start() instead of + # this one to avoid having to special case the startup file check. + # If anything is added to this method that needs to run whenever + # a restart occurs, it should be either added to the restart() + # override or things refactored to special-case start() when + # called from restart(). + + # If, by any reason, ctl and pid files weren't cleaned up, pluto + # won't be able to rewrite them and will fail to start. So we check + # to see if the process is running and if not, attempt a cleanup. + # In either case we fall through to allow the pluto process to + # start or fail in the usual way. + if not self._process_running(): + self._cleanup_control_files() + virtual_private = self._virtual_privates() #start pluto IKE keying daemon cmd = [self.binary, diff --git a/neutron_vpnaas/services/vpn/device_drivers/libreswan_ipsec.py b/neutron_vpnaas/services/vpn/device_drivers/libreswan_ipsec.py index 227c99d06..c7fe2f513 100644 --- a/neutron_vpnaas/services/vpn/device_drivers/libreswan_ipsec.py +++ b/neutron_vpnaas/services/vpn/device_drivers/libreswan_ipsec.py @@ -15,33 +15,12 @@ import os import os.path -import eventlet - -from neutron.i18n import _LE, _LI, _LW -from oslo_config import cfg from oslo_log import log as logging from neutron_vpnaas.services.vpn.device_drivers import ipsec LOG = logging.getLogger(__name__) -libreswan_opts = [ - cfg.IntOpt('shutdown_check_timeout', - default=1, - help=_('Initial interval in seconds for checking if pluto ' - 'daemon is shutdown')), - cfg.IntOpt('shutdown_check_retries', - default=5, - help=_('The maximum number of retries for checking for ' - 'pluto daemon shutdown')), - cfg.FloatOpt('shutdown_check_back_off', - default=1.5, - help=_('A factor to increase the retry interval for ' - 'each retry')) -] - -cfg.CONF.register_opts(libreswan_opts, 'libreswan') - class LibreSwanProcess(ipsec.OpenSwanProcess): """Libreswan Process manager class. @@ -51,7 +30,6 @@ class LibreSwanProcess(ipsec.OpenSwanProcess): def __init__(self, conf, process_id, vpnservice, namespace): super(LibreSwanProcess, self).__init__(conf, process_id, vpnservice, namespace) - self.pid_file = '%s.pid' % self.pid_path def ensure_configs(self): """Generate config files which are needed for Libreswan. @@ -86,98 +64,6 @@ class LibreSwanProcess(ipsec.OpenSwanProcess): except RuntimeError: self._execute([self.binary, 'initnss', self.etc_dir]) - def _process_running(self): - """Checks if process is still running.""" - - # If no PID file, we assume the process is not running. - if not os.path.exists(self.pid_file): - return False - - try: - # We take an ask-forgiveness-not-permission approach and rely - # on throwing to tell us something. If the pid file exists, - # delve into the process information and check if it matches - # our expected command line. - with open(self.pid_file, 'r') as f: - pid = f.readline().strip() - with open('/proc/%s/cmdline' % pid) as cmd_line_file: - cmd_line = cmd_line_file.readline() - if self.pid_path in cmd_line and 'pluto' in cmd_line: - # Okay the process is probably a libreswan process - # and it contains the pid_path in the command - # line... could be a race. Log to error and return - # that it is *NOT* okay to clean up files. We are - # logging to error instead of debug because it - # indicates something bad has happened and this is - # valuable information for figuring it out. - LOG.error(_LE('Process %(pid)s exists with command ' - 'line %(cmd_line)s.') % - {'pid': pid, 'cmd_line': cmd_line}) - return True - - except IOError as e: - # This is logged as "info" instead of error because it simply - # means that we couldn't find the files to check on them. - LOG.info(_LI('Unable to find control files on startup for ' - 'router %(router)s: %(msg)s'), - {'router': self.id, 'msg': e}) - return False - - def _cleanup_control_files(self): - try: - ctl_file = '%s.ctl' % self.pid_path - LOG.debug('Removing %(pidfile)s and %(ctlfile)s', - {'pidfile': self.pid_file, - 'ctlfile': '%s.ctl' % ctl_file}) - - if os.path.exists(self.pid_file): - os.remove(self.pid_file) - - if os.path.exists(ctl_file): - os.remove(ctl_file) - - except OSError as e: - LOG.error(_LE('Unable to remove libreswan control ' - 'files for router %(router)s. %(msg)s'), - {'router': self.id, 'msg': e}) - - def start(self): - # NOTE: The restart operation calls the parent's start() instead of - # this one to avoid having to special case the startup file check. - # If anything is added to this method that needs to run whenever - # a restart occurs, it should be either added to the restart() - # override or things refactored to special-case start() when - # called from restart(). - - # LibreSwan's use of the capabilities library may prevent the ctl - # and pid files from being cleaned up, so we check to see if the - # process is running and if not, attempt a cleanup. In either case - # we fall through to allow the LibreSwan process to start or fail - # in the usual way. - if not self._process_running(): - self._cleanup_control_files() - - super(LibreSwanProcess, self).start() - - def restart(self): - # stop() followed immediately by a start() runs the risk that the - # current pluto daemon has not had a chance to shutdown. We check - # the current process information to see if the daemon is still - # running and if so, wait a short interval and retry. - self.stop() - wait_interval = cfg.CONF.libreswan.shutdown_check_timeout - for i in range(cfg.CONF.libreswan.shutdown_check_retries): - if not self._process_running(): - self._cleanup_control_files() - break - eventlet.sleep(wait_interval) - wait_interval *= cfg.CONF.libreswan.shutdown_check_back_off - else: - LOG.warning(_LW('Server appears to still be running, restart ' - 'of router %s may fail'), self.id) - - super(LibreSwanProcess, self).start() - class LibreSwanDriver(ipsec.IPsecDriver): def create_process(self, process_id, vpnservice, namespace): diff --git a/neutron_vpnaas/tests/unit/services/vpn/device_drivers/test_ipsec.py b/neutron_vpnaas/tests/unit/services/vpn/device_drivers/test_ipsec.py index 0a2cd20ff..c0a8eccf1 100644 --- a/neutron_vpnaas/tests/unit/services/vpn/device_drivers/test_ipsec.py +++ b/neutron_vpnaas/tests/unit/services/vpn/device_drivers/test_ipsec.py @@ -690,12 +690,29 @@ class IPSecDeviceDVR(BaseIPsecDeviceDriver): class TestOpenSwanProcess(BaseIPsecDeviceDriver): + + _test_timeout = 1 + _test_backoff = 2 + _test_retries = 5 + def setUp(self, driver=openswan_ipsec.OpenSwanDriver, ipsec_process=openswan_ipsec.OpenSwanProcess): super(TestOpenSwanProcess, self).setUp(driver, ipsec_process) + # Insulate tests against changes to configuration defaults. self.conf.register_opts(openswan_ipsec.openswan_opts, 'openswan') self.conf.set_override('state_path', '/tmp') + cfg.CONF.register_opts(openswan_ipsec.pluto_opts, + 'pluto') + cfg.CONF.set_override('shutdown_check_timeout', self._test_timeout, + group='pluto') + cfg.CONF.set_override('shutdown_check_back_off', self._test_backoff, + group='pluto') + cfg.CONF.set_override('shutdown_check_retries', self._test_retries, + group='pluto') + self.addCleanup(cfg.CONF.reset) + + self.os_remove = mock.patch('os.remove').start() self.process = openswan_ipsec.OpenSwanProcess(self.conf, 'foo-process-id', @@ -810,84 +827,55 @@ class TestOpenSwanProcess(BaseIPsecDeviceDriver): self.assertEqual(expected_connection_status_dict, self.process.connection_status) - -class TestLibreSwanProcess(base.BaseTestCase): - - _test_timeout = 1 - _test_backoff = 2 - _test_retries = 5 - - def setUp(self): - super(TestLibreSwanProcess, self).setUp() - # Insulate tests against changes to configuration defaults. - cfg.CONF.register_opts(libreswan_ipsec.libreswan_opts, - 'libreswan') - cfg.CONF.set_override('shutdown_check_timeout', self._test_timeout, - group='libreswan') - cfg.CONF.set_override('shutdown_check_back_off', self._test_backoff, - group='libreswan') - cfg.CONF.set_override('shutdown_check_retries', self._test_retries, - group='libreswan') - self.addCleanup(cfg.CONF.reset) - self.vpnservice = copy.deepcopy(FAKE_VPN_SERVICE) - self.parent_start = mock.patch('neutron_vpnaas.services.' - 'vpn.device_drivers.ipsec.' - 'OpenSwanProcess.start').start() - self.parent_stop = mock.patch('neutron_vpnaas.services.' - 'vpn.device_drivers.ipsec.' - 'OpenSwanProcess.stop').start() - self.os_remove = mock.patch('os.remove').start() - - self.ipsec_process = libreswan_ipsec.LibreSwanProcess(cfg.CONF, - 'foo-process-id', - self.vpnservice, - mock.ANY) - @mock.patch('neutron_vpnaas.services.vpn.device_drivers.' - 'libreswan_ipsec.LibreSwanProcess._cleanup_control_files') - def test_no_cleanups(self, cleanup_mock): + 'ipsec.OpenSwanProcess._get_nexthop', + return_value='172.168.1.2') + @mock.patch('neutron_vpnaas.services.vpn.device_drivers.' + 'ipsec.OpenSwanProcess._cleanup_control_files') + def test_no_cleanups(self, cleanup_mock, hop_mock): # Not an "awesome test" but more of a check box item. Basically, # what happens if we didn't need to clean up any files. - with mock.patch.object(self.ipsec_process, + with mock.patch.object(self.process, '_process_running', return_value=True) as query_mock: - self.ipsec_process.start() - self.assertEqual(1, self.parent_start.call_count) + self.process.start() self.assertEqual(1, query_mock.call_count) # This is really what is being tested here. If process is # running, we shouldn't attempt a cleanup. self.assertFalse(cleanup_mock.called) + @mock.patch('neutron_vpnaas.services.vpn.device_drivers.' + 'ipsec.OpenSwanProcess._get_nexthop', + return_value='172.168.1.2') @mock.patch('os.path.exists', return_value=True) - def test_cleanup_files(self, exists_mock): + def test_cleanup_files(self, exists_mock, hop_mock): # Tests the 'bones' of things really and kind of check-box-item-bogus # test - this really needs exercising through a higher level test. - with mock.patch.object(self.ipsec_process, + with mock.patch.object(self.process, '_process_running', return_value=False) as query_mock: fake_path = '/fake/path/run' - self.ipsec_process.pid_path = fake_path - self.ipsec_process.pid_file = '%s.pid' % fake_path - self.ipsec_process.start() - self.assertEqual(1, self.parent_start.call_count) + self.process.pid_path = fake_path + self.process.pid_file = '%s.pid' % fake_path + self.process.start() self.assertEqual(1, query_mock.call_count) self.assertEqual(2, self.os_remove.call_count) self.os_remove.assert_has_calls([mock.call('%s.pid' % fake_path), mock.call('%s.ctl' % fake_path)]) @mock.patch('neutron_vpnaas.services.vpn.device_drivers.' - 'libreswan_ipsec.LibreSwanProcess._process_running', - return_value=False) + 'ipsec.OpenSwanProcess._get_nexthop', + return_value='172.168.1.2') @mock.patch('neutron_vpnaas.services.vpn.device_drivers.' - 'libreswan_ipsec.LibreSwanProcess._cleanup_control_files') + 'ipsec.OpenSwanProcess._process_running', + return_value=False) + @mock.patch('neutron_vpnaas.services.vpn.device_drivers.' + 'ipsec.OpenSwanProcess._cleanup_control_files') @mock.patch('eventlet.sleep') def test_restart_process_not_running(self, sleep_mock, cleanup_mock, - query_mock): - self.ipsec_process.restart() - # Lame checks that are really for sanity - self.assertTrue(self.parent_stop.called) - self.assertTrue(self.parent_start.called) + query_mock, hop_mock): + self.process.restart() # Really what is being tested - retry configuration exists and that # we do the right things when process check is false. @@ -896,111 +884,118 @@ class TestLibreSwanProcess(base.BaseTestCase): self.assertFalse(sleep_mock.called) @mock.patch('neutron_vpnaas.services.vpn.device_drivers.' - 'libreswan_ipsec.LibreSwanProcess._process_running', - return_value=True) + 'ipsec.OpenSwanProcess._get_nexthop', + return_value='172.168.1.2') @mock.patch('neutron_vpnaas.services.vpn.device_drivers.' - 'libreswan_ipsec.LibreSwanProcess._cleanup_control_files') + 'ipsec.OpenSwanProcess._process_running', + return_value=True) + @mock.patch('neutron_vpnaas.services.vpn.device_drivers.' + 'ipsec.OpenSwanProcess._cleanup_control_files') @mock.patch('eventlet.sleep') def test_restart_process_doesnt_stop(self, sleep_mock, cleanup_mock, - query_mock): - self.ipsec_process.restart() - # Lame checks that are really for sanity - self.assertTrue(self.parent_stop.called) - self.assertTrue(self.parent_start.called) + query_mock, hop_mock): + self.process.restart() # Really what is being tested - retry configuration exists and that # we do the right things when process check is True. - self.assertEqual(5, query_mock.call_count) + self.assertEqual(self._test_retries + 1, query_mock.call_count) self.assertFalse(cleanup_mock.called) - self.assertEqual(5, sleep_mock.call_count) + self.assertEqual(self._test_retries, sleep_mock.call_count) calls = [mock.call(1), mock.call(2), mock.call(4), mock.call(8), mock.call(16)] sleep_mock.assert_has_calls(calls) @mock.patch('neutron_vpnaas.services.vpn.device_drivers.' - 'libreswan_ipsec.LibreSwanProcess._process_running', - side_effect=[True, True, False]) + 'ipsec.OpenSwanProcess._get_nexthop', + return_value='172.168.1.2') @mock.patch('neutron_vpnaas.services.vpn.device_drivers.' - 'libreswan_ipsec.LibreSwanProcess._cleanup_control_files') + 'ipsec.OpenSwanProcess._process_running', + side_effect=[True, True, False, False]) + @mock.patch('neutron_vpnaas.services.vpn.device_drivers.' + 'ipsec.OpenSwanProcess._cleanup_control_files') @mock.patch('eventlet.sleep') def test_restart_process_retry_until_stop(self, sleep_mock, cleanup_mock, - query_mock): - self.ipsec_process.restart() - # Lame checks that are really for sanity - self.assertTrue(self.parent_start.called) - self.assertTrue(self.parent_stop.called) + query_mock, hop_mock): + self.process.restart() # Really what is being tested - retry configuration exists and that # we do the right things when process check is True a few times and # then returns False. - self.assertEqual(3, query_mock.call_count) + self.assertEqual(4, query_mock.call_count) self.assertTrue(cleanup_mock.called) self.assertEqual(2, sleep_mock.call_count) def test_process_running_no_pid(self): with mock.patch('os.path.exists', return_value=False): self.assertFalse( - self.ipsec_process._process_running()) + self.process._process_running()) # open() is used elsewhere, so we need to inject a mocked open into the # module to be tested. @mock.patch('os.path.exists', return_value=True) - @mock.patch('neutron_vpnaas.services.vpn.device_drivers.' - 'libreswan_ipsec.open', + @mock.patch('neutron_vpnaas.services.vpn.device_drivers.ipsec.open', create=True, side_effect=IOError) def test_process_running_open_failure(self, mock_open, mock_exists): - self.assertFalse(self.ipsec_process._process_running()) + self.assertFalse(self.process._process_running()) self.assertTrue(mock_exists.called) self.assertTrue(mock_open.called) @mock.patch('os.path.exists', return_value=True) - @mock.patch('neutron_vpnaas.services.vpn.device_drivers.' - 'libreswan_ipsec.open', + @mock.patch('neutron_vpnaas.services.vpn.device_drivers.ipsec.open', create=True, side_effect=[io.StringIO(u'invalid'), IOError]) def test_process_running_bogus_pid(self, mock_open, mock_exists): with mock.patch.object(libreswan_ipsec.LOG, 'error'): - self.assertFalse(self.ipsec_process._process_running()) + self.assertFalse(self.process._process_running()) self.assertTrue(mock_exists.called) self.assertEqual(2, mock_open.call_count) @mock.patch('os.path.exists', return_value=True) - @mock.patch('neutron_vpnaas.services.vpn.device_drivers.' - 'libreswan_ipsec.open', + @mock.patch('neutron_vpnaas.services.vpn.device_drivers.ipsec.open', create=True, side_effect=[io.StringIO(u'134'), io.StringIO(u'')]) def test_process_running_no_cmdline(self, mock_open, mock_exists): - with mock.patch.object(libreswan_ipsec.LOG, 'error') as log_mock: - self.assertFalse(self.ipsec_process._process_running()) + with mock.patch.object(openswan_ipsec.LOG, 'error') as log_mock: + self.assertFalse(self.process._process_running()) self.assertFalse(log_mock.called) self.assertEqual(2, mock_open.call_count) @mock.patch('os.path.exists', return_value=True) - @mock.patch('neutron_vpnaas.services.vpn.device_drivers.' - 'libreswan_ipsec.open', + @mock.patch('neutron_vpnaas.services.vpn.device_drivers.ipsec.open', create=True, side_effect=[io.StringIO(u'134'), io.StringIO(u'ps ax')]) def test_process_running_cmdline_mismatch(self, mock_open, mock_exists): - with mock.patch.object(libreswan_ipsec.LOG, 'error') as log_mock: - self.assertFalse(self.ipsec_process._process_running()) + with mock.patch.object(openswan_ipsec.LOG, 'error') as log_mock: + self.assertFalse(self.process._process_running()) self.assertFalse(log_mock.called) self.assertEqual(2, mock_open.call_count) @mock.patch('os.path.exists', return_value=True) - @mock.patch('neutron_vpnaas.services.vpn.device_drivers.' - 'libreswan_ipsec.open', + @mock.patch('neutron_vpnaas.services.vpn.device_drivers.ipsec.open', create=True, side_effect=[io.StringIO(u'134'), io.StringIO(u'/usr/libexec/ipsec/pluto -ctlbase' '/some/foo/path')]) def test_process_running_cmdline_match(self, mock_open, mock_exists): - self.ipsec_process.pid_path = '/some/foo/path' - with mock.patch.object(libreswan_ipsec.LOG, 'error') as log_mock: - self.assertTrue(self.ipsec_process._process_running()) + self.process.pid_path = '/some/foo/path' + with mock.patch.object(openswan_ipsec.LOG, 'error') as log_mock: + self.assertTrue(self.process._process_running()) self.assertTrue(log_mock.called) + +class TestLibreSwanProcess(base.BaseTestCase): + + def setUp(self): + super(TestLibreSwanProcess, self).setUp() + self.vpnservice = copy.deepcopy(FAKE_VPN_SERVICE) + + self.ipsec_process = libreswan_ipsec.LibreSwanProcess(cfg.CONF, + 'foo-process-id', + self.vpnservice, + mock.ANY) + @mock.patch('os.remove') @mock.patch('os.path.exists', return_value=True) def test_ensure_configs_on_restart(self, exists_mock, remove_mock):