# Copyright (c) 2012 OpenStack Foundation. # All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain # a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations # under the License. import itertools import re import signal import time from neutron_lib import constants from oslo_config import cfg from oslo_log import log as logging from oslo_utils import importutils from neutron.agent.common import ovs_lib from neutron.agent.l3 import dvr_fip_ns from neutron.agent.l3 import dvr_snat_ns from neutron.agent.l3 import namespaces from neutron.agent.linux import dhcp from neutron.agent.linux import external_process from neutron.agent.linux import ip_lib from neutron.agent.linux import utils from neutron.common import config from neutron.conf.agent import cmd from neutron.conf.agent import common as agent_config from neutron.conf.agent import dhcp as dhcp_config LOG = logging.getLogger(__name__) LB_NS_PREFIX = 'qlbaas-' NS_PREFIXES = { 'dhcp': [dhcp.NS_PREFIX], 'l3': [namespaces.NS_PREFIX, dvr_snat_ns.SNAT_NS_PREFIX, dvr_fip_ns.FIP_NS_PREFIX], 'lbaas': [LB_NS_PREFIX], } SIGTERM_WAITTIME = 10 NETSTAT_PIDS_REGEX = re.compile(r'.* (?P\d{2,6})/.*') class PidsInNamespaceException(Exception): pass class FakeDhcpPlugin(object): """Fake RPC plugin to bypass any RPC calls.""" def __getattribute__(self, name): def fake_method(*args): pass return fake_method def setup_conf(): """Setup the cfg for the clean up utility. Use separate setup_conf for the utility because there are many options from the main config that do not apply during clean-up. """ conf = cfg.CONF cmd.register_cmd_opts(cmd.netns_opts, conf) agent_config.register_interface_driver_opts_helper(conf) dhcp_config.register_agent_dhcp_opts(conf) agent_config.register_interface_opts() return conf def _get_dhcp_process_monitor(config): return external_process.ProcessMonitor(config=config, resource_type='dhcp') def kill_dhcp(conf, namespace): """Disable DHCP for a network if DHCP is still active.""" network_id = namespace.replace(dhcp.NS_PREFIX, '') dhcp_driver = importutils.import_object( conf.dhcp_driver, conf=conf, process_monitor=_get_dhcp_process_monitor(conf), network=dhcp.NetModel({'id': network_id}), plugin=FakeDhcpPlugin()) if dhcp_driver.active: dhcp_driver.disable() def eligible_for_deletion(conf, namespace, force=False): """Determine whether a namespace is eligible for deletion. Eligibility is determined by having only the lo device or if force is passed as a parameter. """ if conf.agent_type: prefixes = NS_PREFIXES.get(conf.agent_type) else: prefixes = itertools.chain(*NS_PREFIXES.values()) ns_mangling_pattern = '(%s%s)' % ('|'.join(prefixes), constants.UUID_PATTERN) # filter out namespaces without UUID as the name if not re.match(ns_mangling_pattern, namespace): return False ip = ip_lib.IPWrapper(namespace=namespace) return force or ip.namespace_is_empty() def unplug_device(conf, device): orig_log_fail_as_error = device.get_log_fail_as_error() device.set_log_fail_as_error(False) try: device.link.delete() except RuntimeError: device.set_log_fail_as_error(orig_log_fail_as_error) # Maybe the device is OVS port, so try to delete ovs = ovs_lib.BaseOVS() bridge_name = ovs.get_bridge_for_iface(device.name) if bridge_name: bridge = ovs_lib.OVSBridge(bridge_name) bridge.delete_port(device.name) else: LOG.debug('Unable to find bridge for device: %s', device.name) finally: device.set_log_fail_as_error(orig_log_fail_as_error) def find_listen_pids_namespace(namespace): """Retrieve a list of pids of listening processes within the given netns. It executes netstat -nlp and returns a set of unique pairs """ ip = ip_lib.IPWrapper(namespace=namespace) pids = set() cmd = ['netstat', '-nlp'] output = ip.netns.execute(cmd, run_as_root=True) for line in output.splitlines(): m = NETSTAT_PIDS_REGEX.match(line) if m: pids.add(m.group('pid')) return pids def wait_until_no_listen_pids_namespace(namespace, timeout=SIGTERM_WAITTIME): """Poll listening processes within the given namespace. If after timeout seconds, there are remaining processes in the namespace, then a PidsInNamespaceException will be thrown. """ # NOTE(dalvarez): This function can block forever if # find_listen_pids_in_namespace never returns which is really unlikely. We # can't use wait_until_true because we might get interrupted by eventlet # Timeout during our I/O with rootwrap daemon and that will lead to errors # in subsequent calls to utils.execute grabbing always the output of the # previous command start = end = time.time() while end - start < timeout: if not find_listen_pids_namespace(namespace): return time.sleep(1) end = time.time() raise PidsInNamespaceException def _kill_listen_processes(namespace, force=False): """Identify all listening processes within the given namespace. Then, for each one, find its top parent with same cmdline (in case this process forked) and issue a SIGTERM to all of them. If force is True, then a SIGKILL will be issued to all parents and all their children. Also, this function returns the number of listening processes. """ pids = find_listen_pids_namespace(namespace) pids_to_kill = {utils.find_fork_top_parent(pid) for pid in pids} kill_signal = signal.SIGTERM if force: kill_signal = signal.SIGKILL children = [utils.find_child_pids(pid, True) for pid in pids_to_kill] pids_to_kill.update(itertools.chain.from_iterable(children)) for pid in pids_to_kill: # Throw a warning since this particular cleanup may need a specific # implementation in the right module. Ideally, netns_cleanup wouldn't # kill any processes as the responsible module should've killed them # before cleaning up the namespace LOG.warning("Killing (%(signal)d) [%(pid)s] %(cmdline)s", {'signal': kill_signal, 'pid': pid, 'cmdline': ' '.join(utils.get_cmdline_from_pid(pid))[:80] }) try: utils.kill_process(pid, kill_signal, run_as_root=True) except Exception as ex: LOG.error('An error occurred while killing ' '[%(pid)s]: %(msg)s', {'pid': pid, 'msg': ex}) return len(pids) def kill_listen_processes(namespace): """Kill all processes listening within the given namespace. First it tries to kill them using SIGTERM, waits until they die gracefully and then kills remaining processes (if any) with SIGKILL """ if _kill_listen_processes(namespace, force=False): try: wait_until_no_listen_pids_namespace(namespace) except PidsInNamespaceException: _kill_listen_processes(namespace, force=True) # Allow some time for remaining processes to die wait_until_no_listen_pids_namespace(namespace) def destroy_namespace(conf, namespace, force=False): """Destroy a given namespace. If force is True, then dhcp (if it exists) will be disabled and all devices will be forcibly removed. """ try: ip = ip_lib.IPWrapper(namespace=namespace) if force: kill_dhcp(conf, namespace) # NOTE: The dhcp driver will remove the namespace if is it empty, # so a second check is required here. if ip.netns.exists(namespace): try: kill_listen_processes(namespace) except PidsInNamespaceException: # This is unlikely since, at this point, we have SIGKILLed # all remaining processes but if there are still some, log # the error and continue with the cleanup LOG.error('Not all processes were killed in %s', namespace) for device in ip.get_devices(): unplug_device(conf, device) ip.garbage_collect_namespace() except Exception: LOG.exception('Error unable to destroy namespace: %s', namespace) def cleanup_network_namespaces(conf): # Identify namespaces that are candidates for deletion. candidates = [ns for ns in ip_lib.list_network_namespaces() if eligible_for_deletion(conf, ns, conf.force)] if candidates: time.sleep(2) for namespace in candidates: destroy_namespace(conf, namespace, conf.force) def main(): """Main method for cleaning up network namespaces. This method will make two passes checking for namespaces to delete. The process will identify candidates, sleep, and call garbage collect. The garbage collection will re-verify that the namespace meets the criteria for deletion (ie it is empty). The period of sleep and the 2nd pass allow time for the namespace state to settle, so that the check prior deletion will re-confirm the namespace is empty. The utility is designed to clean-up after the forced or unexpected termination of Neutron agents. The --force flag should only be used as part of the cleanup of a devstack installation as it will blindly purge namespaces and their devices. This option also kills any lingering DHCP instances. """ conf = setup_conf() conf() config.setup_logging() cleanup_network_namespaces(conf)