summaryrefslogtreecommitdiff
path: root/neutron/cmd/netns_cleanup.py
blob: ff33ee9eceb39e41a23995ecee92f4f277679bc6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
# Copyright (c) 2012 OpenStack Foundation.
# All Rights Reserved.
#
#    Licensed under the Apache License, Version 2.0 (the "License"); you may
#    not use this file except in compliance with the License. You may obtain
#    a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
#    Unless required by applicable law or agreed to in writing, software
#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
#    License for the specific language governing permissions and limitations
#    under the License.

import itertools
import re
import signal
import time

from neutron_lib import constants
from oslo_config import cfg
from oslo_log import log as logging
from oslo_utils import importutils

from neutron.agent.common import ovs_lib
from neutron.agent.l3 import dvr_fip_ns
from neutron.agent.l3 import dvr_snat_ns
from neutron.agent.l3 import namespaces
from neutron.agent.linux import dhcp
from neutron.agent.linux import external_process
from neutron.agent.linux import ip_lib
from neutron.agent.linux import utils
from neutron.common import config
from neutron.conf.agent import cmd
from neutron.conf.agent import common as agent_config
from neutron.conf.agent import dhcp as dhcp_config

# Module-level logger used by every helper in this utility.
LOG = logging.getLogger(__name__)
# Prefix of LBaaS namespaces; defined here instead of being imported.
LB_NS_PREFIX = 'qlbaas-'
# Namespace name prefixes grouped by agent type. The keys are the values
# looked up with conf.agent_type in eligible_for_deletion().
NS_PREFIXES = {
    'dhcp': [dhcp.NS_PREFIX],
    'l3': [namespaces.NS_PREFIX, dvr_snat_ns.SNAT_NS_PREFIX,
           dvr_fip_ns.FIP_NS_PREFIX],
    'lbaas': [LB_NS_PREFIX],
}
# Default timeout, in seconds, of wait_until_no_listen_pids_namespace().
SIGTERM_WAITTIME = 10
# Extracts the PID from the "PID/Program name" column of a ``netstat -nlp``
# output line. The PID length is deliberately not bounded: PIDs may have a
# single digit, and 64-bit Linux allows kernel.pid_max values with seven
# digits, which a fixed 2-6 digit range would silently skip.
NETSTAT_PIDS_REGEX = re.compile(r'.* (?P<pid>\d+)/.*')


class PidsInNamespaceException(Exception):
    """Raised when processes are still listening in a namespace.

    Signals that waiting for the listening processes of a namespace to
    disappear has timed out.
    """


class FakeDhcpPlugin(object):
    """Fake RPC plugin to bypass any RPC calls.

    Every attribute access on an instance yields a no-op callable, so any
    plugin method the DHCP driver invokes silently does nothing and returns
    None.
    """
    def __getattribute__(self, name):
        # Accept keyword arguments as well as positional ones; otherwise a
        # caller passing keywords would get a TypeError from the stub.
        def fake_method(*args, **kwargs):
            pass
        return fake_method


def setup_conf():
    """Build the configuration object used by the clean-up utility.

    The utility has its own setup because many options of the main config
    do not apply during clean-up; only the option groups registered below
    are made available.

    :returns: the global ``cfg.CONF`` object with those options registered.
    """
    cleanup_conf = cfg.CONF

    # Options of the netns clean-up command itself.
    cmd.register_cmd_opts(cmd.netns_opts, cleanup_conf)
    # Interface driver and DHCP agent options; registration order is kept
    # as is.
    agent_config.register_interface_driver_opts_helper(cleanup_conf)
    dhcp_config.register_agent_dhcp_opts(cleanup_conf)
    agent_config.register_interface_opts()

    return cleanup_conf


def _get_dhcp_process_monitor(config):
    """Create a ProcessMonitor for resources of type 'dhcp'."""
    monitor_kwargs = {'config': config, 'resource_type': 'dhcp'}
    return external_process.ProcessMonitor(**monitor_kwargs)


def kill_dhcp(conf, namespace):
    """Disable the DHCP driver of a namespace's network if it is active.

    :param conf: configuration object providing ``dhcp_driver``.
    :param namespace: namespace name; the network id is obtained by
                      removing the DHCP namespace prefix from it.
    """
    net_id = namespace.replace(dhcp.NS_PREFIX, '')
    driver_path = conf.dhcp_driver

    # A fake plugin is handed to the driver so that no RPC call is made.
    driver_kwargs = {
        'conf': conf,
        'process_monitor': _get_dhcp_process_monitor(conf),
        'network': dhcp.NetModel({'id': net_id}),
        'plugin': FakeDhcpPlugin(),
    }
    driver = importutils.import_object(driver_path, **driver_kwargs)

    if not driver.active:
        return
    driver.disable()


def eligible_for_deletion(conf, namespace, force=False):
    """Determine whether a namespace is eligible for deletion.

    Only namespaces named as a known agent prefix immediately followed by a
    UUID are considered. Among those, eligibility is determined by having
    only the lo device or by force being passed as a parameter.

    :param conf: configuration object; a truthy ``conf.agent_type`` limits
                 the accepted prefixes to that agent's entry in NS_PREFIXES,
                 otherwise the prefixes of all agent types are accepted.
    :param namespace: name of the namespace to check.
    :param force: if True, the emptiness check is skipped.
    :returns: True if the namespace may be deleted, False otherwise.
    """

    if conf.agent_type:
        prefixes = NS_PREFIXES.get(conf.agent_type)
    else:
        prefixes = itertools.chain(*NS_PREFIXES.values())
    # The alternation must be closed before the UUID is appended: '|' binds
    # looser than concatenation, so '(a|b|c<UUID>)' would require the UUID
    # only after the last prefix and accept any name starting with one of
    # the others.
    ns_mangling_pattern = '(?:%s)%s' % ('|'.join(prefixes),
                                        constants.UUID_PATTERN)

    # filter out namespaces without UUID as the name
    if not re.match(ns_mangling_pattern, namespace):
        return False

    ip = ip_lib.IPWrapper(namespace=namespace)
    return force or ip.namespace_is_empty()


def unplug_device(conf, device):
    """Remove a device, falling back to OVS port deletion.

    The link is deleted first with "log failures as errors" switched off on
    the device. If that raises RuntimeError, the device is looked up as an
    OVS port and removed from its bridge when one is found.

    :param conf: unused by this function.
    :param device: device object to remove.
    """
    saved_flag = device.get_log_fail_as_error()
    device.set_log_fail_as_error(False)
    try:
        device.link.delete()
    except RuntimeError:
        # Restore the flag before the OVS fallback runs.
        device.set_log_fail_as_error(saved_flag)
        # Maybe the device is an OVS port, so try to delete it as such.
        owning_bridge = ovs_lib.BaseOVS().get_bridge_for_iface(device.name)
        if not owning_bridge:
            LOG.debug('Unable to find bridge for device: %s', device.name)
        else:
            ovs_lib.OVSBridge(owning_bridge).delete_port(device.name)
    finally:
        device.set_log_fail_as_error(saved_flag)


def find_listen_pids_namespace(namespace):
    """Retrieve the pids of listening processes within the given netns.

    Runs ``netstat -nlp`` as root inside the namespace and collects the pid
    of every output line matched by NETSTAT_PIDS_REGEX.

    :param namespace: name of the namespace to inspect.
    :returns: a set of unique pids, each as the string captured from the
              netstat output.
    """
    wrapper = ip_lib.IPWrapper(namespace=namespace)
    netstat_output = wrapper.netns.execute(['netstat', '-nlp'],
                                           run_as_root=True)
    matches = (NETSTAT_PIDS_REGEX.match(row)
               for row in netstat_output.splitlines())
    return {found.group('pid') for found in matches if found}


def wait_until_no_listen_pids_namespace(namespace, timeout=SIGTERM_WAITTIME):
    """Poll until no process is listening within the given namespace.

    The namespace is checked once per second. Returns as soon as no
    listening pid is found.

    :param namespace: name of the namespace to poll.
    :param timeout: number of seconds to keep polling.
    :raises PidsInNamespaceException: if the timeout elapses without a
        check that found no listening process.
    """
    # NOTE(dalvarez): This function can block forever if
    # find_listen_pids_namespace never returns, which is really unlikely. We
    # can't use wait_until_true because we might get interrupted by an
    # eventlet Timeout during our I/O with the rootwrap daemon, and that
    # would make subsequent calls to utils.execute always grab the output of
    # the previous command.
    started_at = now = time.time()
    while now - started_at < timeout:
        if find_listen_pids_namespace(namespace):
            time.sleep(1)
            now = time.time()
            continue
        return
    raise PidsInNamespaceException


def _kill_listen_processes(namespace, force=False):
    """Signal every process listening within the given namespace.

    For each listening process its top parent with the same cmdline is
    looked up (in case the process forked) and those parents receive a
    SIGTERM. With force set, SIGKILL is sent instead, to the parents and to
    all of their children.

    :param namespace: name of the namespace to act on.
    :param force: send SIGKILL to parents and children instead of SIGTERM.
    :returns: the number of listening processes found.
    """
    listening_pids = find_listen_pids_namespace(namespace)
    targets = {utils.find_fork_top_parent(listening_pid)
               for listening_pid in listening_pids}

    sig = signal.SIGKILL if force else signal.SIGTERM
    if force:
        # Collect the children first; the target set must not grow while it
        # is being iterated.
        descendants = [utils.find_child_pids(parent, True)
                       for parent in targets]
        targets.update(*descendants)

    for target in targets:
        # Warn, because this clean-up may need a specific implementation in
        # the right module: ideally the responsible module would have killed
        # its processes before the namespace is cleaned up, and
        # netns_cleanup would not have to kill anything.
        cmdline = ' '.join(utils.get_cmdline_from_pid(target))[:80]
        LOG.warning("Killing (%(signal)d) [%(pid)s] %(cmdline)s",
                    {'signal': sig, 'pid': target, 'cmdline': cmdline})
        try:
            utils.kill_process(target, sig, run_as_root=True)
        except Exception as ex:
            LOG.error('An error occurred while killing '
                      '[%(pid)s]: %(msg)s', {'pid': target, 'msg': ex})

    return len(listening_pids)


def kill_listen_processes(namespace):
    """Kill all processes listening within the given namespace.

    SIGTERM is sent first. If processes are still listening once the wait
    times out, the remaining ones are sent SIGKILL and the wait is repeated.

    :param namespace: name of the namespace to act on.
    :raises PidsInNamespaceException: if processes are still listening
        after the wait that follows SIGKILL.
    """
    if not _kill_listen_processes(namespace, force=False):
        # Nothing was listening, so there is nothing to wait for.
        return

    try:
        wait_until_no_listen_pids_namespace(namespace)
    except PidsInNamespaceException:
        _kill_listen_processes(namespace, force=True)
        # Give the remaining processes some time to die.
        wait_until_no_listen_pids_namespace(namespace)


def destroy_namespace(conf, namespace, force=False):
    """Destroy a given namespace.

    With force set, dhcp (if it exists) is disabled, listening processes
    are killed and all devices are forcibly removed before the namespace is
    garbage collected. Any exception is logged and not propagated.

    :param conf: configuration object passed on to the helpers.
    :param namespace: name of the namespace to destroy.
    :param force: forcibly empty the namespace before collecting it.
    """
    try:
        wrapper = ip_lib.IPWrapper(namespace=namespace)

        if force:
            kill_dhcp(conf, namespace)

        # NOTE: The dhcp driver will remove the namespace if it is empty,
        # so its existence has to be checked again here.
        if force and wrapper.netns.exists(namespace):
            try:
                kill_listen_processes(namespace)
            except PidsInNamespaceException:
                # Unlikely, since all remaining processes have been
                # SIGKILLed by now; if some are still there, log the error
                # and carry on with the clean-up.
                LOG.error('Not all processes were killed in %s',
                          namespace)
            for dev in wrapper.get_devices():
                unplug_device(conf, dev)

        wrapper.garbage_collect_namespace()
    except Exception:
        LOG.exception('Error unable to destroy namespace: %s', namespace)


def cleanup_network_namespaces(conf):
    """Destroy every network namespace that is eligible for deletion.

    :param conf: configuration object; ``conf.force`` is passed both to the
                 eligibility check and to the destruction of each namespace.
    """
    # Identify the namespaces that are candidates for deletion.
    doomed = []
    for ns_name in ip_lib.list_network_namespaces():
        if eligible_for_deletion(conf, ns_name, conf.force):
            doomed.append(ns_name)

    if not doomed:
        return

    # Pause between identifying the candidates and destroying them.
    time.sleep(2)
    for ns_name in doomed:
        destroy_namespace(conf, ns_name, conf.force)


def main():
    """Entry point of the network namespace clean-up utility.

    Namespaces are checked in two passes: candidates are identified first
    and, after a sleep, handed to garbage collection, which re-verifies that
    each namespace still meets the criteria for deletion (i.e. it is empty).
    The sleep between the passes gives the namespace state time to settle,
    so that the check made right before deletion re-confirms the namespace
    is empty.

    The utility is meant for cleaning up after Neutron agents have been
    terminated forcibly or unexpectedly.

    The --force flag should only be used when cleaning up a devstack
    installation, since it blindly purges namespaces and their devices. It
    also kills any lingering DHCP instances.
    """
    cleanup_conf = setup_conf()
    # Calling the config object parses the command line.
    cleanup_conf()
    config.setup_logging()
    agent_config.setup_privsep()
    cleanup_network_namespaces(cleanup_conf)