Sync q-agent-cleanup and ocf script with fuel-library

- The Neutron VPN agent resource uses the same q-agent-cleanup.py script as the
L3 agent resource, with minor changes, so it should be kept in sync with the original script
- Backport fix for LP #1442251 to OCF script
- Backport fix for LP #1405477 to OCF script

Change-Id: I0ef7207abf2aa035a4fa2cfcae62a1ad46eef415
This commit is contained in:
Sergey Kolekonov 2015-06-09 12:46:49 +03:00
parent a7d419dfe9
commit 7bc95b9880
2 changed files with 337 additions and 306 deletions

View File

@ -400,9 +400,9 @@ get_ns_list() {
} }
get_pid_list_for_ns_list() { get_pid_list_for_ns_list() {
# the first parameter is a list of ns names for searching pids # Parameters contain namespace names for searching pids
local ns_list="$1" local ns_list="$@"
local pids=`for netns in $ns_list ; do ip netns exec $netns lsof -n -i -t ; done` local pids=`for netns in $ns_list ; do ip netns pids $netns ; done`
echo $pids echo $pids
} }

View File

@ -1,42 +1,102 @@
#!/usr/bin/env python #!/usr/bin/env python
import re # Copyright 2013 - 2015 Mirantis, Inc.
import time #
import os # Licensed under the Apache License, Version 2.0 (the "License"); you may
import sys # not use this file except in compliance with the License. You may obtain
import random # a copy of the License at
import string #
import json # http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import argparse import argparse
from ConfigParser import SafeConfigParser
import functools
import json
import logging import logging
import logging.config
import logging.handlers import logging.handlers
import shlex import re
import subprocess
import StringIO
import socket import socket
from neutronclient.neutron import client as q_client import StringIO
from keystoneclient.v2_0 import client as ks_client import subprocess
from keystoneclient.apiclient.exceptions import NotFound as ks_NotFound import sys
from time import sleep
from neutronclient.neutron import client as n_client
LOG_NAME = 'q-agent-cleanup' LOG_NAME = 'q-agent-cleanup'
API_VER = '2.0' API_VER = '2.0'
PORT_ID_PART_LEN = 11 PORT_ID_PART_LEN = 11
TMP_USER_NAME = 'tmp_neutron_admin'
def get_authconfig(cfg_file): def make_logger(handler=logging.StreamHandler(sys.stdout), level=logging.INFO):
# Read OS auth config file format = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
rv = {} handler.setFormatter(format)
stripchars=" \'\"" logger = logging.getLogger(LOG_NAME)
logger.addHandler(handler)
logger.setLevel(level)
return logger
LOG = make_logger()
AUTH_KEYS = {
'tenant_name': 'admin_tenant_name',
'username': 'admin_user',
'password': 'admin_password',
'auth_url': 'auth_uri',
}
def get_auth_data(cfg_file, section='keystone_authtoken', keys=AUTH_KEYS):
cfg = SafeConfigParser()
with open(cfg_file) as f: with open(cfg_file) as f:
for line in f: cfg.readfp(f)
rg = re.match(r'\s*export\s+(\w+)\s*=\s*(.*)',line) auth_data = {}
if rg : for key, value in keys.iteritems():
#Use shlex to unescape bash shell escape characters auth_data[key] = cfg.get(section, value)
value = "".join(x for x in return auth_data
shlex.split(rg.group(2).strip(stripchars)))
rv[rg.group(1).strip(stripchars)] = value # Note(xarses): be careful not to inject \n's into the regex pattern
return rv # or it will cause the matching to fail
RECOVERABLE = re.compile((
'(HTTP\s+400\))|'
'(400-\{\'message\'\:\s+\'\'\})|'
'(\[Errno 111\]\s+Connection\s+refused)|'
'(503\s+Service\s+Unavailable)|'
'(504\s+Gateway\s+Time-out)|'
'(\:\s+Maximum\s+attempts\s+reached)|'
'(Unauthorized\:\s+bad\s+credentials)|'
'(Max\s+retries\s+exceeded)|'
"""('*NoneType'*\s+object\s+ha'\s+no\s+attribute\s+'*__getitem__'*$)|"""
'(No\s+route\s+to\s+host$)|'
'(Lost\s+connection\s+to\s+MySQL\s+server)'), flags=re.M)
RETRY_COUNT = 50
RETRY_DELAY = 2
def retry(func, pattern=RECOVERABLE):
@functools.wraps(func)
def wrapper(*args, **kwargs):
i = 0
while True:
try:
return func(*args, **kwargs)
except Exception as e:
if pattern and not pattern.match(e.message):
raise e
i += 1
if i >= RETRY_COUNT:
raise e
LOG.debug("retry request {0}: {1}".format(i, e))
sleep(RETRY_DELAY)
return wrapper
class NeutronCleaner(object): class NeutronCleaner(object):
@ -53,7 +113,7 @@ class NeutronCleaner(object):
PORT_NAME_PREFIXES_BY_DEV_OWNER['network:router_interface'] PORT_NAME_PREFIXES_BY_DEV_OWNER['network:router_interface']
) )
} }
BRIDGES_FOR_PORTS_BY_AGENT ={ BRIDGES_FOR_PORTS_BY_AGENT = {
'dhcp': ('br-int',), 'dhcp': ('br-int',),
'l3': ('br-int', 'br-ex'), 'l3': ('br-int', 'br-ex'),
} }
@ -77,11 +137,12 @@ class NeutronCleaner(object):
CMD__ip_netns_list = ['ip', 'netns', 'list'] CMD__ip_netns_list = ['ip', 'netns', 'list']
CMD__ip_netns_exec = ['ip', 'netns', 'exec'] CMD__ip_netns_exec = ['ip', 'netns', 'exec']
RE__port_in_portlist = re.compile(r"^\s*\d+\:\s+([\w-]+)\:") # 14: tap-xxxyyyzzz: # 14: tap-xxxyyyzzz:
RE__port_in_portlist = re.compile(r"^\s*\d+\:\s+([\w-]+)\:")
def __init__(self, openrc, options, log=None): def __init__(self, options, log=None):
self.log = log self.log = log
self.auth_config = openrc self.auth_data = get_auth_data(cfg_file=options.get('authconf'))
self.options = options self.options = options
self.agents = {} self.agents = {}
self.debug = options.get('debug') self.debug = options.get('debug')
@ -90,180 +151,93 @@ class NeutronCleaner(object):
'l3': self._reschedule_agent_l3, 'l3': self._reschedule_agent_l3,
} }
self._token = None
self._keystone = None
self._client = None self._client = None
self._need_cleanup_tmp_admin = False
def __del__(self):
if self._need_cleanup_tmp_admin and self._keystone and self._keystone.username:
try:
self._keystone.users.delete(self._keystone.users.find(username=self._keystone.username))
except:
# if we get exception while cleaning temporary account -- nothing harm
pass
def generate_random_passwd(self, length=13):
chars = string.ascii_letters + string.digits + '!@#$%^&*()'
random.seed = (os.urandom(1024))
return ''.join(random.choice(chars) for i in range(length))
@property
def keystone(self):
if self._keystone is None:
ret_count = self.options.get('retries', 1)
tmp_passwd = self.generate_random_passwd()
while True:
if ret_count <= 0:
self.log.error(">>> Keystone error: no more retries for connect to keystone server.")
sys.exit(1)
try:
a_token = self.options.get('auth-token')
a_url = self.options.get('admin-auth-url')
if a_token and a_url:
self.log.debug("Authentication by predefined token.")
# create keystone instance, authorized by service token
ks = ks_client.Client(
token=a_token,
endpoint=a_url,
)
service_tenant = ks.tenants.find(name='services')
auth_url = ks.endpoints.find(
service_id=ks.services.find(type='identity').id
).internalurl
# find and re-create temporary rescheduling-admin user with random password
try:
user = ks.users.find(username=TMP_USER_NAME)
ks.users.delete(user)
except ks_NotFound:
# user not found, it's OK
pass
user = ks.users.create(TMP_USER_NAME, tmp_passwd, tenant_id=service_tenant.id)
ks.roles.add_user_role(user, ks.roles.find(name='admin'), service_tenant)
# authenticate newly-created tmp neutron admin
self._keystone = ks_client.Client(
username=user.username,
password=tmp_passwd,
tenant_id=user.tenantId,
auth_url=auth_url,
)
self._need_cleanup_tmp_admin = True
else:
self.log.debug("Authentication by given credentionals.")
self._keystone = ks_client.Client(
username=self.auth_config['OS_USERNAME'],
password=self.auth_config['OS_PASSWORD'],
tenant_name=self.auth_config['OS_TENANT_NAME'],
auth_url=self.auth_config['OS_AUTH_URL'],
)
break
except Exception as e:
errmsg = str(e.message).strip() # str() need, because keystone may use int as message in exception
if re.search(r"Connection\s+refused$", errmsg, re.I) or \
re.search(r"Connection\s+timed\s+out$", errmsg, re.I) or\
re.search(r"Lost\s+connection\s+to\s+MySQL\s+server", errmsg, re.I) or\
re.search(r"Service\s+Unavailable$", errmsg, re.I) or\
re.search(r"'*NoneType'*\s+object\s+has\s+no\s+attribute\s+'*__getitem__'*$", errmsg, re.I) or \
re.search(r"No\s+route\s+to\s+host$", errmsg, re.I):
self.log.info(">>> Can't connect to {0}, wait for server ready...".format(self.auth_config['OS_AUTH_URL']))
time.sleep(self.options.sleep)
else:
self.log.error(">>> Keystone error:\n{0}".format(e.message))
raise e
ret_count -= 1
return self._keystone
@property
def token(self):
if self._token is None:
self._token = self._keystone.auth_token
#self.log.debug("Auth_token: '{0}'".format(self._token))
#todo: Validate existing token
return self._token
@property @property
@retry
def client(self): def client(self):
if self._client is None: if self._client is None:
self._client = q_client.Client( self._client = n_client.Client(API_VER, **self.auth_data)
API_VER,
endpoint_url=self.keystone.endpoints.find(
service_id=self.keystone.services.find(type='network').id
).adminurl,
token=self.token,
)
return self._client return self._client
def _neutron_API_call(self, method, *args): @retry
ret_count = self.options.get('retries') def _get_agents(self, use_cache=True):
while True: return self.client.list_agents()['agents']
if ret_count <= 0:
self.log.error("Q-server error: no more retries for connect to server.")
return []
try:
rv = method (*args)
break
except Exception as e:
errmsg = str(e.message).strip()
if re.search(r"Connection\s+refused", errmsg, re.I) or\
re.search(r"Connection\s+timed\s+out", errmsg, re.I) or\
re.search(r"Lost\s+connection\s+to\s+MySQL\s+server", errmsg, re.I) or\
re.search(r"503\s+Service\s+Unavailable", errmsg, re.I) or\
re.search(r"No\s+route\s+to\s+host", errmsg, re.I):
self.log.info("Can't connect to {0}, wait for server ready...".format(self.keystone.service_catalog.url_for(service_type='network')))
time.sleep(self.options.sleep)
else:
self.log.error("Neutron error:\n{0}".format(e.message))
raise e
ret_count -= 1
return rv
def _get_agents(self,use_cache=True):
return self._neutron_API_call(self.client.list_agents)['agents']
@retry
def _get_routers(self, use_cache=True): def _get_routers(self, use_cache=True):
return self._neutron_API_call(self.client.list_routers)['routers'] return self.client.list_routers()['routers']
@retry
def _get_networks(self, use_cache=True): def _get_networks(self, use_cache=True):
return self._neutron_API_call(self.client.list_networks)['networks'] return self.client.list_networks()['networks']
@retry
def _list_networks_on_dhcp_agent(self, agent_id): def _list_networks_on_dhcp_agent(self, agent_id):
return self._neutron_API_call(self.client.list_networks_on_dhcp_agent, agent_id)['networks'] return self.client.list_networks_on_dhcp_agent(
agent_id)['networks']
@retry
def _list_routers_on_l3_agent(self, agent_id): def _list_routers_on_l3_agent(self, agent_id):
return self._neutron_API_call(self.client.list_routers_on_l3_agent, agent_id)['routers'] return self.client.list_routers_on_l3_agent(
agent_id)['routers']
@retry
def _list_l3_agents_on_router(self, router_id): def _list_l3_agents_on_router(self, router_id):
return self._neutron_API_call(self.client.list_l3_agent_hosting_routers, router_id)['agents'] return self.client.list_l3_agent_hosting_routers(
router_id)['agents']
@retry
def _list_dhcp_agents_on_network(self, network_id): def _list_dhcp_agents_on_network(self, network_id):
return self._neutron_API_call(self.client.list_dhcp_agent_hosting_networks, network_id)['agents'] return self.client.list_dhcp_agent_hosting_networks(
network_id)['agents']
def _list_orphaned_networks(self): def _list_orphaned_networks(self):
networks = self._get_networks() networks = self._get_networks()
self.log.debug("_list_orphaned_networks:, got list of networks {0}".format(json.dumps(networks,indent=4))) self.log.debug(
"_list_orphaned_networks:, got list of networks {0}".format(
json.dumps(networks, indent=4)))
orphaned_networks = [] orphaned_networks = []
for network in networks: for network in networks:
if len(self._list_dhcp_agents_on_network(network['id'])) == 0: if len(self._list_dhcp_agents_on_network(network['id'])) == 0:
orphaned_networks.append(network['id']) orphaned_networks.append(network['id'])
self.log.debug("_list_orphaned_networks:, got list of orphaned networks {0}".format(orphaned_networks)) self.log.debug(
"_list_orphaned_networks:, got list of orphaned networks {0}".
format(orphaned_networks))
return orphaned_networks return orphaned_networks
def _list_orphaned_routers(self): def _list_orphaned_routers(self):
routers = self._get_routers() routers = self._get_routers()
self.log.debug("_list_orphaned_routers:, got list of routers {0}".format(json.dumps(routers,indent=4))) self.log.debug(
"_list_orphaned_routers:, got list of routers {0}".format(
json.dumps(routers, indent=4)))
orphaned_routers = [] orphaned_routers = []
for router in routers: for router in routers:
if len(self._list_l3_agents_on_router(router['id'])) == 0: if len(self._list_l3_agents_on_router(router['id'])) == 0:
orphaned_routers.append(router['id']) orphaned_routers.append(router['id'])
self.log.debug("_list_orphaned_routers:, got list of orphaned routers {0}".format(orphaned_routers)) self.log.debug(
"_list_orphaned_routers:, got list of orphaned routers {0}".format(
orphaned_routers))
return orphaned_routers return orphaned_routers
@retry
def _add_network_to_dhcp_agent(self, agent_id, net_id): def _add_network_to_dhcp_agent(self, agent_id, net_id):
return self._neutron_API_call(self.client.add_network_to_dhcp_agent, agent_id, {"network_id": net_id}) return self.client.add_network_to_dhcp_agent(
def _add_router_to_l3_agent(self, agent_id, router_id): agent_id, {"network_id": net_id})
return self._neutron_API_call(self.client.add_router_to_l3_agent, agent_id, {"router_id": router_id})
@retry
def _add_router_to_l3_agent(self, agent_id, router_id):
return self.client.add_router_to_l3_agent(
agent_id, {"router_id": router_id})
@retry
def _remove_router_from_l3_agent(self, agent_id, router_id): def _remove_router_from_l3_agent(self, agent_id, router_id):
return self._neutron_API_call(self.client.remove_router_from_l3_agent, agent_id, router_id) return self.client.remove_router_from_l3_agent(
agent_id, router_id)
@retry
def _delete_agent(self, agent_id):
return self.client.delete_agent(agent_id)
def _get_agents_by_type(self, agent, use_cache=True): def _get_agents_by_type(self, agent, use_cache=True):
self.log.debug("_get_agents_by_type: start.") self.log.debug("_get_agents_by_type: start.")
@ -276,29 +250,39 @@ class NeutronCleaner(object):
from_cache = '' from_cache = ''
else: else:
from_cache = ' from local cache' from_cache = ' from local cache'
self.log.debug("_get_agents_by_type: end, {0} rv: {1}".format(from_cache, json.dumps(rv, indent=4))) self.log.debug(
"_get_agents_by_type: end, {0} rv: {1}".format(
from_cache, json.dumps(rv, indent=4)))
return rv return rv
def __collect_namespaces_for_agent(self, agent): def _execute(self, cmd):
cmd = self.CMD__ip_netns_list[:]
self.log.debug("Execute command '{0}'".format(' '.join(cmd)))
process = subprocess.Popen( process = subprocess.Popen(
cmd, cmd,
shell=False, shell=False,
stdout=subprocess.PIPE, stdout=subprocess.PIPE,
stderr=subprocess.PIPE stderr=subprocess.PIPE
) )
rc = process.wait() (stdout, stderr) = process.communicate()
if rc != 0: ret_code = process.returncode
self.log.error("ERROR (rc={0}) while execution {1}".format(rc, ' '.join(cmd))) if ret_code != 0:
self.log.error(
"ERROR (rc={0}) while execution {1}, stderr: {2}".format(
ret_code, ' '.join(cmd), stderr))
return None
return ret_code, stdout
def __collect_namespaces_for_agent(self, agent):
cmd = self.CMD__ip_netns_list[:]
self.log.debug("Execute command '{0}'".format(' '.join(cmd)))
ret_code, stdout = self._execute(cmd)
if ret_code != 0:
return [] return []
# filter namespaces by given agent type # filter namespaces by given agent type
netns = [] netns = []
stdout = process.communicate()[0]
for ns in StringIO.StringIO(stdout): for ns in StringIO.StringIO(stdout):
ns = ns.strip() ns = ns.strip()
self.log.debug("Found network namespace '{0}'".format(ns)) self.log.debug("Found network namespace '{0}'".format(ns))
if ns.startswith("{0}-".format(self.NS_NAME_PREFIXES[agent])): if ns.startswith(self.NS_NAME_PREFIXES[agent]):
netns.append(ns) netns.append(ns)
return netns return netns
@ -306,18 +290,10 @@ class NeutronCleaner(object):
cmd = self.CMD__ip_netns_exec[:] cmd = self.CMD__ip_netns_exec[:]
cmd.extend([ns, 'ip', 'l', 'show']) cmd.extend([ns, 'ip', 'l', 'show'])
self.log.debug("Execute command '{0}'".format(' '.join(cmd))) self.log.debug("Execute command '{0}'".format(' '.join(cmd)))
process = subprocess.Popen( ret_code, stdout = self._execute(cmd)
cmd, if ret_code != 0:
shell=False,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE
)
rc = process.wait()
if rc != 0:
self.log.error("ERROR (rc={0}) while execution {1}".format(rc, ' '.join(cmd)))
return [] return []
ports = [] ports = []
stdout = process.communicate()[0]
for line in StringIO.StringIO(stdout): for line in StringIO.StringIO(stdout):
pp = self.RE__port_in_portlist.match(line) pp = self.RE__port_in_portlist.match(line)
if not pp: if not pp:
@ -347,15 +323,7 @@ class NeutronCleaner(object):
self.log.info("NOOP-execution: '{0}'".format(' '.join(cmd))) self.log.info("NOOP-execution: '{0}'".format(' '.join(cmd)))
else: else:
self.log.debug("Execute command '{0}'".format(' '.join(cmd))) self.log.debug("Execute command '{0}'".format(' '.join(cmd)))
process = subprocess.Popen( self._execute(cmd)
cmd,
shell=False,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE
)
rc = process.wait()
if rc != 0:
self.log.error("ERROR (rc={0}) while execution {1}".format(rc, ' '.join(cmd)))
self.log.debug("_cleanup_ports: end.") self.log.debug("_cleanup_ports: end.")
return True return True
@ -370,11 +338,13 @@ class NeutronCleaner(object):
dead_networks = [] dead_networks = []
for agent in self._get_agents_by_type(agent_type): for agent in self._get_agents_by_type(agent_type):
if agent['alive']: if agent['alive']:
self.log.info("found alive DHCP agent: {0}".format(agent['id'])) self.log.info(
"found alive DHCP agent: {0}".format(agent['id']))
agents['alive'].append(agent) agents['alive'].append(agent)
else: else:
# dead agent # dead agent
self.log.info("found dead DHCP agent: {0}".format(agent['id'])) self.log.info(
"found dead DHCP agent: {0}".format(agent['id']))
agents['dead'].append(agent) agents['dead'].append(agent)
for net in self._list_networks_on_dhcp_agent(agent['id']): for net in self._list_networks_on_dhcp_agent(agent['id']):
dead_networks.append(net) dead_networks.append(net)
@ -390,28 +360,33 @@ class NeutronCleaner(object):
for net in dead_networks: for net in dead_networks:
if net['id'] not in lucky_ids: if net['id'] not in lucky_ids:
# attach network to agent # attach network to agent
self.log.info("attach network {net} to DHCP agent {agent}".format( self.log.info(
net=net['id'], "attach network {net} to DHCP agent {agent}".format(
agent=agents['alive'][0]['id'] net=net['id'],
)) agent=agents['alive'][0]['id']))
if not self.options.get('noop'): if not self.options.get('noop'):
self._add_network_to_dhcp_agent(agents['alive'][0]['id'], net['id']) self._add_network_to_dhcp_agent(
#if error: agents['alive'][0]['id'], net['id'])
# return
# remove dead agents if need (and if found alive agent) # remove dead agents if need (and if found alive agent)
if self.options.get('remove-dead'): if self.options.get('remove-dead'):
for agent in agents['dead']: for agent in agents['dead']:
self.log.info("remove dead DHCP agent: {0}".format(agent['id'])) self.log.info(
"remove dead DHCP agent: {0}".format(agent['id']))
if not self.options.get('noop'): if not self.options.get('noop'):
self._neutron_API_call(self.client.delete_agent, agent['id']) self._delete_agent(agent['id'])
orphaned_networks=self._list_orphaned_networks() orphaned_networks = self._list_orphaned_networks()
self.log.info("_reschedule_agent_dhcp: rescheduling orphaned networks") self.log.info("_reschedule_agent_dhcp: rescheduling orphaned networks")
if orphaned_networks and agents['alive']: if orphaned_networks and agents['alive']:
for network in orphaned_networks: for network in orphaned_networks:
self.log.info("_reschedule_agent_dhcp: rescheduling {0} to {1}".format(network,agents['alive'][0]['id'])) self.log.info(
"_reschedule_agent_dhcp: rescheduling {0} to {1}".format(
network, agents['alive'][0]['id']))
if not self.options.get('noop'): if not self.options.get('noop'):
self._add_network_to_dhcp_agent(agents['alive'][0]['id'], network) self._add_network_to_dhcp_agent(
self.log.info("_reschedule_agent_dhcp: ended rescheduling of orphaned networks") agents['alive'][0]['id'], network)
self.log.info(
"_reschedule_agent_dhcp: ended rescheduling of orphaned networks")
self.log.debug("_reschedule_agent_dhcp: end.") self.log.debug("_reschedule_agent_dhcp: end.")
def _reschedule_agent_l3(self, agent_type): def _reschedule_agent_l3(self, agent_type):
@ -434,9 +409,11 @@ class NeutronCleaner(object):
lambda rou: dead_routers.append((rou, agent['id'])), lambda rou: dead_routers.append((rou, agent['id'])),
self._list_routers_on_l3_agent(agent['id']) self._list_routers_on_l3_agent(agent['id'])
) )
self.log.debug("L3 agents in cluster: {ags}".format(ags=json.dumps(agents, indent=4))) self.log.debug(
self.log.debug("Routers, attached to dead L3 agents: {rr}".format(rr=json.dumps(dead_routers, indent=4))) "L3 agents in cluster: {0}".format(
json.dumps(agents, indent=4)))
self.log.debug("Routers, attached to dead L3 agents: {0}".format(
json.dumps(dead_routers, indent=4)))
if dead_routers and agents['alive']: if dead_routers and agents['alive']:
# get router-ID list of routers already attached to alive agent # get router-ID list of routers already attached to alive agent
@ -449,49 +426,48 @@ class NeutronCleaner(object):
for agent in agents['dead']: for agent in agents['dead']:
self.log.info("remove dead L3 agent: {0}".format(agent['id'])) self.log.info("remove dead L3 agent: {0}".format(agent['id']))
if not self.options.get('noop'): if not self.options.get('noop'):
self._neutron_API_call(self.client.delete_agent, agent['id']) self._delete_agent(agent['id'])
# move routers from dead to alive agent # move routers from dead to alive agent
for rou in filter(lambda rr: not(rr[0]['id'] in lucky_ids), dead_routers): for rou in filter(
# self.log.info("unschedule router {rou} from L3 agent {agent}".format( lambda rr: not(rr[0]['id'] in lucky_ids), dead_routers):
# rou=rou[0]['id'], self.log.info(
# agent=rou[1] "schedule router {0} to L3 agent {1}".format(
# )) rou[0]['id'],
# if not self.options.get('noop'): agents['alive'][0]['id']))
# self._remove_router_from_l3_agent(rou[1], rou[0]['id'])
# #todo: if error:
# #
self.log.info("schedule router {rou} to L3 agent {agent}".format(
rou=rou[0]['id'],
agent=agents['alive'][0]['id']
))
if not self.options.get('noop'): if not self.options.get('noop'):
self._add_router_to_l3_agent(agents['alive'][0]['id'], rou[0]['id']) self._add_router_to_l3_agent(
agents['alive'][0]['id'], rou[0]['id'])
orphaned_routers=self._list_orphaned_routers() orphaned_routers = self._list_orphaned_routers()
self.log.info("_reschedule_agent_l3: rescheduling orphaned routers") self.log.info("_reschedule_agent_l3: rescheduling orphaned routers")
if orphaned_routers and agents['alive']: if orphaned_routers and agents['alive']:
for router in orphaned_routers: for router in orphaned_routers:
self.log.info("_reschedule_agent_l3: rescheduling {0} to {1}".format(router,agents['alive'][0]['id'])) self.log.info(
"_reschedule_agent_l3: rescheduling {0} to {1}".format(
router, agents['alive'][0]['id']))
if not self.options.get('noop'): if not self.options.get('noop'):
self._add_router_to_l3_agent(agents['alive'][0]['id'], router) self._add_router_to_l3_agent(
self.log.info("_reschedule_agent_l3: ended rescheduling of orphaned routers") agents['alive'][0]['id'], router)
self.log.info(
"_reschedule_agent_l3: ended rescheduling of orphaned routers")
self.log.debug("_reschedule_agent_l3: end.") self.log.debug("_reschedule_agent_l3: end.")
def _remove_self(self,agent_type): def _remove_self(self, agent_type):
self.log.debug("_remove_self: start.") self.log.debug("_remove_self: start.")
for agent in self._get_agents_by_type(agent_type): for agent in self._get_agents_by_type(agent_type):
if agent['host'] == socket.gethostname(): if agent['host'] == socket.gethostname():
self.log.info("_remove_self: deleting our own agent {0} of type {1}".format(agent['id'],agent_type)) self.log.info(
if not self.options.get('noop'): "_remove_self: deleting our own agent {0} of type {1}".
self._neutron_API_call(self.client.delete_agent, agent['id']) format(agent['id'], agent_type))
if not self.options.get('noop'):
self._delete_agent(agent['id'])
self.log.debug("_remove_self: end.") self.log.debug("_remove_self: end.")
def _reschedule_agent(self, agent): def _reschedule_agent(self, agent):
self.log.debug("_reschedule_agents: start.") self.log.debug("_reschedule_agents: start.")
task = self.RESCHEDULING_CALLS.get(agent, None) task = self.RESCHEDULING_CALLS.get(agent, None)
if task: if task:
task (agent) task(agent)
self.log.debug("_reschedule_agents: end.") self.log.debug("_reschedule_agents: end.")
def do(self, agent): def do(self, agent):
@ -501,8 +477,6 @@ class NeutronCleaner(object):
self._reschedule_agent(agent) self._reschedule_agent(agent)
if self.options.get('remove-self'): if self.options.get('remove-self'):
self._remove_self(agent) self._remove_self(agent)
# if self.options.get('remove-agent'):
# self._cleanup_agents(agent)
def _test_healthy(self, agent_list, hostname): def _test_healthy(self, agent_list, hostname):
rv = False rv = False
@ -512,7 +486,10 @@ class NeutronCleaner(object):
return rv return rv
def test_healthy(self, agent_type): def test_healthy(self, agent_type):
rc = 9 # OCF_FAILED_MASTER, http://www.linux-ha.org/doc/dev-guides/_literal_ocf_failed_master_literal_9.html # OCF_FAILED_MASTER,
# http://www.linux-ha.org/doc/dev-guides/_literal_ocf_failed_master_literal_9.html
rc = 9
agentlist = self._get_agents_by_type(agent_type) agentlist = self._get_agents_by_type(agent_type)
for hostname in self.options.get('test-hostnames'): for hostname in self.options.get('test-hostnames'):
if self._test_healthy(agentlist, hostname): if self._test_healthy(agentlist, hostname):
@ -520,74 +497,130 @@ class NeutronCleaner(object):
return rc return rc
if __name__ == '__main__': if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Neutron network node cleaning tool.') parser = argparse.ArgumentParser(
parser.add_argument("-c", "--auth-config", dest="authconf", default="/root/openrc", description='Neutron network node cleaning tool.')
help="Authenticating config FILE", metavar="FILE") parser.add_argument(
parser.add_argument("-t", "--auth-token", dest="auth-token", default=None, "-c",
help="Authenticating token (instead username/passwd)", metavar="TOKEN") "--auth-config",
parser.add_argument("-u", "--admin-auth-url", dest="admin-auth-url", default=None, dest="authconf",
help="Authenticating URL (admin)", metavar="URL") default="/etc/neutron/neutron.conf",
parser.add_argument("--retries", dest="retries", type=int, default=50, help="Read authconfig from service file",
help="try NN retries for API call", metavar="NN") metavar="FILE")
parser.add_argument("--sleep", dest="sleep", type=int, default=2, parser.add_argument(
help="sleep seconds between retries", metavar="SEC") "-t",
parser.add_argument("-a", "--agent", dest="agent", action="append", "--auth-token",
help="specyfy agents for cleaning", required=True) dest="auth-token",
parser.add_argument("--cleanup-ports", dest="cleanup-ports", action="store_true", default=False, default=None,
help="cleanup ports for given agents on this node") help="Authenticating token (instead username/passwd)",
parser.add_argument("--remove-self", dest="remove-self", action="store_true", default=False, metavar="TOKEN")
help="remove ourselves from agent list") parser.add_argument(
parser.add_argument("--activeonly", dest="activeonly", action="store_true", default=False, "-u",
help="cleanup only active ports") "--admin-auth-url",
parser.add_argument("--reschedule", dest="reschedule", action="store_true", default=False, dest="admin-auth-url",
help="reschedule given agents") default=None,
parser.add_argument("--remove-dead", dest="remove-dead", action="store_true", default=False, help="Authenticating URL (admin)",
help="remove dead agents while rescheduling") metavar="URL")
parser.add_argument("--test-alive-for-hostname", dest="test-hostnames", action="append", parser.add_argument(
help="testing agent's healthy for given hostname") "--retries",
parser.add_argument("--external-bridge", dest="external-bridge", default="br-ex", dest="retries",
help="external bridge name", metavar="IFACE") type=int,
parser.add_argument("--integration-bridge", dest="integration-bridge", default="br-int", default=50,
help="integration bridge name", metavar="IFACE") help="try NN retries for API call",
parser.add_argument("-l", "--log", dest="log", action="store", metavar="NN")
help="log file or logging.conf location") parser.add_argument(
parser.add_argument("--noop", dest="noop", action="store_true", default=False, "--sleep",
help="do not execute, print to log instead") dest="sleep",
parser.add_argument("--debug", dest="debug", action="store_true", default=False, type=int,
help="debug") default=2,
help="sleep seconds between retries",
metavar="SEC")
parser.add_argument(
"-a",
"--agent",
dest="agent",
action="append",
help="specyfy agents for cleaning",
required=True)
parser.add_argument(
"--cleanup-ports",
dest="cleanup-ports",
action="store_true",
default=False,
help="cleanup ports for given agents on this node")
parser.add_argument(
"--remove-self",
dest="remove-self",
action="store_true",
default=False,
help="remove ourselves from agent list")
parser.add_argument(
"--activeonly",
dest="activeonly",
action="store_true",
default=False,
help="cleanup only active ports")
parser.add_argument(
"--reschedule",
dest="reschedule",
action="store_true",
default=False,
help="reschedule given agents")
parser.add_argument(
"--remove-dead",
dest="remove-dead",
action="store_true",
default=False,
help="remove dead agents while rescheduling")
parser.add_argument(
"--test-alive-for-hostname",
dest="test-hostnames",
action="append",
help="testing agent's healthy for given hostname")
parser.add_argument(
"--external-bridge",
dest="external-bridge",
default="br-ex",
help="external bridge name",
metavar="IFACE")
parser.add_argument(
"--integration-bridge",
dest="integration-bridge",
default="br-int",
help="integration bridge name",
metavar="IFACE")
parser.add_argument(
"-l",
"--log",
dest="log",
action="store",
help="log to file instead of STDOUT")
parser.add_argument(
"--noop",
dest="noop",
action="store_true",
default=False,
help="do not execute, print to log instead")
parser.add_argument(
"--debug",
dest="debug",
action="store_true",
default=False,
help="debug")
args = parser.parse_args() args = parser.parse_args()
# if len(args) != 1: RETRY_COUNT = args.retries
# parser.error("incorrect number of arguments") RETRY_DELAY = args.sleep
# parser.print_help() args = parser.parse_args()
# setup logging
if args.log:
LOG = make_logger(
handler=logging.handlers.WatchedFileHandler(args.log))
#setup logging
if args.debug: if args.debug:
_log_level = logging.DEBUG LOG.setLevel(logging.DEBUG)
else:
_log_level = logging.INFO
if not args.log:
# log config or file not given -- log to console
LOG = logging.getLogger(LOG_NAME) # do not move to UP of file
_log_handler = logging.StreamHandler(sys.stdout)
_log_handler.setFormatter(logging.Formatter("%(asctime)s - %(levelname)s - %(message)s"))
LOG.addHandler(_log_handler)
LOG.setLevel(_log_level)
elif args.log.split(os.sep)[-1] == 'logging.conf':
# setup logging by external file
import logging.config
logging.config.fileConfig(args.log)
LOG = logging.getLogger(LOG_NAME) # do not move to UP of file
else:
# log to given file
LOG = logging.getLogger(LOG_NAME) # do not move to UP of file
LOG.addHandler(logging.handlers.WatchedFileHandler(args.log))
LOG.setLevel(_log_level)
LOG.info("Started: {0}".format(' '.join(sys.argv))) LOG.info("Started: {0}".format(' '.join(sys.argv)))
cleaner = NeutronCleaner(get_authconfig(args.authconf), options=vars(args), log=LOG) cleaner = NeutronCleaner(options=vars(args), log=LOG)
rc = 0 rc = 0
if vars(args).get('test-hostnames'): if vars(args).get('test-hostnames'):
rc = cleaner.test_healthy(args.agent[0]) rc = cleaner.test_healthy(args.agent[0])
@ -596,5 +629,3 @@ if __name__ == '__main__':
cleaner.do(i) cleaner.do(i)
LOG.debug("End.") LOG.debug("End.")
sys.exit(rc) sys.exit(rc)
#
###