fuel-plugin-neutron-vpnaas/deployment_scripts/puppet/modules/vpnaas/files/ocf/neutron-agent-vpn

595 lines
19 KiB
Bash

#!/bin/bash
#
#
# OpenStack L3 Service (neutron-vpn-agent)
#
# Description: Manages an OpenStack L3 Service (neutron-vpn-agent) process as an HA resource
#
# Authors: Emilien Macchi
# Mainly inspired by the Nova Network resource agent written by Emilien Macchi & Sebastien Han
#
# Support: openstack@lists.launchpad.net
# License: Apache Software License (ASL) 2.0
#
#
# See usage() function below for more details ...
#
# OCF instance parameters:
# OCF_RESKEY_binary
# OCF_RESKEY_config
# OCF_RESKEY_plugin_config
# OCF_RESKEY_vpn_config
# OCF_RESKEY_user
# OCF_RESKEY_pid
# OCF_RESKEY_neutron_server_port
# OCF_RESKEY_additional_parameters
#######################################################################
# Initialization:
# Use a caller-provided OCF_FUNCTIONS_DIR when one is set; otherwise fall back
# to the heartbeat library directory below OCF_ROOT.
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
# Source the shared OCF shell helpers from that directory (presumably the origin
# of ocf_log, ocf_run, check_binary and the OCF_* return codes used below).
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
# Mask group/other write permission on everything this agent creates.
umask 0022
#######################################################################
# Fill in some defaults if no values are specified
# Restrict command lookup to the system binary directories.
PATH=/sbin:/usr/sbin:/bin:/usr/bin

# Built-in default for every OCF_RESKEY_* instance parameter. The *_default
# variables are also interpolated into the meta-data XML.
OCF_RESKEY_binary_default="neutron-vpn-agent"
OCF_RESKEY_config_default="/etc/neutron/neutron.conf"
OCF_RESKEY_keystone_config_default="/etc/keystone/keystone.conf"
OCF_RESKEY_vpn_config_default="/etc/neutron/vpn_agent.ini"
OCF_RESKEY_plugin_config_default="/etc/neutron/l3_agent.ini"
OCF_RESKEY_log_file_default="/var/log/neutron/vpn-agent.log"
OCF_RESKEY_user_default="neutron"
OCF_RESKEY_pid_default="${HA_RSCTMP}/${__SCRIPT_NAME}/${__SCRIPT_NAME}.pid"
OCF_RESKEY_os_auth_url_default="http://localhost:5000/v2.0"
OCF_RESKEY_username_default="neutron"
OCF_RESKEY_password_default="neutron_pass"
OCF_RESKEY_tenant_default="services"
OCF_RESKEY_external_bridge_default="br-ex"
OCF_RESKEY_multiple_agents_default=true
OCF_RESKEY_rescheduling_tries_default=5
OCF_RESKEY_rescheduling_interval_default=33
OCF_RESKEY_debug_default=false
# The meta-data advertises a neutron_server_port parameter and interpolates
# this variable; it used to be undefined, which produced default="" for an
# integer parameter.
# NOTE(review): 9696 is the stock neutron-server API port - confirm it matches
# the deployment; nothing else in this file reads the parameter.
OCF_RESKEY_neutron_server_port_default="9696"

# Apply the defaults. "${VAR=default}" assigns only when VAR is unset, so an
# explicitly empty value handed in by the cluster manager is preserved.
# The expansions are quoted so the resulting values are never word-split or
# glob-expanded as arguments of ':'.
: "${OCF_RESKEY_os_auth_url=${OCF_RESKEY_os_auth_url_default}}"
: "${OCF_RESKEY_username=${OCF_RESKEY_username_default}}"
: "${OCF_RESKEY_password=${OCF_RESKEY_password_default}}"
: "${OCF_RESKEY_tenant=${OCF_RESKEY_tenant_default}}"
: "${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}"
: "${OCF_RESKEY_config=${OCF_RESKEY_config_default}}"
: "${OCF_RESKEY_keystone_config=${OCF_RESKEY_keystone_config_default}}"
: "${OCF_RESKEY_plugin_config=${OCF_RESKEY_plugin_config_default}}"
: "${OCF_RESKEY_vpn_config=${OCF_RESKEY_vpn_config_default}}"
: "${OCF_RESKEY_user=${OCF_RESKEY_user_default}}"
: "${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}}"
: "${OCF_RESKEY_multiple_agents=${OCF_RESKEY_multiple_agents_default}}"
: "${OCF_RESKEY_external_bridge=${OCF_RESKEY_external_bridge_default}}"
: "${OCF_RESKEY_debug=${OCF_RESKEY_debug_default}}"
: "${OCF_RESKEY_rescheduling_tries=${OCF_RESKEY_rescheduling_tries_default}}"
: "${OCF_RESKEY_rescheduling_interval=${OCF_RESKEY_rescheduling_interval_default}}"
: "${OCF_RESKEY_log_file=${OCF_RESKEY_log_file_default}}"
: "${OCF_RESKEY_neutron_server_port=${OCF_RESKEY_neutron_server_port_default}}"
#######################################################################
# Print a short help text listing the supported actions to stdout.
usage() {
    local self="$0"

    # One argument per output line; '%s\n' is re-applied to each of them.
    printf '%s\n' \
        "usage: ${self} (start|stop|validate-all|meta-data|status|monitor)" \
        "${self} manages an OpenStack L3 Service (neutron-vpn-agent) process as an HA resource" \
        "The 'start' operation starts the networking service." \
        "The 'stop' operation stops the networking service." \
        "The 'validate-all' operation reports whether the parameters are valid" \
        "The 'meta-data' operation reports this RA's meta-data information" \
        "The 'status' operation reports whether the networking service is running" \
        "The 'monitor' operation reports whether the networking service seems to be working"
}
# Print the OCF resource-agent meta-data XML to stdout.
#
# The here-document is unquoted, so the ${OCF_RESKEY_*_default} references are
# expanded when the function runs and the XML shows the effective defaults.
# rescheduling_tries and rescheduling_interval are declared as integers (their
# defaults are numbers), and external_bridge / debug advertise the defaults
# defined at the top of this file.
meta_data() {
    cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="neutron-vpn-agent">
<version>1.0</version>
<longdesc lang="en">
Resource agent for the OpenStack Router (neutron-vpn-agent)
May manage a neutron-vpn-agent instance or a clone set that
creates a distributed neutron-vpn-agent cluster.
</longdesc>
<shortdesc lang="en">Manages the OpenStack L3 Service (neutron-vpn-agent)</shortdesc>
<parameters>
<parameter name="binary" unique="0" required="0">
<longdesc lang="en">
Location of the OpenStack Router server binary (neutron-vpn-agent)
</longdesc>
<shortdesc lang="en">OpenStack Router server binary (neutron-vpn-agent)</shortdesc>
<content type="string" default="${OCF_RESKEY_binary_default}" />
</parameter>
<parameter name="config" unique="0" required="0">
<longdesc lang="en">
Location of the OpenStack Router (neutron-server) configuration file
</longdesc>
<shortdesc lang="en">OpenStack Router (neutron-server) config file</shortdesc>
<content type="string" default="${OCF_RESKEY_config_default}" />
</parameter>
<parameter name="keystone_config" unique="0" required="0">
<longdesc lang="en">
Location of the Keystone configuration file
</longdesc>
<shortdesc lang="en">OpenStack Keystone config file</shortdesc>
<content type="string" default="${OCF_RESKEY_keystone_config_default}" />
</parameter>
<parameter name="plugin_config" unique="0" required="0">
<longdesc lang="en">
Location of the OpenStack L3 Service (neutron-l3-agent) configuration file
</longdesc>
<shortdesc lang="en">OpenStack Router (neutron-l3-agent) config file</shortdesc>
<content type="string" default="${OCF_RESKEY_plugin_config_default}" />
</parameter>
<parameter name="vpn_config" unique="0" required="0">
<longdesc lang="en">
Location of the OpenStack L3 Service (neutron-vpn-agent) configuration file
</longdesc>
<shortdesc lang="en">OpenStack Router (neutron-vpn-agent) config file</shortdesc>
<content type="string" default="${OCF_RESKEY_vpn_config_default}" />
</parameter>
<parameter name="user" unique="0" required="0">
<longdesc lang="en">
User running OpenStack L3 Service (neutron-vpn-agent)
</longdesc>
<shortdesc lang="en">OpenStack L3 Service (neutron-vpn-agent) user</shortdesc>
<content type="string" default="${OCF_RESKEY_user_default}" />
</parameter>
<parameter name="pid" unique="0" required="0">
<longdesc lang="en">
The pid file to use for this OpenStack L3 Service (neutron-vpn-agent) instance
</longdesc>
<shortdesc lang="en">OpenStack L3 Service (neutron-vpn-agent) pid file</shortdesc>
<content type="string" default="${OCF_RESKEY_pid_default}" />
</parameter>
<parameter name="multiple_agents" unique="0" required="0">
<longdesc lang="en">
Flag, that switch RCS-agent behavior for multiple or single L3-agent.
</longdesc>
<shortdesc lang="en">Switch between multiple or single L3-agent behavior</shortdesc>
<content type="string" default="${OCF_RESKEY_multiple_agents_default}" />
</parameter>
<parameter name="log_file" unique="0" required="0">
<longdesc lang="en">
The log file to use for this OpenStack L3 Service (neutron-vpn-agent) instance
</longdesc>
<shortdesc lang="en">OpenStack L3 Service (neutron-vpn-agent) log file</shortdesc>
<content type="string" default="${OCF_RESKEY_log_file_default}" />
</parameter>
<parameter name="neutron_server_port" unique="0" required="0">
<longdesc lang="en">
The listening port number of the AMQP server. Mandatory to perform a monitor check
</longdesc>
<shortdesc lang="en">AMQP listening port</shortdesc>
<content type="integer" default="${OCF_RESKEY_neutron_server_port_default}" />
</parameter>
<parameter name="username" unique="0" required="0">
<longdesc lang="en">
Neutron username for port list fetching
</longdesc>
<shortdesc lang="en">Neutron username</shortdesc>
<content type="string" default="${OCF_RESKEY_username_default}" />
</parameter>
<parameter name="password" unique="0" required="0">
<longdesc lang="en">
Neutron password for port list fetching
</longdesc>
<shortdesc lang="en">Neutron password</shortdesc>
<content type="string" default="${OCF_RESKEY_password_default}" />
</parameter>
<parameter name="os_auth_url" unique="0" required="0">
<longdesc lang="en">
URL of keystone
</longdesc>
<shortdesc lang="en">Keystone URL</shortdesc>
<content type="string" default="${OCF_RESKEY_os_auth_url_default}" />
</parameter>
<parameter name="tenant" unique="0" required="0">
<longdesc lang="en">
Admin tenant name
</longdesc>
<shortdesc lang="en">Admin tenant</shortdesc>
<content type="string" default="${OCF_RESKEY_tenant_default}" />
</parameter>
<parameter name="external_bridge" unique="0" required="0">
<longdesc lang="en">
External bridge for vpn-agent
</longdesc>
<shortdesc lang="en">External bridge</shortdesc>
<content type="string" default="${OCF_RESKEY_external_bridge_default}" />
</parameter>
<parameter name="debug" unique="0" required="0">
<longdesc lang="en">
Enable debug logging
</longdesc>
<shortdesc lang="en">Enable debug logging</shortdesc>
<content type="boolean" default="${OCF_RESKEY_debug_default}"/>
</parameter>
<parameter name="rescheduling_tries" unique="0" required="0">
<longdesc lang="en">
Tries to start rescheduling script after start of agent.
</longdesc>
<shortdesc lang="en">Tries to start rescheduling script after start of agent.</shortdesc>
<content type="integer" default="${OCF_RESKEY_rescheduling_tries_default}"/>
</parameter>
<parameter name="rescheduling_interval" unique="0" required="0">
<longdesc lang="en">
Interval between starts of rescheduling script.
</longdesc>
<shortdesc lang="en">Interval between starts of rescheduling script.</shortdesc>
<content type="integer" default="${OCF_RESKEY_rescheduling_interval_default}"/>
</parameter>
<parameter name="syslog" unique="0" required="0">
<longdesc lang="en">
Enable logging to syslog
</longdesc>
<shortdesc lang="en">Enable logging to syslog</shortdesc>
<content type="boolean" default="false"/>
</parameter>
<parameter name="additional_parameters" unique="0" required="0">
<longdesc lang="en">
Additional parameters to pass on to the OpenStack L3 Service (neutron-vpn-agent)
</longdesc>
<shortdesc lang="en">Additional parameters for neutron-vpn-agent</shortdesc>
<content type="string" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="20" />
<action name="stop" timeout="20" />
<action name="status" timeout="20" />
<action name="monitor" timeout="30" interval="20" />
<action name="validate-all" timeout="5" />
<action name="meta-data" timeout="5" />
</actions>
</resource-agent>
END
}
# Print the PID of the agent worker process, or an empty line when none matches.
#
# pgrep searches the processes of ${OCF_RESKEY_user} whose full command line
# (-f) matches ${OCF_RESKEY_binary}, keeps only the oldest one (-o) and lists
# it together with its PID (-l). Only entries that look like
# "python /usr/bin..." are accepted; the first column (the PID) is printed.
# The result is held in a local so a caller's own `pid` variable is not
# overwritten when this function is invoked outside a command substitution.
get_worker_pid() {
    local worker_pid

    worker_pid=$(pgrep -u "${OCF_RESKEY_user}" -fol "${OCF_RESKEY_binary}" \
        | grep -E 'python[[:space:]]+/usr/bin' \
        | awk '{print $1}')
    echo "$worker_pid"
}
#######################################################################
# Functions invoked by resource manager actions
# Check that the agent can run on this node.
#
# Verifies the agent binary and netstat via check_binary, that the main config
# file exists and that the service user is known to getent.
# Returns: 0 when everything is in place, $OCF_ERR_INSTALLED when the config
#          file is missing outside of a probe or the user does not exist.
neutron_l3_agent_validate() {
    local rc

    check_binary "$OCF_RESKEY_binary"
    check_binary netstat

    # A config file on shared storage that is not available
    # during probes is OK.
    if [ ! -f "$OCF_RESKEY_config" ]; then
        if ! ocf_is_probe; then
            ocf_log err "Config $OCF_RESKEY_config doesn't exist"
            return $OCF_ERR_INSTALLED
        fi
        # ocf_log takes the severity as its first argument; the former
        # "ocf_log_warn" was not a defined command, so this warning was lost.
        ocf_log warn "Config $OCF_RESKEY_config not available during a probe"
    fi

    getent passwd "$OCF_RESKEY_user" >/dev/null 2>&1
    rc=$?
    if [ $rc -ne 0 ]; then
        ocf_log err "User $OCF_RESKEY_user doesn't exist"
        return $OCF_ERR_INSTALLED
    fi
    true
}
# Prepare token-based authentication arguments when a Keystone admin token
# can be read from ${OCF_RESKEY_keystone_config}.
#
# Globals written:
#   AUTH_TOKEN - value of the first non-commented admin_token option, with
#                surrounding whitespace removed; empty when not found.
#   AUTH_TAIL  - "--admin-auth-url=... --auth-token=..." when a token was
#                found, empty otherwise. It is expanded unquoted by callers,
#                so the token must not carry leading blanks (as it did with
#                "admin_token = value" lines) or it would split the option.
# Returns: always 0.
setup_auth() {
    AUTH_TOKEN=""
    AUTH_TAIL=""

    if [[ -f "$OCF_RESKEY_keystone_config" ]]; then
        # Skip comment lines, match the option name case-insensitively, take
        # everything after the first '=' and stop at the first hit so the
        # result is always a single line.
        AUTH_TOKEN=$(awk '
            /^[ \t]*#/ { next }
            {
                eq = index($0, "=")
                if (eq == 0) next
                key = tolower(substr($0, 1, eq - 1))
                gsub(/[ \t]/, "", key)
                if (key != "admin_token") next
                val = substr($0, eq + 1)
                gsub(/^[ \t]+|[ \t\r]+$/, "", val)
                print val
                exit
            }' "$OCF_RESKEY_keystone_config")
    fi

    if [[ -n "$AUTH_TOKEN" ]]; then
        AUTH_TAIL="--admin-auth-url=${OCF_RESKEY_os_auth_url} --auth-token=${AUTH_TOKEN}"
    fi
    true
}
# Report whether the agent worker is running and keep the PID file in sync.
#
# Creates the PID file directory when it is missing, then compares the PID
# reported by get_worker_pid with the one stored in ${OCF_RESKEY_pid}.
# Returns:
#   $OCF_NOT_RUNNING - no worker process was found
#   $OCF_SUCCESS     - worker found; the PID file matched, was missing or was
#                      stale, and has been (re)written where necessary
#   $OCF_ERR_GENERIC - the PID file names a different process that is alive
# All path expansions are quoted so a PID file location containing whitespace
# does not break dirname, the file tests or the redirections.
neutron_l3_agent_status() {
    local pid
    local f_pid
    local pid_dir

    # check and make PID file dir
    pid_dir=$(dirname "${OCF_RESKEY_pid}")
    if [ ! -d "${pid_dir}" ]; then
        ocf_log debug "Create pid file dir: ${pid_dir} and chown to ${OCF_RESKEY_user}"
        mkdir -p "${pid_dir}"
        chown -R "${OCF_RESKEY_user}" "${pid_dir}"
        chmod 755 "${pid_dir}"
    fi

    pid=$(get_worker_pid)
    if [ -z "$pid" ]; then
        ocf_log warn "OpenStack Neutron agent '$OCF_RESKEY_binary' not running."
        return $OCF_NOT_RUNNING
    fi

    # Worker is alive but has no PID file yet: record it.
    if [ ! -f "$OCF_RESKEY_pid" ]; then
        ocf_log warn "OpenStack Neutron agent (${OCF_RESKEY_binary}) was run, but no PID file found."
        ocf_log warn "Writing PID='$pid' to '$OCF_RESKEY_pid' for '${OCF_RESKEY_binary}' worker..."
        echo "$pid" > "$OCF_RESKEY_pid"
        return $OCF_SUCCESS
    fi

    # First whitespace-separated word stored in the PID file.
    f_pid=$(awk 'NF { print $1; exit }' "$OCF_RESKEY_pid")
    if [ "$pid" = "$f_pid" ]; then
        return $OCF_SUCCESS
    fi

    # The PID file disagrees with the worker found by pgrep. When the recorded
    # process no longer exists (or the file was empty) the file is stale.
    if [ -z "$f_pid" ] || [ ! -d "/proc/$f_pid" ]; then
        ocf_log warn "Old PID file $OCF_RESKEY_pid found, but no running processes with PID=$f_pid found."
        ocf_log warn "PID-file will be re-created (with PID=$pid)."
        echo "$pid" > "$OCF_RESKEY_pid"
        return $OCF_SUCCESS
    fi

    # The PID file belongs to some other process that is still running.
    ocf_log warn "Another daemon (with PID=$f_pid) running with PID file '$OCF_RESKEY_pid'. My PID=$pid"
    return $OCF_ERR_GENERIC
}
# Print the names of all qrouter-* network namespaces, separated by spaces.
#
# Only the first column of `ip netns list` is kept: some iproute2 versions
# append " (id: N)" to each name, and those extra words would otherwise be
# treated as namespace names by the callers that iterate over this list.
get_ns_list() {
    local ns_names

    ns_names=$(ip netns list | awk '$1 ~ /^qrouter-/ { print $1 }')
    # Deliberately unquoted: joins the one-per-line names into a single line.
    echo $ns_names
}
# Print, on one line, the PIDs that lsof reports inside each given namespace.
# Arguments: $1 - whitespace-separated list of network namespace names
get_pid_list_for_ns_list() {
    local namespaces="$1"
    local ns
    local collected=""

    for ns in $namespaces; do
        # lsof runs inside the namespace; its output is accumulated as-is.
        collected="$collected $(ip netns exec $ns lsof -n -i -t)"
    done
    # Unquoted on purpose: collapses all whitespace into single spaces.
    echo $collected
}
# Kill every process found inside the qrouter-* network namespaces, then ask
# q-agent-cleanup.py to clean up the L3 agent's ports.
#
# Processes get up to three rounds of plain `kill` (one second apart);
# whatever is still listed afterwards receives `kill -9`.
# All working variables are local: `count` in particular is also used as a
# working variable by neutron_l3_agent_stop, which calls this function.
clean_up() {
    local ns_list
    local inside_ns_pids
    local ns_pid
    local count=3   # number of polite kill rounds

    ns_list=$(get_ns_list)

    while [ "$count" -gt 0 ]; do
        # we can't use ps, because ps can't select processes for given network namespace
        inside_ns_pids=$(get_pid_list_for_ns_list "$ns_list")
        if [ -z "$inside_ns_pids" ]; then
            break
        fi
        for ns_pid in $inside_ns_pids; do
            ocf_run kill "$ns_pid"
        done
        sleep 1
        count=$((count - 1))
    done

    # kill all remaining processes that did not die from a plain kill
    inside_ns_pids=$(get_pid_list_for_ns_list "$ns_list")
    for ns_pid in $inside_ns_pids; do
        ocf_run kill -9 "$ns_pid"
    done

    # cleanup network interfaces
    q-agent-cleanup.py --agent=l3 --cleanup-ports
}
# Delete every qrouter-* network namespace reported by get_ns_list.
#
# Be careful: no process may still be using the network inside any of these
# namespaces - run clean_up first.
clean_up_namespaces() {
    ns_list=$(get_ns_list)
    # An empty list simply yields zero iterations.
    for ns_name in $ns_list; do
        ocf_run ip --force netns del $ns_name
    done
}
# Monitor action: delegates to neutron_l3_agent_status and hands its return
# code back unchanged (the code is also left in `rc`).
neutron_l3_agent_monitor() {
    rc=0
    neutron_l3_agent_status || rc=$?
    return $rc
}
# Start action: launch the agent as ${OCF_RESKEY_user} and wait until the
# status check reports it as running.
#
# Steps: return early when already running; clean leftover namespace
# processes and namespaces; start the daemon in the background and record its
# PID; poll the monitor every 3 seconds (the cluster manager's operation
# timeout bounds the wait); in single-agent mode schedule the deferred
# rescheduling runs and invoke fuel-fdb-cleaner.
# Returns $OCF_SUCCESS, or exits with $OCF_ERR_GENERIC when the monitor
# reports anything other than "running" / "not running" while waiting.
neutron_l3_agent_start() {
    local rc
    local rescheduling_cmd
    local resch_cmd
    local i

    neutron_l3_agent_status
    rc=$?
    if [ $rc -eq $OCF_SUCCESS ]; then
        ocf_log info "OpenStack neutron-l3-agent already running"
        return $OCF_SUCCESS
    fi

    clean_up
    sleep 1
    clean_up_namespaces

    # run and detach to background agent as daemon.
    # Don't use ocf_run as we're sending the tool's output to /dev/null
    # "\$!" reaches the inner shell as "$!", i.e. the PID of the job it just
    # put in the background; su's stdout (that PID) is written to the PID file
    # by this shell. Previously the inner shell received "echo \$!" and a
    # "$OCF_RESKEY_pid" it had to expand itself, so the file got a literal
    # "$!" or was not written at all.
    su "${OCF_RESKEY_user}" -s /bin/sh -c "${OCF_RESKEY_binary} --config-file=$OCF_RESKEY_config \
--config-file=$OCF_RESKEY_plugin_config --config-file=$OCF_RESKEY_vpn_config \
--log-file=$OCF_RESKEY_log_file $OCF_RESKEY_additional_parameters \
>> /dev/null 2>&1 & echo \$!" > "${OCF_RESKEY_pid}"
    ocf_log debug "Create pid file: ${OCF_RESKEY_pid} with content $(cat "${OCF_RESKEY_pid}")"

    # Spin waiting for the server to come up.
    # Let the CRM/LRM time us out if required
    while true; do
        neutron_l3_agent_monitor
        rc=$?
        [ $rc -eq $OCF_SUCCESS ] && break
        if [ $rc -ne $OCF_NOT_RUNNING ]; then
            ocf_log err "OpenStack neutron-l3-agent start failed"
            exit $OCF_ERR_GENERIC
        fi
        sleep 3
    done

    if ! ocf_is_true "$OCF_RESKEY_multiple_agents"; then
        # detach deferred rescheduling procedure
        # The file redirection comes before 2>&1 so that stderr lands in the
        # log as well (with "2>&1 >> file" it stayed on the old stdout).
        rescheduling_cmd="q-agent-cleanup.py --agent=l3 --reschedule --remove-dead ${AUTH_TAIL} >> /var/log/neutron/rescheduling.log 2>&1"
        resch_cmd=''
        for ((i = 0; i < OCF_RESKEY_rescheduling_tries; i++)); do
            resch_cmd="$resch_cmd sleep $OCF_RESKEY_rescheduling_interval ; $rescheduling_cmd ;"
        done
        bash -c "$resch_cmd" &
        fuel-fdb-cleaner --ssh-keyfile /root/.ssh/id_rsa_neutron -l /var/log/neutron/fdb-cleaner.log
    fi

    ocf_log info "OpenStack Router (neutron-l3-agent) started"
    return $OCF_SUCCESS
}
# Stop action: terminate the agent worker and clean up after it.
#
# Sends SIGTERM to the worker, polls the status once per second for up to
# shutdown_timeout seconds (15 by default, or the operation timeout from
# OCF_RESKEY_CRM_meta_timeout, taken as milliseconds, minus 5 seconds), then
# falls back to SIGKILL. Afterwards the PID file is removed, namespaces are
# cleaned and, in single-agent mode, q-agent-cleanup.py --remove-self is
# started in the background.
# Returns $OCF_SUCCESS; exits with $OCF_ERR_GENERIC when SIGTERM cannot be
# delivered. The loop counter and timeout are local so callers' variables of
# the same name are left alone.
neutron_l3_agent_stop() {
    local rc
    local pid
    local shutdown_timeout=15
    local count=0

    neutron_l3_agent_status
    rc=$?
    if [ $rc -eq $OCF_NOT_RUNNING ]; then
        clean_up
        sleep 1
        clean_up_namespaces
        ocf_log info "OpenStack Router ($OCF_RESKEY_binary) already stopped"
        return $OCF_SUCCESS
    fi

    # Try SIGTERM
    pid=$(get_worker_pid)
    if [ -z "$pid" ]; then
        ocf_log warn "OpenStack Router ($OCF_RESKEY_binary) not running."
        #return $OCF_NOT_RUNNING
        return $OCF_SUCCESS
    fi
    ocf_run kill -s TERM "$pid"
    rc=$?
    if [ $rc -ne 0 ]; then
        ocf_log err "OpenStack Router ($OCF_RESKEY_binary) couldn't be stopped"
        exit $OCF_ERR_GENERIC
    fi

    # stop waiting
    if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then
        shutdown_timeout=$(( (OCF_RESKEY_CRM_meta_timeout / 1000) - 5 ))
    fi
    while [ "$count" -lt "$shutdown_timeout" ]; do
        neutron_l3_agent_status
        rc=$?
        if [ $rc -eq $OCF_NOT_RUNNING ]; then
            break
        fi
        count=$((count + 1))
        sleep 1
        ocf_log debug "OpenStack Router ($OCF_RESKEY_binary) still hasn't stopped yet. Waiting ..."
    done

    neutron_l3_agent_status
    rc=$?
    if [ $rc -ne $OCF_NOT_RUNNING ]; then
        # SIGTERM didn't help either, try SIGKILL
        ocf_log info "OpenStack Router ($OCF_RESKEY_binary) failed to stop after ${shutdown_timeout}s using SIGTERM. Trying SIGKILL ..."
        ocf_run kill -s KILL "$pid"
    fi
    ocf_log info "OpenStack Router ($OCF_RESKEY_binary) stopped"

    ocf_log debug "Delete pid file: ${OCF_RESKEY_pid} with content $(cat "${OCF_RESKEY_pid}")"
    rm -f "$OCF_RESKEY_pid"

    clean_up
    sleep 1
    clean_up_namespaces

    if ! ocf_is_true "$OCF_RESKEY_multiple_agents"; then
        echo ok >> /var/log/neutron/rescheduling.log &
        # AUTH_TAIL is left unquoted on purpose: it holds two separate options.
        # The file redirection precedes 2>&1 so stderr is logged as well.
        q-agent-cleanup.py --agent=l3 --remove-self ${AUTH_TAIL} >> /var/log/neutron/rescheduling.log 2>&1 &
    fi
    sleep 3
    return $OCF_SUCCESS
}
#######################################################################
# Actions that only print text are answered first and exit right away, so
# they work even when validation below would fail.
case "$1" in
# Emit the resource agent meta-data XML.
meta-data) meta_data
exit $OCF_SUCCESS;;
# Emit the help text.
usage|help) usage
exit $OCF_SUCCESS;;
esac
# Anything except meta-data and help must pass validation
neutron_l3_agent_validate || exit $?
# Populate AUTH_TOKEN / AUTH_TAIL, which the start and stop actions pass on
# to q-agent-cleanup.py.
setup_auth || exit $?
# What kind of method was invoked?
# The exit status of the script is that of the selected action function.
case "$1" in
start) neutron_l3_agent_start;;
stop) neutron_l3_agent_stop;;
status) neutron_l3_agent_status;;
monitor) neutron_l3_agent_monitor;;
# Validation already ran above; nothing more to do.
validate-all) ;;
# Unknown action: show the help text and report it as unimplemented.
*) usage
exit $OCF_ERR_UNIMPLEMENTED;;
esac