Rabbitmq ocf master/slave (WORK IN PROGRESS)

Blueprint: rabbitmq-cluster-controlled-by-pacemaker
Closes-bug: #1318936
Change-Id: Ieab7156fee2b70b32dbf5a2852627495cf1b650e
This commit is contained in:
Sergey Vasilenko 2014-02-28 22:04:15 +04:00
parent ab3f15ddec
commit aeac878fae
9 changed files with 1149 additions and 387 deletions

View File

@ -0,0 +1,983 @@
#!/bin/bash
#
# See usage() function below for more details ...
#
#######################################################################
# Initialization:
# OCF_FUNCTIONS_DIR is derived from OCF_ROOT unless the caller already set it;
# the OCF shell helper library is then sourced from that directory.
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
#######################################################################
# Fill in some defaults if no values are specified
# PATH is pinned to the standard system directories.
PATH=/sbin:/usr/sbin:/bin:/usr/bin
# Built-in default value for each OCF_RESKEY_* resource parameter.
OCF_RESKEY_binary_default="/usr/sbin/rabbitmq-server"
OCF_RESKEY_ctl_default="/usr/sbin/rabbitmqctl"
OCF_RESKEY_debug_default=false
OCF_RESKEY_username_default="rabbitmq"
OCF_RESKEY_groupname_default="rabbitmq"
OCF_RESKEY_pid_file_default=/var/run/rabbitmq/pid
OCF_RESKEY_log_dir_default=/var/log/rabbitmq
OCF_RESKEY_mnesia_base_default=/var/lib/rabbitmq/mnesia
OCF_RESKEY_node_port_default=5672
# Logging tag/facility, assigned only when not already set.
# NOTE(review): presumably consumed by the logging helpers of the sourced
# ocf-shellfuncs library - confirm.
: ${HA_LOGTAG="lrmd"}
: ${HA_LOGFACILITY="daemon"}
# Apply the defaults. The ${VAR=default} form assigns only when VAR is unset,
# so a parameter that was explicitly passed (even as an empty string) is kept.
: ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}
: ${OCF_RESKEY_ctl=${OCF_RESKEY_ctl_default}}
: ${OCF_RESKEY_debug=${OCF_RESKEY_debug_default}}
: ${OCF_RESKEY_username=${OCF_RESKEY_username_default}}
: ${OCF_RESKEY_groupname=${OCF_RESKEY_groupname_default}}
: ${OCF_RESKEY_log_dir=${OCF_RESKEY_log_dir_default}}
: ${OCF_RESKEY_mnesia_base=${OCF_RESKEY_mnesia_base_default}}
: ${OCF_RESKEY_pid_file=${OCF_RESKEY_pid_file_default}}
: ${OCF_RESKEY_node_port=${OCF_RESKEY_node_port_default}}
#######################################################################
# Default wait budgets (in seconds) for shutdown and start-up. The operation
# timeout is divided by 1000 (i.e. treated as milliseconds) and 10 seconds are
# held back for the rest of the action.
# OCF_RESKEY_CRM_meta_timeout can be unset, e.g. when the agent is invoked by
# hand for "meta-data" or "usage"; an empty operand would make the arithmetic
# expansion fail, so fall back to 20000, which matches the 20 second start/stop
# timeout advertised by meta_data().
OCF_RESKEY_shutdown_time_default=$(( ${OCF_RESKEY_CRM_meta_timeout:-20000} / 1000 - 10 ))
: ${OCF_RESKEY_shutdown_time=${OCF_RESKEY_shutdown_time_default}}
OCF_RESKEY_start_time_default=$(( ${OCF_RESKEY_CRM_meta_timeout:-20000} / 1000 - 10 ))
: ${OCF_RESKEY_start_time=${OCF_RESKEY_start_time_default}}
#######################################################################
usage() {
    # Print a short help text to stdout.
    # The text now names the service this agent really manages (it used to
    # say "networking service") and lists the promote/demote/notify actions
    # that the dispatcher at the end of the script accepts.
    cat <<UEND
usage: $0 (start|stop|promote|demote|notify|validate-all|meta-data|status|monitor)
$0 manages an ${OCF_RESKEY_binary} process as an HA resource
The 'start' operation starts the RabbitMQ server.
The 'stop' operation stops the RabbitMQ server.
The 'promote' operation turns the RabbitMQ server on this node into the master
The 'demote' operation turns the RabbitMQ server on this node into a slave
The 'notify' operation handles cluster notifications (join/unjoin of nodes)
The 'validate-all' operation reports whether the parameters are valid
The 'meta-data' operation reports this RA's meta-data information
The 'status' operation reports whether the RabbitMQ server is running
The 'monitor' operation reports whether the RabbitMQ server seems to be working
UEND
}
meta_data() {
    # Print the OCF meta-data XML for this agent to stdout.
    # Parameter defaults are interpolated from the OCF_RESKEY_*_default
    # variables. The declared content types match the values: mnesia_base is
    # a path (string), node_port and the two *_time parameters are integers;
    # only "debug" is a boolean.
    cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="${OCF_RESKEY_binary}">
<version>1.0</version>
<longdesc lang="en">
Resource agent for ${OCF_RESKEY_binary}
</longdesc>
<shortdesc lang="en">Resource agent for ${OCF_RESKEY_binary}</shortdesc>
<parameters>
<parameter name="binary" unique="0" required="0">
<longdesc lang="en">
RabbitMQ binary
</longdesc>
<shortdesc lang="en">RabbitMQ binary</shortdesc>
<content type="string" default="${OCF_RESKEY_binary_default}" />
</parameter>
<parameter name="ctl" unique="0" required="0">
<longdesc lang="en">
rabbitmqctl binary
</longdesc>
<shortdesc lang="en">rabbitmqctl binary</shortdesc>
<content type="string" default="${OCF_RESKEY_ctl_default}" />
</parameter>
<parameter name="pid_file" unique="0" required="0">
<longdesc lang="en">
RabbitMQ PID file
</longdesc>
<shortdesc lang="en">RabbitMQ PID file</shortdesc>
<content type="string" default="${OCF_RESKEY_pid_file_default}" />
</parameter>
<parameter name="log_dir" unique="0" required="0">
<longdesc lang="en">
RabbitMQ log directory
</longdesc>
<shortdesc lang="en">RabbitMQ log directory</shortdesc>
<content type="string" default="${OCF_RESKEY_log_dir_default}" />
</parameter>
<parameter name="username" unique="0" required="0">
<longdesc lang="en">
RabbitMQ user name
</longdesc>
<shortdesc lang="en">RabbitMQ user name</shortdesc>
<content type="string" default="${OCF_RESKEY_username_default}" />
</parameter>
<parameter name="groupname" unique="0" required="0">
<longdesc lang="en">
RabbitMQ group name
</longdesc>
<shortdesc lang="en">RabbitMQ group name</shortdesc>
<content type="string" default="${OCF_RESKEY_groupname_default}" />
</parameter>
<parameter name="shutdown_time" unique="0" required="0">
<longdesc lang="en">
Timeout for shutdown rabbitmq server before kill -TERM
</longdesc>
<shortdesc lang="en">Timeout for shutdown rabbitmq server</shortdesc>
<content type="integer" default="${OCF_RESKEY_shutdown_time_default}" />
</parameter>
<parameter name="start_time" unique="0" required="0">
<longdesc lang="en">
Timeout for start rabbitmq server
</longdesc>
<shortdesc lang="en">Timeout for start rabbitmq server</shortdesc>
<content type="integer" default="${OCF_RESKEY_start_time_default}" />
</parameter>
<parameter name="debug" unique="0" required="0">
<longdesc lang="en">
The debug flag for agent (${OCF_RESKEY_binary}) instance.
In the /tmp/ directory will be created rmq-* files for log
some operations and ENV values inside OCF-script.
</longdesc>
<shortdesc lang="en">AMQP server (${OCF_RESKEY_binary}) debug flag</shortdesc>
<content type="boolean" default="${OCF_RESKEY_debug_default}" />
</parameter>
<parameter name="mnesia_base" unique="0" required="0">
<longdesc lang="en">
Base directory for storing Mnesia files
</longdesc>
<shortdesc lang="en">Base directory for storing Mnesia files</shortdesc>
<content type="string" default="${OCF_RESKEY_mnesia_base_default}" />
</parameter>
<parameter name="node_port" unique="0" required="0">
<longdesc lang="en">
${OCF_RESKEY_binary} should listen on this port
</longdesc>
<shortdesc lang="en">${OCF_RESKEY_binary} should listen on this port</shortdesc>
<content type="integer" default="${OCF_RESKEY_node_port_default}" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="20" />
<action name="stop" timeout="20" />
<action name="status" timeout="20" />
<action name="monitor" depth="0" timeout="30" interval="5" />
<action name="monitor" depth="0" timeout="30" interval="3" role="Master"/>
<action name="promote" timeout="30" />
<action name="demote" timeout="30" />
<action name="notify" timeout="20" />
<action name="validate-all" timeout="5" />
<action name="meta-data" timeout="5" />
</actions>
</resource-agent>
END
}
#######################################################################
# Functions invoked by resource manager actions
now() {
    # Emit the current time as whole seconds since the Unix epoch.
    date -u '+%s'
}
master_score() {
    # Publish the master preference of this node through crm_master.
    #   $1 - score to set; an empty or missing value means 0.
    # Returns OCF_SUCCESS when crm_master succeeded, OCF_ERR_GENERIC otherwise.
    local value="${1:-0}"

    if ocf_run crm_master -l reboot -v $value ; then
        return $OCF_SUCCESS
    fi
    return $OCF_ERR_GENERIC
}
my_host() {
    # Tell whether the local host appears in a list of host names.
    #   $1 - whitespace separated list of host names (short or FQDN).
    # Returns 0 when one entry, cut at its first dot, equals `hostname -s`,
    # and 1 otherwise (including for an empty list).
    #
    # Every entry of the list is examined. The previous loop iterated over the
    # literal word "hostlist" and parsed the whole list on each pass, so only
    # the first entry was ever compared.
    local hostlist="$1"
    local this_host
    local host

    this_host=$(hostname -s)
    for host in $hostlist ; do
        # ${host%%.*} strips the domain part, leaving the short host name.
        if [[ "${host%%.*}" == "${this_host}" ]] ; then
            return 0
        fi
    done
    return 1
}
srv_uptime() {
    # Print how many seconds ago the server start timestamp was recorded.
    # The timestamp is read from the file named by RMQ_START_TIME; 0 is
    # printed when the file is missing, unreadable, or does not contain a
    # plain non-negative integer. Always returns OCF_SUCCESS.
    local stime
    local rc

    # Quoted so that an unset/empty path fails instead of making cat read stdin.
    stime=$(cat "${RMQ_START_TIME}" 2>/dev/null)
    rc=$?
    if [[ $rc == 0 && "${stime}" =~ ^[0-9]+$ ]] ; then
        # 10# forces base 10 so a value with leading zeros is not read as octal.
        echo $(( $(now) - 10#${stime} ))
    else
        echo 0
    fi
    return $OCF_SUCCESS
}
rmq_setup_env() {
    # Prepare the environment shared by all actions:
    #  - exports RABBITMQ_NODENAME / RABBITMQ_NODE_PORT / RABBITMQ_PID_FILE,
    #  - sets MNESIA_FILES, RMQ_START_TIME and MASTER_FLAG_FILE,
    #  - creates the PID file directory when it is missing,
    #  - exports LL, the log prefix built from the resource instance name.
    local short_host
    short_host=$(hostname -s)
    local node_name="rabbit@${short_host}"

    export RABBITMQ_NODENAME="${node_name}"
    export RABBITMQ_NODE_PORT=$OCF_RESKEY_node_port
    export RABBITMQ_PID_FILE=$OCF_RESKEY_pid_file

    MNESIA_FILES="${OCF_RESKEY_mnesia_base}/${node_name}"
    RMQ_START_TIME="${MNESIA_FILES}/ocf_server_start_time.txt"
    MASTER_FLAG_FILE="${MNESIA_FILES}/ocf_master_for_${OCF_RESOURCE_INSTANCE}"

    # The PID file directory must exist and belong to the RabbitMQ user.
    local pid_dir=$( dirname $OCF_RESKEY_pid_file )
    if [ ! -d ${pid_dir} ] ; then
        mkdir -p ${pid_dir}
        chown -R ${OCF_RESKEY_username}:${OCF_RESKEY_groupname} ${pid_dir}
        chmod 755 ${pid_dir}
    fi

    export LL="${OCF_RESOURCE_INSTANCE}:"
}
rabbit_node_name() {
    # Print the RabbitMQ node name for a host: "rabbit@" followed by the part
    # of $1 that precedes its first dot.
    local short_name
    short_name=$(echo "$1" | awk -F. '{print $1}')
    echo "rabbit@"${short_name}
}
reset_mnesia() {
    # Wipe the local Mnesia state so the server can be started from scratch.
    #  - When the RabbitMQ runtime answers `status`, try `reset` and fall back
    #    to `force_reset`; a failure of both is only logged.
    #  - Otherwise remove every regular file below MNESIA_FILES.
    # Always returns OCF_SUCCESS.
    #
    # rc is local so the caller's variable of the same name is not clobbered,
    # and the file removal is skipped when MNESIA_FILES is empty or not a
    # directory: `find -type f -delete` without a start path would otherwise
    # operate on the current directory.
    local rc

    get_status ; rc=$?
    if [[ $rc == 0 ]] ; then
        ocf_run ${OCF_RESKEY_ctl} reset ; rc=$?
        if [[ $rc != 0 ]] ; then
            ocf_run ${OCF_RESKEY_ctl} force_reset ; rc=$?
            if [[ $rc != 0 ]] ; then
                ocf_log err "Mnesia couldn't cleaned, even by force-reset command."
            fi
        fi
    elif [[ -n "${MNESIA_FILES}" && -d "${MNESIA_FILES}" ]] ; then
        ocf_run find "${MNESIA_FILES}" -type f -delete
        ocf_log warn "Mnesia files appear corrupted and have been removed."
    else
        ocf_log warn "Mnesia directory '${MNESIA_FILES}' not found, nothing to remove."
    fi
    return $OCF_SUCCESS
}
get_nodes__base(){
    # Print, space separated on one line, the node names found on the
    # `cluster_status` output line that contains "{<key>,".
    #   $1 - key to look for, e.g. "nodes" or "running_nodes".
    # Returns OCF_ERR_GENERIC (and prints an empty line) when rabbitmqctl
    # fails, OCF_SUCCESS otherwise.
    local cl="{$1,"
    local c_status

    # Declaration and assignment are separate on purpose: `local v=$(cmd)`
    # reports the status of `local` (always 0), which made the error branch
    # below unreachable.
    c_status=$(${OCF_RESKEY_ctl} cluster_status 2>/dev/null)
    if [[ $? != 0 ]] ; then
        echo ''
        return $OCF_ERR_GENERIC
    fi
    # translate line like '{running_nodes,['rabbit@node-1','rabbit@node-2','rabbit@node-3']},' to node_list
    echo $(echo "${c_status}" | grep "${cl}" | awk -F, '{ for (i=1;i<=NF;i++) { if ($i ~ /@/) { gsub(/[\[\]}{]/,"",$i); print $i; } }}' | tr -d "\'")
    return $OCF_SUCCESS
}
get_nodes() {
    # Print the space separated list of all nodes known to the cluster and
    # return the status of get_nodes__base.
    # The status is captured before echoing; `echo $(...); return $?` returned
    # the status of echo, i.e. always 0.
    local nodes
    local rc

    nodes=$(get_nodes__base nodes)
    rc=$?
    echo $nodes
    return $rc
}
get_running_nodes() {
    # Print the space separated list of currently running cluster nodes and
    # return the status of get_nodes__base.
    # The status is captured before echoing; `echo $(...); return $?` returned
    # the status of echo, i.e. always 0.
    local nodes
    local rc

    nodes=$(get_nodes__base running_nodes)
    rc=$?
    echo $nodes
    return $rc
}
check_need_join_to() {
    # Decide whether this node still has to join the cluster of host $1.
    #   $1 - host name of the node to join to.
    # Returns 0 when the RabbitMQ node of $1 is NOT in the list printed by
    # get_nodes (a join is needed) and 1 when it is already listed.
    #
    # rc is local so the caller's rc is left alone, and both sides of the
    # comparison are quoted so a node name is never treated as a glob pattern.
    local join_to=$(rabbit_node_name $1)
    local node
    local cluster_nodes=$(get_nodes)
    local rc=0

    for node in $cluster_nodes ; do
        if [[ "${join_to}" == "${node}" ]] ; then
            rc=1
            break
        fi
    done
    return $rc
}
kill_rmq_and_remove_pid() {
    # Kill (SIGKILL) the process recorded in the PID file, if it is still
    # alive, and remove the PID file. Does nothing when the file is absent.
    #
    # The file content is checked to be a plain number first: with an empty
    # value the old test looked at "/proc//", which always exists, and then
    # ran `kill -9` without a PID.
    local pid
    local LH="${LL} kill_rmq_and_remove_pid():"

    if [[ -f $OCF_RESKEY_pid_file ]] ; then
        pid=$(cat "$OCF_RESKEY_pid_file")
        if [[ "${pid}" =~ ^[0-9]+$ ]] ; then
            if [[ -d /proc/${pid}/ ]] ; then
                ocf_run kill -9 $pid
                ocf_log warn "${LH} RMQ-runtime (beam) PID=${pid} stopped by 'kill -9', sorry..."
            fi
        else
            ocf_log warn "${LH} PID-file '${OCF_RESKEY_pid_file}' doesn't contain a valid PID, nothing to kill."
        fi
        ocf_run rm -f "$OCF_RESKEY_pid_file"
    fi
}
action_validate() {
    # Validation hook run before every action except meta-data/usage.
    # No parameter is checked yet, so every configuration is accepted.
    # todo(sv): validate some incoming parameters
    return ${OCF_SUCCESS}
}
# Join the local RabbitMQ node to the cluster of another node.
#   $1 - host name of the node to join; converted with rabbit_node_name.
# Sequence: stop the rabbit application if it is running, run
# `rabbitmqctl join_cluster`, wait 2 seconds, start the application again.
# On a failure of any step the error is logged, `killall beam` is run and the
# function returns OCF_SUCCESS.
# NOTE(review): every failure path returns OCF_SUCCESS - presumably so that the
# calling notify action never reports an error; confirm this is intended.
join_to_cluster() {
local node="$1"
local rmq_node=$(rabbit_node_name $node)
local rc
local LH="${LL} join_to_cluster():"
ocf_log info "${LH} start."
ocf_log info "${LH} Joining to cluster by node '${rmq_node}'."
# join_cluster is issued with the rabbit application stopped
get_status rabbit ; rc=$?
if [[ $rc == $OCF_SUCCESS ]] ; then
ocf_log info "${LH} rabbitmq app will be stopped."
stop_rmq_server_app
rc=$?
if [[ $rc != 0 ]] ; then
ocf_log err "${LH} Can't stop rabbitmq app by stop_app command."
ocf_run killall beam
return $OCF_SUCCESS
fi
fi
# ccc=$(${OCF_RESKEY_ctl} cluster_status 2>&1)
# ocf_log info "$ccc"
ocf_run ${OCF_RESKEY_ctl} join_cluster $rmq_node
rc=$?
if [[ $rc != 0 ]] ; then
ocf_log err "${LH} Can't join to cluster by node '${rmq_node}'."
ocf_run killall beam
return $OCF_SUCCESS
fi
sleep 2
start_rmq_server_app
rc=$?
if [[ $rc != 0 ]] ; then
ocf_log err "${LH} Can't start RMQ app after join to cluster."
ocf_run killall beam
return $OCF_SUCCESS
else
ocf_log info "${LH} Joined to cluster succesfully."
fi
ocf_log info "${LH} end."
# only reached when start_rmq_server_app returned 0, so rc is 0 here
return $rc
}
unjoin_nodes_from_cluster() {
    # Make the local node forget cluster members that were stopped.
    #   $1 - whitespace separated list of host names.
    # Each host is mapped to its RabbitMQ node name; the local node is skipped
    # and `forget_cluster_node` is run for every name that get_nodes lists.
    # The outcome of each removal is only logged. Always returns OCF_SUCCESS.
    local nodelist="$1"
    local host
    local candidate
    local member
    local status
    local cluster_members
    local LH="${LL} unjoin_nodes_from_cluster():"

    cluster_members=$(get_nodes)
    # no nodes in node list, nothing to do
    [[ $? == $OCF_SUCCESS ]] || return $OCF_SUCCESS

    for host in $nodelist ; do
        candidate=$(rabbit_node_name $host)
        # never remove ourselves
        [[ "$candidate" == "$RABBITMQ_NODENAME" ]] && continue
        for member in $cluster_members ; do
            [[ "$candidate" == "$member" ]] || continue
            # unjoin node
            ocf_run ${OCF_RESKEY_ctl} forget_cluster_node ${candidate} 2>&1
            status=$?
            if [[ $status == 0 ]] ; then
                ocf_log info "${LH} node '${candidate}' unjoined succesfully."
            else
                ocf_log info "${LH} unjoining node '${candidate}' was fail."
            fi
        done
    done
    return $OCF_SUCCESS
}
stop_server_process() {
    # Stop the whole RabbitMQ runtime.
    # `rabbitmqctl stop <pidfile>` is started in the background (it can hang)
    # and the server PID is polled for up to OCF_RESKEY_shutdown_time seconds.
    # If the server is still alive afterwards the hanging stop command is
    # killed; in every case kill_rmq_and_remove_pid finishes the job.
    # Returns OCF_ERR_GENERIC when the PID file can't be read, else OCF_SUCCESS.
    local pid
    local stop_pid
    local ts_end
    local rc
    local LH="${LL} stop_server_process():"

    pid=$(cat ${OCF_RESKEY_pid_file})
    if [[ $? != 0 ]] ; then
        return $OCF_ERR_GENERIC
    fi
    # detach 'stop' command and monitor its result later
    # because 'rabbitmqctl stop' can hangs
    # Redirect order matters: the file must be opened before "2>&1", otherwise
    # stderr is duplicated from the old stdout and never reaches the log.
    ${OCF_RESKEY_ctl} stop ${OCF_RESKEY_pid_file} >> "${OCF_RESKEY_log_dir}/shutdown_log" 2>&1 &
    stop_pid=$! # PID of root of stop command process tree
    ts_end=$(( $(now) + ${OCF_RESKEY_shutdown_time} ))
    rc=$OCF_ERR_GENERIC
    while [ $(now) -lt $ts_end ] ; do
        # waiting for normal shutdown of rabbitmq-server
        if [[ ! -d /proc/${pid} ]] ; then
            rc=$OCF_SUCCESS
            break
        fi
        ocf_log info "${LH} Waiting for stop RMQ-server process (PID=${pid})..."
        sleep 3
    done
    if [[ $rc == $OCF_SUCCESS ]] ; then
        ocf_log info "${LH} RMQ-server process (PID=${pid}) stopped succesfully."
    else
        # RMQ-server process can't stop succesfully
        ocf_log warn "${LH} RMQ-server process (PID=${pid}) didn't stop in ${OCF_RESKEY_shutdown_time} seconds."
        if [[ -d /proc/$stop_pid/ ]] ; then
            ocf_run kill -9 $stop_pid
        fi
    fi
    kill_rmq_and_remove_pid
    return $OCF_SUCCESS
}
stop_rmq_server_app() {
    # Stop the rabbit application while keeping the Erlang runtime alive.
    # `rabbitmqctl stop_app` is started in the background (it may hang) and
    # `get_status rabbit` is polled for up to 30 seconds.
    # Returns OCF_SUCCESS when the runtime is not running at all or the
    # application stopped in time, OCF_ERR_GENERIC on timeout (the hanging
    # stop_app command is killed in that case).
    local rc
    local stop_pid
    local waiting_to_stop_timeout=30 # todo: make as incoming parameter
    local ts_end
    local LH="${LL} stop_rmq_server_app():"

    get_status
    rc=$?
    if [[ $rc != 0 ]] ; then
        return $OCF_SUCCESS
    fi
    # detach stop process, because stop_app may be hung
    # The file redirect comes first so that "2>&1" sends stderr to the log too.
    ${OCF_RESKEY_ctl} stop_app > "${OCF_RESKEY_log_dir}/shutdown_log" 2>&1 &
    stop_pid=$! # PID of root of stop command process tree
    # waiting for rabbitmq stopped
    ts_end=$(( $(now) + $waiting_to_stop_timeout ))
    # rc stays OCF_ERR_GENERIC unless the application is seen stopped. Storing
    # the get_status result in rc left it at 0 after a timeout, which was then
    # reported as a successful stop.
    rc=$OCF_ERR_GENERIC
    while [ $(now) -lt $ts_end ]; do
        if ! get_status rabbit ; then
            rc=$OCF_SUCCESS
            break
        fi
        sleep 2
    done
    if [[ $rc == $OCF_SUCCESS ]] ; then
        ocf_log info "${LH} RMQ-server app stopped succesfully."
    else
        # RMQ-server can't stop succesfully
        ocf_log err "${LH} RMQ-server app didn't stop in ${waiting_to_stop_timeout} seconds."
        if [[ -d /proc/$stop_pid/ ]] ; then
            ocf_run kill -9 $stop_pid
        fi
    fi
    return $rc
}
start_beam_process() {
    # Start the Erlang runtime (beam) of RabbitMQ without the rabbit
    # application (RABBITMQ_NODE_ONLY=1) and wait until its PID file names a
    # live process.
    #  - A stale PID file is removed first; a beam process it points to is
    #    killed, any other live process makes the function fail.
    #  - Waits up to OCF_RESKEY_start_time seconds for the new process.
    # Returns OCF_SUCCESS when the process is up, OCF_ERR_GENERIC otherwise.
    local rc
    local ts_end
    local pf_end
    local pid
    local LH="${LL} start_beam_process():"

    # remove old PID-file if it exists
    if [[ -f $OCF_RESKEY_pid_file ]] ; then
        ocf_log warn "${LH} found old PID-file '${OCF_RESKEY_pid_file}'."
        pid=$(cat ${OCF_RESKEY_pid_file})
        if [[ "${pid}" =~ ^[0-9]+$ && -d /proc/${pid} ]] ; then
            # Read the command line straight from /proc (NUL separated, hence
            # tr) instead of piping it through ocf_run.
            # NOTE(review): ocf_run presumably logs the command output rather
            # than printing it, which would leave the old pipe empty - confirm
            # against ocf-shellfuncs.
            if cat /proc/${pid}/cmdline 2>/dev/null | tr '\0' ' ' | grep -q 'bin/beam' ; then
                ocf_log warn "${LH} found beam process with PID=${pid}, killing...'."
                ocf_run kill -9 $pid
            else
                ocf_log err "${LH} found unknown process with PID=${pid} from '${OCF_RESKEY_pid_file}'."
                return $OCF_ERR_GENERIC
            fi
        fi
        ocf_run rm -rf $OCF_RESKEY_pid_file
    fi
    # run beam process
    RABBITMQ_NODE_ONLY=1 ${OCF_RESKEY_binary} > "${OCF_RESKEY_log_dir}/startup_log" 2>/dev/null &
    ts_end=$(( $(now) + ${OCF_RESKEY_start_time} ))
    rc=$OCF_ERR_GENERIC
    while [ $(now) -lt ${ts_end} ]; do
        # waiting for normal start of beam
        pid=0
        pf_end=$(( $(now) + 3 ))
        while [ $(now) -lt ${pf_end} ]; do
            # waiting for OCF_RESKEY_pid_file of beam process
            if [[ -f $OCF_RESKEY_pid_file ]] ; then
                pid=$(cat ${OCF_RESKEY_pid_file})
                break
            fi
            sleep 1
        done
        # Only a positive number counts as a PID: a file that exists but is
        # still empty used to pass, because "/proc/" is always a directory.
        if [[ "${pid}" =~ ^[1-9][0-9]*$ && -d /proc/${pid} ]] ; then
            rc=$OCF_SUCCESS
            break
        fi
        sleep 2
    done
    if [[ $rc != $OCF_SUCCESS ]]; then
        if [[ "${pid}" == "0" ]] ; then
            ocf_log warn "${LH} PID-file '${OCF_RESKEY_pid_file}' not found"
        fi
        ocf_log err "${LH} RMQ-runtime (beam) didn't start succesfully (rc=${rc})."
    fi
    return $rc
}
try_to_start_rmq_app() {
    # Run `rabbitmqctl start_app` once, writing its output to a log file.
    #   $1 - log file; defaults to ${OCF_RESKEY_log_dir}/startup_log.
    # After a successful start_app `rabbitmqctl wait <pidfile>` is run too; its
    # result is not taken into account.
    # Returns OCF_SUCCESS when start_app succeeded, OCF_ERR_GENERIC otherwise.
    local startup_log="${1:-${OCF_RESKEY_log_dir}/startup_log}"
    local LH="${LL} start_rmq_server_app():"

    if ${OCF_RESKEY_ctl} start_app > $startup_log 2>&1 ; then
        ocf_run ${OCF_RESKEY_ctl} wait ${OCF_RESKEY_pid_file}
        return $OCF_SUCCESS
    fi
    return $OCF_ERR_GENERIC
}
# Bring up the Erlang runtime and then the rabbit application.
#  1. If `get_status` fails, start the runtime with start_beam_process.
#  2. If the rabbit application is not running, try to start it:
#     - on success set master score 1 and create the start timestamp file
#       (unless it already exists);
#     - on failure retry up to 10 times: reset Mnesia, start the application
#       and stop it again. A successful round sets master score 0 and ends the
#       loop with the application left stopped.
#  3. If the application could not be stopped, or no round succeeded,
#     `killall -9 beam` is run.
# Returns OCF_SUCCESS or OCF_ERR_GENERIC.
# NOTE(review): the loop counter "a" is not declared local.
start_rmq_server_app() {
local rc
local startup_log="${OCF_RESKEY_log_dir}/startup_log"
local startup_output
local LH="${LL} start_rmq_server_app():"
ocf_log info "${LH} begin."
get_status
if [[ $? != $OCF_SUCCESS ]] ; then
ocf_log info "${LH} RMQ-runtime (beam) not started, starting..."
start_beam_process || return $OCF_ERR_GENERIC
fi
get_status rabbit
if [[ $? != $OCF_SUCCESS ]] ; then
ocf_log info "${LH} RMQ-server app not started, starting..."
try_to_start_rmq_app "$startup_log"
rc=$?
if [[ $rc == $OCF_SUCCESS ]] ; then
# rabbitmq-server started successfuly as master of cluster
master_score 1 # minimal positive master-score for this node.
# create timestamp file
test -f $RMQ_START_TIME || now > $RMQ_START_TIME
else
# error at start RMQ-server
ocf_log warn "${LH} RMQ-server app can't start without Mnesia cleaning."
for ((a=10; a > 0 ; a--)) ; do
# rc is preset to the error code so that leaving the loop through "break"
# on a reset_mnesia failure is reported as an error below
rc=$OCF_ERR_GENERIC
reset_mnesia || break
try_to_start_rmq_app "$startup_log"
rc=$?
if [[ $rc == $OCF_SUCCESS ]]; then
# the application started on clean Mnesia; it is stopped again right away
stop_rmq_server_app ; rc=$?
if [[ $rc == $OCF_SUCCESS ]]; then
ocf_log info "${LH} RMQ-server app Mnesia cleaned successfully."
rc=$OCF_SUCCESS
master_score 0
break
else
ocf_log err "${LH} RMQ-server app can't stopped while Mnesia cleaning. beam will be killed emergency."
ocf_run killall -9 beam
return $OCF_ERR_GENERIC
fi
fi
done
if [[ $rc == $OCF_ERR_GENERIC ]] ; then
ocf_log err "${LH} RMQ-server can't started while many tries. beam will be killed emergency."
ocf_run killall -9 beam
fi
fi
else
# the rabbit application is already running
rc=$OCF_SUCCESS
fi
ocf_log info "${LH} end."
return $rc
}
get_status() {
    # Query `rabbitmqctl status`.
    #   $1 - optional application name (e.g. "rabbit").
    # Returns OCF_NOT_RUNNING when rabbitmqctl fails. Without $1 a successful
    # rabbitmqctl call yields 0; with $1 the result is OCF_SUCCESS only when
    # the status output contains "{<name>,", otherwise OCF_NOT_RUNNING.
    local what="$1"
    local output
    local result

    output=$( ${OCF_RESKEY_ctl} status 2>&1 )
    result=$?
    [[ $result != 0 ]] && return $OCF_NOT_RUNNING

    if [[ -n $what ]] ; then
        if echo "$output" | grep "\{${what}," 2>&1 > /dev/null ; then
            result=$OCF_SUCCESS
        else
            result=$OCF_NOT_RUNNING
        fi
    fi
    return $result
}
action_status() {
    # 'status' action: report the result of get_status unchanged.
    get_status
    return $?
}
get_monitor() {
    # Determine the state of the resource and refresh the node attributes.
    # Returns:
    #   OCF_NOT_RUNNING    - rabbitmqctl status failed; master score and the
    #                        "master_for_queues-<instance>" attribute are set to 0
    #   OCF_RUNNING_MASTER - running and the master flag file exists
    #   OCF_SUCCESS        - running without the master flag file
    # While running, the master score is set to the server uptime and the
    # attribute to the number of `list_queues pid` lines naming this node.
    local rc
    local score
    local queues
    local master_for_queues="master_for_queues-$OCF_RESOURCE_INSTANCE"
    local LH="${LL} get_monitor():"

    get_status
    rc=$?
    if [[ $rc == $OCF_NOT_RUNNING ]] ; then
        ocf_log info "${LH} get_status() returns ${rc}."
        master_score 0
        ocf_run crm_attribute -N $(crm_node -n) -n ${master_for_queues} -l reboot -v 0
        return $OCF_NOT_RUNNING
    elif [[ $rc == $OCF_SUCCESS && -f ${MASTER_FLAG_FILE} ]] ; then
        rc=$OCF_RUNNING_MASTER
    fi
    master_score $(srv_uptime)
    # The listing and the counting are separate steps: `grep -c` exits with 1
    # when it counts zero lines, so checking the status of the pipeline skipped
    # the update exactly when the attribute had to drop to 0.
    queues=$( ${OCF_RESKEY_ctl} list_queues pid 2>/dev/null )
    if [[ $? == 0 ]] ; then
        score=$( echo "${queues}" | grep -c "$RABBITMQ_NODENAME" )
        ocf_run crm_attribute -N $(crm_node -n) -n ${master_for_queues} -l reboot -v $score
    fi
    return $rc
}
action_monitor() {
    # 'monitor' action: run get_monitor between two debug log lines and
    # return its result.
    local status
    local LH="${LL} monitor:"

    ocf_log debug "${LH} action start."
    get_monitor
    status=$?
    ocf_log debug "${LH} action end."
    return $status
}
action_start() {
    # 'start' action.
    # Returns OCF_SUCCESS right away when the RabbitMQ runtime already answers
    # `status`; otherwise returns the result of start_rmq_server_app.
    # With the debug parameter set to "true" the environment is dumped to
    # /tmp/rmq-start.log and a summary line is appended to /tmp/rmq-ocf.log.
    local status
    local LH="${LL} start:"

    if [[ ${OCF_RESKEY_debug} == "true" ]] ; then
        d=$(date '+%Y%m%d %H:%M:%S')
        { echo $d ; env ; } >> /tmp/rmq-start.log
        echo "$d [start] start='${OCF_RESKEY_CRM_meta_notify_start_uname}' stop='${OCF_RESKEY_CRM_meta_notify_stop_uname}' active='${OCF_RESKEY_CRM_meta_notify_active_uname}' inactive='${OCF_RESKEY_CRM_meta_notify_inactive_uname}'" >> /tmp/rmq-ocf.log
    fi

    ocf_log info "${LH} action begin."
    get_status
    status=$?
    if [[ $status == $OCF_SUCCESS ]] ; then
        ocf_log warn "${LH} RMQ-runtime (beam) already started."
        return $OCF_SUCCESS
    fi

    ocf_log info "${LH} RMQ going to start."
    start_rmq_server_app
    status=$?
    [[ $status == $OCF_SUCCESS ]] && ocf_log info "${LH} RMQ started succesfully."
    ocf_log info "${LH} action end."
    return $status
}
action_stop() {
    # 'stop' action.
    # Returns OCF_SUCCESS right away when the runtime is not running.
    # Otherwise removes the master flag file, zeroes the master score, stops
    # the server process, removes the start timestamp file, sleeps 10 seconds
    # and returns the result of stop_server_process.
    # With the debug parameter set to "true" the environment is dumped to
    # /tmp/rmq-stop.log and a summary line is appended to /tmp/rmq-ocf.log.
    local status
    local LH="${LL} stop:"

    if [[ ${OCF_RESKEY_debug} == "true" ]] ; then
        d=$(date '+%Y%m%d %H:%M:%S')
        { echo $d ; env ; } >> /tmp/rmq-stop.log
        echo "$d [stop] start='${OCF_RESKEY_CRM_meta_notify_start_uname}' stop='${OCF_RESKEY_CRM_meta_notify_stop_uname}' active='${OCF_RESKEY_CRM_meta_notify_active_uname}' inactive='${OCF_RESKEY_CRM_meta_notify_inactive_uname}'" >> /tmp/rmq-ocf.log
    fi

    ocf_log info "${LH} action begin."
    get_status
    status=$?
    if [[ $status == $OCF_NOT_RUNNING ]] ; then
        ocf_log warn "${LH} RMQ-runtime (beam) not running."
        return $OCF_SUCCESS
    fi

    # this node is no longer a master candidate: drop flag and score
    ocf_run rm -f ${MASTER_FLAG_FILE}
    master_score 0

    ocf_log info "${LH} RMQ-runtime (beam) going to down."
    stop_server_process
    status=$?

    # remove file with rmq-server start timestamp
    rm -f ${RMQ_START_TIME}
    #todo: make this timeout corresponded to the stop timeout for resource
    sleep 10
    ocf_log info "${LH} action end."
    return $status
}
#######################################################################
jjj_join () {
    # Join this node to the cluster of host $1 when that makes sense:
    # nothing happens when $1 is empty, when my_host reports $1 as the local
    # host, or when check_need_join_to reports that no join is needed.
    local join_to="$1"
    local rc
    local LH="${LL} jjj_join:"

    my_host ${join_to}
    rc=$?
    ocf_log debug "${LH} node='${join_to}' rc='${rc}'"

    # Check whether join to himself or master host not given
    [[ $rc != 0 && $join_to != '' ]] || return 0

    # check, whether this host already joined to cluster
    check_need_join_to ${join_to}
    rc=$?
    [[ $rc == $OCF_SUCCESS ]] || return 0

    ocf_log info "${LH} Joining to cluster by node '${join_to}'"
    join_to_cluster "${join_to}"
    ocf_log info "${LH} May be joined."
}
action_notify() {
    # 'notify' action. Only "post" notifications are handled:
    #   post-promote - join the cluster of the promoted node
    #   post-start   - when my_host finds this host in the started list, join
    #                  the cluster of the current master
    #   post-stop    - forget the stopped nodes
    # Always returns OCF_SUCCESS.
    # With the debug parameter set to "true" the environment is dumped to
    # /tmp/rmq-notify.log and a summary line is appended to /tmp/rmq-ocf.log.
    local rc
    local operation="$OCF_RESKEY_CRM_meta_notify_operation"
    local LH="${LL} notify:"

    if [[ ${OCF_RESKEY_debug} == "true" ]] ; then
        d=$(date '+%Y%m%d %H:%M:%S')
        { echo $d ; env ; } >> /tmp/rmq-notify.log
        echo "$d [notify] ${OCF_RESKEY_CRM_meta_notify_type}-${OCF_RESKEY_CRM_meta_notify_operation} promote='${OCF_RESKEY_CRM_meta_notify_promote_uname}' demote='${OCF_RESKEY_CRM_meta_notify_demote_uname}' master='${OCF_RESKEY_CRM_meta_notify_master_uname}' slave='${OCF_RESKEY_CRM_meta_notify_slave_uname}' start='${OCF_RESKEY_CRM_meta_notify_start_uname}' stop='${OCF_RESKEY_CRM_meta_notify_stop_uname}' active='${OCF_RESKEY_CRM_meta_notify_active_uname}' inactive='${OCF_RESKEY_CRM_meta_notify_inactive_uname}'" >> /tmp/rmq-ocf.log
    fi

    # PRE- notifications (and anything else) need no handling
    [[ ${OCF_RESKEY_CRM_meta_notify_type} == 'post' ]] || return $OCF_SUCCESS

    # POST- anything notify section
    if [[ "$operation" == "promote" ]] ; then
        ocf_log info "${LH} post-promote begin."
        jjj_join ${OCF_RESKEY_CRM_meta_notify_promote_uname}
        ocf_log info "${LH} post-promote end."
    elif [[ "$operation" == "start" ]] ; then
        ocf_log info "${LH} post-start begin."
        # check did this event from this host
        my_host "$OCF_RESKEY_CRM_meta_notify_start_uname"
        rc=$?
        if [[ $rc == $OCF_SUCCESS ]] ; then
            jjj_join ${OCF_RESKEY_CRM_meta_notify_master_uname}
        fi
        ocf_log info "${LH} post-start end."
    elif [[ "$operation" == "stop" ]] ; then
        # if rabbitmq-server stops on any another node, we should remove it from cluster (as ordinary operation)
        ocf_log info "${LH} post-stop begin."
        unjoin_nodes_from_cluster "${OCF_RESKEY_CRM_meta_notify_stop_uname}"
        ocf_log info "${LH} post-stop end."
    fi
    return $OCF_SUCCESS
}
# 'promote' action: turn this node into the master.
# The state reported by get_monitor decides what happens:
#   OCF_SUCCESS        - running as slave: start the rabbit application if it
#                        is not running, then create the master flag file
#   OCF_RUNNING_MASTER - already master: return OCF_SUCCESS
#   OCF_NOT_RUNNING    - return OCF_NOT_RUNNING
#   anything else      - exit the whole script with that code
# With the debug parameter set to "true" the environment is dumped to
# /tmp/rmq-promote.log and a summary line is appended to /tmp/rmq-ocf.log.
# NOTE(review): in the slave branch the master flag file is written even when
# start_rmq_server_app failed (rc != 0) - confirm this is intended.
action_promote() {
local rc
local pid
local LH="${LL} promote:"
if [[ ${OCF_RESKEY_debug} == "true" ]] ; then
d=$(date '+%Y%m%d %H:%M:%S')
echo $d >> /tmp/rmq-promote.log
env >> /tmp/rmq-promote.log
echo "$d [promote] start='${OCF_RESKEY_CRM_meta_notify_start_uname}' stop='${OCF_RESKEY_CRM_meta_notify_stop_uname}' active='${OCF_RESKEY_CRM_meta_notify_active_uname}' inactive='${OCF_RESKEY_CRM_meta_notify_inactive_uname}'" >> /tmp/rmq-ocf.log
fi
ocf_log info "${LH} action begin."
get_monitor
rc=$?
case "$rc" in
"$OCF_SUCCESS")
# Running as slave. Normal, expected behavior.
ocf_log info "${LH} Resource is currently running as Slave"
# rabbitmqctl start_app if need
get_status rabbit
rc=$?
if [[ $rc != $OCF_SUCCESS ]] ; then
start_rmq_server_app
rc=$?
if [[ $rc == 0 ]] ; then
# create timestamp file
test -f $RMQ_START_TIME || now > $RMQ_START_TIME
# called for its side effects only; the return value is not used
get_monitor
fi
fi
;;
"$OCF_RUNNING_MASTER")
# Already a master. Unexpected, but not a problem.
ocf_log warn "${LH} Resource is already running as Master"
return $OCF_SUCCESS
;;
"$OCF_NOT_RUNNING")
# Currently not running. Need to start before promoting.
ocf_log err "${LH} Resource is currently not running"
#action_start
return $OCF_NOT_RUNNING
;;
*)
# Failed resource. Let the cluster manager recover.
ocf_log err "${LH} Unexpected error, cannot promote"
exit $rc
;;
esac
# transform slave RMQ-server to master
# (get_monitor reports OCF_RUNNING_MASTER once this file exists)
echo > ${MASTER_FLAG_FILE}
ocf_log info "${LH} action end."
return $rc
}
# 'demote' action: turn this node back into a slave.
# The state reported by get_monitor decides what happens:
#   OCF_RUNNING_MASTER - remove the start timestamp file, result OCF_SUCCESS
#   OCF_SUCCESS        - already slave: return OCF_SUCCESS immediately
#   OCF_NOT_RUNNING    - nothing to do, result OCF_SUCCESS
#   OCF_ERR_GENERIC    - run action_stop and use its result
#   anything else      - exit the whole script with that code
# Unless it returned early, the function removes the master flag file.
# With the debug parameter set to "true" the environment is dumped to
# /tmp/rmq-demote.log and a summary line is appended to /tmp/rmq-ocf.log.
# NOTE(review): the get_monitor visible in this file returns only
# OCF_NOT_RUNNING, OCF_SUCCESS or OCF_RUNNING_MASTER, so the OCF_ERR_GENERIC
# and default branches look unreachable - confirm.
action_demote() {
local rc
local pid
local LH="${LL} demote:"
if [[ ${OCF_RESKEY_debug} == "true" ]] ; then
d=`date '+%Y%m%d %H:%M:%S'`
echo $d >> /tmp/rmq-demote.log
env >> /tmp/rmq-demote.log
echo "$d [demote] start='${OCF_RESKEY_CRM_meta_notify_start_uname}' stop='${OCF_RESKEY_CRM_meta_notify_stop_uname}' active='${OCF_RESKEY_CRM_meta_notify_active_uname}' inactive='${OCF_RESKEY_CRM_meta_notify_inactive_uname}'" >> /tmp/rmq-ocf.log
fi
ocf_log info "${LH} action begin."
get_monitor
rc=$?
case "$rc" in
"$OCF_RUNNING_MASTER")
# Running as master. Normal, expected behavior.
ocf_log warn "${LH} Resource is currently running as Master"
# nothing to do, because rejoin, if need, will happens in post-promote notify
ocf_run rm -f $RMQ_START_TIME
rc=$OCF_SUCCESS
;;
"$OCF_SUCCESS")
# Alread running as slave. Nothing to do.
ocf_log warn "${LH} Resource is currently running as Slave"
return $OCF_SUCCESS
;;
"$OCF_NOT_RUNNING")
ocf_log warn "${LH} Try to demote currently not running resource. nothing to do."
rc=$OCF_SUCCESS
;;
"$OCF_ERR_GENERIC")
ocf_log err "${LH} Error while demote. Stopping resource."
action_stop
rc=$?
;;
*)
# Failed resource. Let the cluster manager recover.
ocf_log err "${LH} Unexpected error, cannot demote"
exit $rc
;;
esac
# transform master RMQ-server to slave
ocf_run rm -f ${MASTER_FLAG_FILE}
ocf_log info "${LH} action end."
return $rc
}
#######################################################################
# Entry point: prepare the environment, then dispatch on the action in $1.
# NOTE(review): rmq_setup_env runs for every action, so even "meta-data" and
# "usage" may create the PID file directory - confirm this is acceptable.
rmq_setup_env
# meta-data and usage/help are answered without parameter validation.
case "$1" in
meta-data) meta_data
exit $OCF_SUCCESS;;
usage|help) usage
exit $OCF_SUCCESS;;
esac
# Anything except meta-data and help must pass validation
action_validate || exit $?
# What kind of method was invoked?
# The status of the selected action is the last command status of the script,
# i.e. it becomes the exit code unless the action exits by itself.
case "$1" in
start) action_start;;
stop) action_stop;;
status) action_status;;
monitor) action_monitor;;
validate) action_validate;;
promote) action_promote;;
demote) action_demote;;
notify) action_notify;;
validate-all) action_validate;;
*) usage
exit $OCF_ERR_UNIMPLEMENTED;;
esac
###

View File

@ -10,7 +10,9 @@ class nova::rabbitmq(
$cluster = false,
$cluster_nodes = [], #Real node names to install RabbitMQ server onto.
$enabled = true,
$rabbit_node_ip_address = 'UNSET'
$rabbit_node_ip_address = 'UNSET',
$ha_mode = false,
$primary_controller = false
) {
# only configure nova after the queue is up
@ -21,39 +23,130 @@ class nova::rabbitmq(
$delete_guest_user = false
} else {
$delete_guest_user = true
rabbitmq_user { $userid:
admin => true,
password => $password,
provider => 'rabbitmqctl',
require => Class['rabbitmq::server'],
}
# I need to figure out the appropriate permissions
rabbitmq_user_permissions { "${userid}@${virtual_host}":
configure_permission => '.*',
write_permission => '.*',
read_permission => '.*',
provider => 'rabbitmqctl',
}->Anchor<| title == 'nova-start' |>
}
$service_ensure = 'running'
$service_enabled = true
} else {
$service_ensure = 'stopped'
$service_enabled = false
}
if ($ha_mode) {
$service_provider = 'pacemaker'
$service_name = 'p_rabbitmq-server'
} else {
$service_provider = undef
$service_name = 'rabbitmq-server'
}
if ($ha_mode and ! $primary_controller) {
$real_delete_guest_user = false
} else {
$real_delete_guest_user = $delete_guest_user
}
class { 'rabbitmq::server':
service_name => $service_name,
service_ensure => $service_ensure,
service_provider => $service_provider,
service_enabled => $service_enabled,
port => $port,
delete_guest_user => $delete_guest_user,
delete_guest_user => $real_delete_guest_user,
config_cluster => $cluster,
cluster_disk_nodes => $cluster_nodes,
version => $::openstack_version['rabbitmq_version'],
node_ip_address => $rabbit_node_ip_address,
node_ip_address => $rabbit_node_ip_address
}
if ($enabled) {
rabbitmq_vhost { $virtual_host:
provider => 'rabbitmqctl',
require => Class['rabbitmq::server'],
if ($ha_mode) {
# OCF script for Pacemaker
# and its dependencies
file {'rabbitmq-ocf':
path =>'/usr/lib/ocf/resource.d/mirantis/rabbitmq-server',
mode => '0755',
owner => root,
group => root,
source => "puppet:///modules/nova/ocf/rabbitmq",
}
# Disable the OS-level service, because rabbitmq-server is managed by Pacemaker.
service {'rabbitmq-server__disabled':
name => 'rabbitmq-server',
ensure => 'stopped',
enable => false,
}
File<| title == 'ocf-mirantis-path' |> -> File['rabbitmq-ocf']
Package['pacemaker'] -> File<| title == 'ocf-mirantis-path' |>
Package['pacemaker'] -> File['rabbitmq-ocf']
Package['rabbitmq-server'] ->
Service['rabbitmq-server__disabled'] ->
File['rabbitmq-ocf'] ->
Service["$service_name"]
if ($primary_controller) {
cs_resource {"$service_name":
ensure => present,
#cib => 'rabbitmq',
primitive_class => 'ocf',
provided_by => 'mirantis',
primitive_type => 'rabbitmq-server',
parameters => {
'node_port' => $port,
#'debug' => true,
},
metadata => {
'migration-threshold' => 'INFINITY'
},
multistate_hash => {
'type' => 'master',
},
ms_metadata => {
'notify' => 'true',
'ordered' => 'true',
'interleave' => 'false', # We shouldn't enable interleave, for parallel start of RA.
'master-max' => '1',
'master-node-max' => '1',
'target-role' => 'Master'
},
operations => {
'monitor' => {
'interval' => '30',
'timeout' => '60'
},
'monitor:Master' => { # name:role
'role' => 'Master',
'interval' => '27', # should be non-intercectable with interval from ordinary monitor
'timeout' => '60'
},
'start' => {
'timeout' => '120'
},
'stop' => {
'timeout' => '60'
},
'promote' => {
'timeout' => '120'
},
'demote' => {
'timeout' => '60'
},
'notify' => {
'timeout' => '60'
},
},
}
File['rabbitmq-ocf'] ->
Cs_resource["$service_name"] ->
Service["$service_name"]
}
exec { 'waiting for start rabbitmq-master':
command => '/bin/sleep 120'
}
Service["$service_name"] ->
Exec['waiting for start rabbitmq-master'] ->
Rabbitmq_user <||>
}
}

View File

@ -422,6 +422,7 @@ class openstack::controller (
max_overflow => $max_overflow,
idle_timeout => $idle_timeout,
novnc_address => $novnc_address,
ha_mode => $ha_mode,
}
######### Cinder Controller Services ########

View File

@ -32,6 +32,7 @@ class openstack::nova::controller (
$nova_user_password,
$nova_db_password,
$primary_controller = false,
$ha_mode = false,
# Network
$fixed_range = '10.0.0.0/24',
$floating_range = false,
@ -123,6 +124,8 @@ class openstack::nova::controller (
port => $rabbitmq_bind_port,
cluster_nodes => $rabbitmq_cluster_nodes,
cluster => $rabbit_cluster,
primary_controller => $primary_controller,
ha_mode => $ha_mode,
}
}
'qpid': {

View File

@ -1,338 +0,0 @@
#!/bin/bash
#
#
# RabbitMQ server resource agent
#
# Description: Manages a RabbitMQ server process as an HA resource
#
# Authors: Emilien Macchi
# Mainly inspired by the Nova Network resource agent written by Emilien Macchi & Sebastien Han
#
# Support: openstack@lists.launchpad.net
# License: Apache Software License (ASL) 2.0
#
#
# See usage() function below for more details ...
#
# OCF instance parameters:
# OCF_RESKEY_binary
# OCF_RESKEY_config
# OCF_RESKEY_agent_config
# OCF_RESKEY_user
# OCF_RESKEY_pid
#######################################################################
# Initialization:
# Load the OCF shell function library. OCF_FUNCTIONS_DIR is only assigned when
# it is unset, in which case it becomes ${OCF_ROOT}/lib/heartbeat.
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
#######################################################################
# Fill in some defaults if no values are specified
# Fixed search path for every command run by this agent.
PATH=/sbin:/usr/sbin:/bin:/usr/bin
# Default values for the OCF_RESKEY_* instance parameters.
OCF_RESKEY_binary_default="rabbitmq-server"
OCF_RESKEY_control_default="/usr/sbin/rabbitmqctl"
OCF_RESKEY_init_log_dir_default="/var/log/rabbitmq"
OCF_RESKEY_user_default="rabbitmq"
# NOTE(review): HA_RSCTMP and OCF_RESOURCE_INSTANCE are not set in this file;
# presumably they come from ocf-shellfuncs / the cluster manager -- confirm.
OCF_RESKEY_pid_default="$HA_RSCTMP/$OCF_RESOURCE_INSTANCE.pid"
OCF_RESKEY_start_prog_default="runuser rabbitmq --session-command"
OCF_RESKEY_lock_file_default="/var/lock/subsys/rabbitmq-server"
# ": ${VAR=default}" assigns the default only when VAR is unset, so values
# already present in the environment are kept.
: ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}
: ${OCF_RESKEY_user=${OCF_RESKEY_user_default}}
: ${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}}
: ${OCF_RESKEY_control=${OCF_RESKEY_control_default}}
: ${OCF_RESKEY_init_log_dir=${OCF_RESKEY_init_log_dir_default}}
: ${OCF_RESKEY_start_prog=${OCF_RESKEY_start_prog_default}}
: ${OCF_RESKEY_lock_file=${OCF_RESKEY_lock_file_default}}
#######################################################################
# Print a short help text listing the supported actions to stdout.
# The text previously described a "networking service" (copy-paste from
# another agent); it now names the RabbitMQ server this agent manages.
usage() {
    cat <<UEND
usage: $0 (start|stop|validate-all|meta-data|status|monitor)
$0 manages a RabbitMQ server process as an HA resource
The 'start' operation starts the RabbitMQ server.
The 'stop' operation stops the RabbitMQ server.
The 'validate-all' operation reports whether the parameters are valid
The 'meta-data' operation reports this RA's meta-data information
The 'status' operation reports whether the RabbitMQ server is running
The 'monitor' operation reports whether the RabbitMQ server seems to be working
UEND
}
# Print the OCF resource-agent metadata (XML) to stdout.
#
# The here-document is unquoted on purpose so that the
# ${OCF_RESKEY_*_default} values are expanded into the "default" attributes.
#
# Fixes relative to the previous version:
#   * three stray ": ${OCF_RESKEY_...}" shell lines inside the here-document
#     were being emitted in the middle of the XML and have been removed;
#   * the agent name was "quantum-metadata-agent" (copy-paste leftover);
#   * the "undef" placeholder descriptions were replaced with real text.
meta_data() {
    cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="rabbitmq-server">
<version>1.0</version>
<longdesc lang="en">
Resource agent for the RabbitMQ server
May manage a RabbitMQ server instance or a clone set that
creates a distributed RabbitMQ cluster.
</longdesc>
<shortdesc lang="en">RabbitMQ server</shortdesc>
<parameters>
<parameter name="binary" unique="0" required="0">
<longdesc lang="en">
Location of the RabbitMQ server binary
</longdesc>
<shortdesc lang="en">RabbitMQ server binary</shortdesc>
<content type="string" default="${OCF_RESKEY_binary_default}" />
</parameter>
<parameter name="user" unique="0" required="0">
<longdesc lang="en">
User running RabbitMQ server
</longdesc>
<shortdesc lang="en">RabbitMQ server user</shortdesc>
<content type="string" default="${OCF_RESKEY_user_default}" />
</parameter>
<parameter name="control" unique="0" required="0">
<longdesc lang="en">
The control utility to use for this RabbitMQ server instance
</longdesc>
<shortdesc lang="en">RabbitMQ server control utility</shortdesc>
<content type="string" default="${OCF_RESKEY_control_default}" />
</parameter>
<parameter name="init_log_dir" unique="0" required="0">
<longdesc lang="en">
Log directory of the RabbitMQ server; it must exist for validation to pass
</longdesc>
<shortdesc lang="en">RabbitMQ server log directory</shortdesc>
<content type="string" default="${OCF_RESKEY_init_log_dir_default}" />
</parameter>
<parameter name="start_prog" unique="0" required="0">
<longdesc lang="en">
Command prefix for running the server under the RabbitMQ user account
(not referenced by the actions of this agent)
</longdesc>
<shortdesc lang="en">Start program</shortdesc>
<content type="string" default="${OCF_RESKEY_start_prog_default}" />
</parameter>
<parameter name="lock_file" unique="0" required="0">
<longdesc lang="en">
Lock file
</longdesc>
<shortdesc lang="en">Lock file</shortdesc>
<content type="string" default="${OCF_RESKEY_lock_file_default}" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="20" />
<action name="stop" timeout="20" />
<action name="status" timeout="20" />
<action name="monitor" timeout="30" interval="20" />
<action name="validate-all" timeout="5" />
<action name="meta-data" timeout="5" />
</actions>
</resource-agent>
END
}
#######################################################################
# Functions invoked by resource manager actions
# Check that the agent's prerequisites are present.
#
# Verifies the server binary, the control utility and netstat through
# check_binary, then that the configured user is known to getent and that
# the log directory exists.
# Returns 0 when everything is in place, $OCF_ERR_INSTALLED when the user
# or the log directory is missing.
rabbit_mq_validate() {
    check_binary $OCF_RESKEY_binary
    check_binary $OCF_RESKEY_control
    check_binary netstat

    # The service account must exist.
    getent passwd $OCF_RESKEY_user >/dev/null 2>&1 || {
        ocf_log err "User $OCF_RESKEY_user doesn't exist"
        return $OCF_ERR_INSTALLED
    }

    # The log directory must exist.
    [ -d $OCF_RESKEY_init_log_dir ] || {
        ocf_log err "Directory $OCF_RESKEY_init_log_dir does not exist"
        return $OCF_ERR_INSTALLED
    }

    return 0
}
# Report whether the RabbitMQ server is running.
#
# When the PID file is missing, pgrep is used to look for the server; a
# match is written to the PID file, no match returns $OCF_NOT_RUNNING.
# The actual health decision is the exit status of "<control> status":
# zero gives $OCF_SUCCESS, anything else $OCF_NOT_RUNNING.
rabbit_mq_status() {
    local pid

    if [ ! -f $OCF_RESKEY_pid ]; then
        ocf_log info "Rabbit-MQ server seems not to exist"
        if ! pid=$(pgrep -f ${OCF_RESKEY_binary}); then
            return $OCF_NOT_RUNNING
        fi
        # A process was found without a PID file: recreate the file.
        ocf_log warn "Rabbit-MQ server was run, but no pid file found."
        ocf_log warn "Will use $pid as PID of process Rabbit-MQ server"
        ocf_log warn "Writing $pid into $OCF_RESKEY_pid"
        echo $pid > $OCF_RESKEY_pid
    else
        pid=$(cat $OCF_RESKEY_pid)
    fi

    if $OCF_RESKEY_control status > /dev/null 2>&1; then
        return $OCF_SUCCESS
    fi

    ocf_log info "Old PID file $OCF_RESKEY_pid found (with pid $pid), but Rabbit-MQ server is not running"
    return $OCF_NOT_RUNNING
}
# Monitor action: delegates to rabbit_mq_status and returns its exit code.
# "rc" is declared local so the call no longer overwrites a variable of the
# same name in the caller's (or the global) scope.
rabbit_mq_monitor() {
    local rc
    rabbit_mq_status
    rc=$?
    return $rc
}
# Cleanup hook called before start and around stop; it only logs a message.
clean_up() {
    ocf_log info "cleaning up Rabbit-MQ server. nothing to do."
}
# rotate_logs_rabbitmq() {
# $OCF_RESKEY_control rotate_logs > /dev/null 2>&1
# if [ $? != 0 ] ; then
# RETVAL=1
# fi
# }
# Start action.
#
# Returns $OCF_SUCCESS immediately when rabbit_mq_status already reports the
# server as running. Otherwise launches the server binary in the background
# as $OCF_RESKEY_user, records the background PID in $OCF_RESKEY_pid and
# polls rabbit_mq_monitor once per second until it succeeds. There is no
# internal deadline: the CRM/LRM operation timeout is expected to abort a
# start that never comes up. Exits with $OCF_ERR_GENERIC if the monitor
# returns anything other than success / not-running.
#
# Fix: the launch command used to pass "--config-file=$OCF_RESKEY_config
# --config-file=$OCF_RESKEY_agent_config", left over from the agent this
# script was derived from. Neither parameter has a default or a metadata
# entry here, so the server was started with empty "--config-file=" flags.
# They are no longer passed. $OCF_RESKEY_additional_parameters is still
# appended so existing configurations that set it keep working.
rabbit_mq_start() {
    local rc

    rabbit_mq_status
    rc=$?
    if [ $rc -eq $OCF_SUCCESS ]; then
        ocf_log info "Rabbit-MQ server already running"
        return $OCF_SUCCESS
    fi

    clean_up

    # Run the actual RabbitMQ server. Don't use ocf_run as we're sending the
    # tool's output straight to /dev/null anyway and using ocf_run would break
    # stdout-redirection here. The single-quoted tail is evaluated by the
    # inner shell, so "$!" is the PID of the backgrounded server there.
    su ${OCF_RESKEY_user} -s /bin/sh -c "${OCF_RESKEY_binary} $OCF_RESKEY_additional_parameters"' >> \
        /dev/null 2>&1 & echo $!' > $OCF_RESKEY_pid

    # Spin waiting for the server to come up.
    # Let the CRM/LRM time us out if required
    while true; do
        rabbit_mq_monitor
        rc=$?
        [ $rc -eq $OCF_SUCCESS ] && break
        if [ $rc -ne $OCF_NOT_RUNNING ]; then
            ocf_log err "Rabbit-MQ server start failed"
            exit $OCF_ERR_GENERIC
        fi
        sleep 1
    done

    ocf_log info "Rabbit-MQ server started"
    return $OCF_SUCCESS
}
# Stop action.
#
# Returns $OCF_SUCCESS right away when rabbit_mq_status already reports
# "not running". Otherwise sends SIGTERM to the PID read from
# $OCF_RESKEY_pid, polls rabbit_mq_status once per second for up to
# shutdown_timeout iterations, falls back to SIGKILL if the server is still
# reported as running, removes the PID file and returns $OCF_SUCCESS.
# Exits with $OCF_ERR_GENERIC when the SIGTERM could not be delivered.
rabbit_mq_stop() {
local rc
local pid
# Nothing to do if the server is already down.
rabbit_mq_status
rc=$?
if [ $rc -eq $OCF_NOT_RUNNING ]; then
clean_up
ocf_log info "Rabbit-MQ server already stopped"
return $OCF_SUCCESS
fi
# Try SIGTERM
pid=`cat $OCF_RESKEY_pid`
ocf_run kill -s TERM $pid
rc=$?
if [ $rc -ne 0 ]; then
ocf_log err "Rabbit-MQ server couldn't be stopped"
exit $OCF_ERR_GENERIC
fi
# Wait for the server to stop. The wait defaults to 15 iterations; when
# OCF_RESKEY_CRM_meta_timeout is set it becomes that value / 1000, minus 5
# (presumably a millisecond operation timeout with a 5 s safety margin --
# confirm). NOTE(review): shutdown_timeout and count are not declared local.
shutdown_timeout=15
if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then
shutdown_timeout=$((($OCF_RESKEY_CRM_meta_timeout/1000)-5))
fi
count=0
while [ $count -lt $shutdown_timeout ]; do
rabbit_mq_status
rc=$?
if [ $rc -eq $OCF_NOT_RUNNING ]; then
break
fi
count=`expr $count + 1`
sleep 1
ocf_log debug "Rabbit-MQ server still hasn't stopped yet. Waiting ..."
done
# Re-check once more after the wait loop.
rabbit_mq_status
rc=$?
if [ $rc -ne $OCF_NOT_RUNNING ]; then
# SIGTERM was not enough within the wait period, escalate to SIGKILL.
# The result of the SIGKILL is not checked; the function reports success
# regardless.
ocf_log info "Rabbit-MQ server failed to stop after ${shutdown_timeout}s \
using SIGTERM. Trying SIGKILL ..."
ocf_run kill -s KILL $pid
fi
clean_up
ocf_log info "Rabbit-MQ server stopped"
# Drop the PID file so a later status check does not see a stale PID.
rm -f $OCF_RESKEY_pid
return $OCF_SUCCESS
}
#######################################################################
# Actions that need no validated environment are served first and exit.
case "$1" in
    meta-data)
        meta_data
        exit $OCF_SUCCESS
        ;;
    usage|help)
        usage
        exit $OCF_SUCCESS
        ;;
esac

# Every other action requires a valid installation; propagate the
# validation error code otherwise.
rabbit_mq_validate || exit $?

# Dispatch the requested action; the handler's status becomes the script's.
case "$1" in
    start)
        rabbit_mq_start
        ;;
    stop)
        rabbit_mq_stop
        ;;
    status)
        rabbit_mq_status
        ;;
    monitor)
        rabbit_mq_monitor
        ;;
    validate|validate-all)
        rabbit_mq_validate
        ;;
    *)
        usage
        exit $OCF_ERR_UNIMPLEMENTED
        ;;
esac

View File

@ -17,7 +17,7 @@
# [*cluster_disk_nodes*] - which nodes to cluster with (including the current one)
# [*erlang_cookie*] - erlang cookie, must be the same for all nodes in a cluster
# [*wipe_db_on_cookie_change*] - whether to wipe the RabbitMQ data if the specified
# erlang_cookie differs from the current one. This is a sad parameter: actually,
# erlang_cookie differs from the current one. This is a sad parameter: actually,
# if the cookie indeed differs, then wiping the database is the *only* thing you
# can do. You're only required to set this parameter to true as a sign that you
# realise this.
@ -26,7 +26,7 @@
# stdlib
# Sample Usage:
#
#
#
#
#
# [Remember: No empty lines between comments and class definition]
@ -37,6 +37,7 @@ class rabbitmq::server(
$version = 'UNSET',
$service_name = 'rabbitmq-server',
$service_ensure = 'running',
$service_enabled = true,
$config_stomp = false,
$stomp_port = '6163',
$config_cluster = false,
@ -47,7 +48,9 @@ class rabbitmq::server(
$erlang_cookie='EOKOWXQREETZSHFNTPEY',
$wipe_db_on_cookie_change=true,
$inet_dist_listen_min = '41055',
$inet_dist_listen_max = '41055'
$inet_dist_listen_max = '41055',
$max_retry = '60',
$service_provider = undef
) {
validate_bool($delete_guest_user, $config_stomp)
@ -102,7 +105,7 @@ class rabbitmq::server(
require => Package[$package_name],
notify => Class['rabbitmq::service'],
}
if $config_cluster {
file { 'erlang_cookie':
path =>"/var/lib/rabbitmq/.erlang.cookie",
@ -111,7 +114,7 @@ class rabbitmq::server(
mode => '0400',
content => $erlang_cookie,
replace => true,
before => File['rabbitmq.config'],
before => File['rabbitmq.config'],
require => Exec['wipe_db'], # require => Exec['rabbitmq_stop']
}
# require authorize_cookie_change
@ -140,7 +143,7 @@ class rabbitmq::server(
mode => '0644',
notify => Class['rabbitmq::service'],
}
case $::osfamily {
'RedHat' : {
file { 'rabbitmq-server':
@ -169,10 +172,12 @@ class rabbitmq::server(
}
}
}
class { 'rabbitmq::service':
service_name => $service_name,
ensure => $service_ensure,
service_name => $service_name,
ensure => $service_ensure,
enabled => $service_enabled,
service_provider => $service_provider
}
if $delete_guest_user {

View File

@ -15,7 +15,9 @@
#
class rabbitmq::service(
$service_name = 'rabbitmq-server',
$ensure='running'
$service_provider = undef,
$ensure='running',
$enabled=true
) {
validate_re($ensure, '^(running|stopped)$')
@ -24,7 +26,7 @@ class rabbitmq::service(
Class['rabbitmq::service'] -> Rabbitmq_vhost<| |>
Class['rabbitmq::service'] -> Rabbitmq_user_permissions<| |>
$ensure_real = 'running'
$enable_real = true
$enable_real = $enabled
} else {
$ensure_real = 'stopped'
$enable_real = false
@ -37,11 +39,21 @@ class rabbitmq::service(
on package rabbitmq-server update": }
}
service { $service_name:
ensure => $ensure_real,
enable => $enable_real,
hasstatus => true,
hasrestart => true,
if ($service_provider) {
service { $service_name:
ensure => $ensure_real,
enable => $enable_real,
hasstatus => true,
hasrestart => true,
provider => $service_provider,
}
} else {
service { $service_name:
ensure => $ensure_real,
enable => $enable_real,
hasstatus => true,
hasrestart => true,
}
}
}

View File

@ -1 +1 @@
RABBITMQ_SERVER_ERL_ARGS="+K true +A30 +P 1048576"
SERVER_ERL_ARGS="+K true +A30 +P 1048576"

View File

@ -5,20 +5,23 @@
{inet_dist_listen_min, <%= @inet_dist_listen_min %>},
{inet_dist_listen_max, <%= @inet_dist_listen_max %>},
{inet_default_connect_options, [{nodelay,true}]}
]}
<% if @config_cluster -%>
,{rabbit, [
]},
{rabbit, [
{log_levels, [connection,debug,info,error]},
{cluster_nodes, [<%= @cluster_disk_nodes.map { |n| "\'rabbit@#{n}\'" }.join(', ') %>]},
{tcp_listen_options, [binary,
{packet, raw},
{reuseaddr, true},
{backlog, 128},
{nodelay, true},
{exit_on_close, false},
{keepalive, true}]}
{default_vhost, <<"<%= @virtual_host %>">>},
{default_user, <<"<%= @userid %>">>},
{default_pass, <<"<%= @password %>">>},
{default_permissions, [<<".*">>, <<".*">>, <<".*">>]},
{tcp_listen_options, [
binary,
{packet, raw},
{reuseaddr, true},
{backlog, 128},
{nodelay, true},
{exit_on_close, false},
{keepalive, true}
]}
]}
<% end -%>
<% if @config_stomp -%>
,{rabbitmq_stomp, [
{tcp_listeners, [<%= @stomp_port %>]}