Fix RabbitMQ element clustering start and stop

Prevent upstart respawning from SIGTERM due to post-stop pkill running.

Separate config files out of the install.d script into files to help
readability.

Renumber 40-rabbitmq to 51-rabbitmq and 50-rabbitmq-passwords to
52-rabbitmq-passwords so that the ntp script runs before these scripts.

Graceful start and stop, to prevent split-brain issues. In the non-cluster
case: just restart.

In the cluster case: stop everything gracefully, then start everything. Each
node joins the bootstrap node if it is not the bootstrap; the bootstrap node
joins any node. This prevents ending up with two disjoint clusters.

"graceful" means RAM nodes sync with disk nodes before they stop. If they are
stopped unceremoniously, they lose data.

Closes-Bug: #1334314
Change-Id: Ic758256481fdd31d10f4e4a341ae93cb372a0766
This commit is contained in:
Nicholas Randon 2014-08-18 19:18:31 +01:00
parent 2a656d8c88
commit b4f59ef86b
10 changed files with 210 additions and 137 deletions

View File

@ -5,3 +5,4 @@ os-refresh-config
os-svc-install
sysctl
use-ephemeral
ntp

View File

@ -0,0 +1,27 @@
# Upstart job for rabbitmq-server, driven by os-svc-enable/os-svc-restart.
start on runlevel [2345]
stop on runlevel [016]
respawn
# The default post-start of 1 second sleep delays respawning enough to
# not hit the default of 10 times in 5 seconds. Make it 2 times in 5s.
respawn limit 2 5
# Process will exit from SIGTERM due to post-stop pkill, prevent this
# causing a respawn
normal exit 0 TERM
# Runtime directory and pid file; PID_FILE is what rabbitmqctl wait/stop use.
env RUN_DIR=/var/run/rabbitmq
env PID_FILE=$RUN_DIR/pid
env OS_SVC_ENABLE_CONTROL=1
export OS_SVC_ENABLE_CONTROL
pre-start script
[ -d "$RUN_DIR" ] || install -d -D -m 0755 -o rabbitmq -g rabbitmq $RUN_DIR
end script
exec /usr/sbin/rabbitmq-server > /var/log/rabbitmq/startup_log \
2> /var/log/rabbitmq/startup_err
# Block until the broker is actually ready, so dependents can rely on it.
post-start exec /usr/sbin/rabbitmqctl wait $PID_FILE >/dev/null 2>&1
# Graceful stop: lets RAM nodes sync with disk nodes before shutdown.
pre-stop exec /usr/sbin/rabbitmqctl stop $PID_FILE >/dev/null 2>&1
# Get the Erlang nameserver too.
post-stop exec /usr/bin/pkill -u rabbitmq >/dev/null 2>&1

View File

@ -0,0 +1,4 @@
# RabbitMQ environment overrides, read by /usr/sbin/rabbitmq-server.
# PID_FILE must match the path the init job passes to 'rabbitmqctl wait'.
HOME=/var/lib/rabbitmq
LOG_BASE=/var/log/rabbitmq
MNESIA_BASE=/var/lib/rabbitmq/mnesia
PID_FILE=/var/run/rabbitmq/pid

View File

@ -8,6 +8,8 @@ install-packages rabbitmq-server
register-state-path --leave-symlink /var/lib/rabbitmq
register-state-path --leave-symlink /var/log/rabbitmq
FILES="$(dirname $0)/../files"
# Note(jang): the rabbitmq-server service is installed, but not started, since
# the first run of os-collect-config is required to configure it properly.
@ -20,28 +22,8 @@ if [ "$DISTRO_NAME" = "ubuntu" ] || [ "$DISTRO_NAME" = "debian" -a "$DIB_INIT_SY
# that it'll be running a venv-based service to use directly. Install an upstart
# configuration that's compatible with os-svc-enable and os-svc-restart
cat > /etc/init/rabbitmq-server.conf <<eof
start on runlevel [2345]
stop on runlevel [016]
respawn
# the default post-start of 1 second sleep delays respawning enough to
# not hit the default of 10 times in 5 seconds. Make it 2 times in 5s.
respawn limit 2 5
env OS_SVC_ENABLE_CONTROL=1
export OS_SVC_ENABLE_CONTROL
pre-start script
[ -d "/var/run/rabbitmq" ] || install -d -D -m 0755 -o rabbitmq -g rabbitmq /var/run/rabbitmq
end script
exec /usr/sbin/rabbitmq-server > /var/log/rabbitmq/startup_log 2> /var/log/rabbitmq/startup_err
post-start exec /usr/sbin/rabbitmqctl wait /var/run/rabbitmq/pid >/dev/null 2>&1
pre-stop exec /usr/sbin/rabbitmqctl stop /var/run/rabbitmq/pid >/dev/null 2>&1
# Get the Erlang nameserver too.
post-stop exec /usr/bin/pkill -u rabbitmq >/dev/null 2>&1
eof
FILE=/etc/init/rabbitmq-server.conf
install -g root -o root -m 0755 "${FILES}${FILE}" "${FILE}"
fi
if [ "$DIB_INIT_SYSTEM" = "systemd" ]; then
@ -52,13 +34,8 @@ if [ "$DIB_INIT_SYSTEM" = "systemd" ]; then
sed -i 's/\[Service\]/\[Service\]\nRestart=on-failure/g' /lib/systemd/system/rabbitmq-server.service
fi
cat > /etc/rabbitmq/rabbitmq-env.conf <<EOF
HOME=/mnt/state/var/lib/rabbitmq
LOG_BASE=/mnt/state/var/log/rabbitmq
MNESIA_BASE=/mnt/state/var/lib/rabbitmq/mnesia
PID_FILE=/var/run/rabbitmq/pid
EOF
FILE=/etc/rabbitmq/rabbitmq-env.conf
install -g root -o root -m 0755 "${FILES}${FILE}" "${FILE}"
# Enable ulimits in pam if needed
PAM_FILE=/etc/pam.d/su

View File

@ -3,8 +3,8 @@ set -eu
[ -x /usr/sbin/semanage ] || exit 0
semanage fcontext -a -t rabbitmq_var_lib_t "/mnt/state/var/lib/rabbitmq(/.*)?"
restorecon -Rv /mnt/state/var/lib/rabbitmq
semanage fcontext -a -t rabbitmq_var_lib_t "/var/lib/rabbitmq(/.*)?"
restorecon -Rv /var/lib/rabbitmq
semanage fcontext -a -t rabbitmq_var_log_t "/mnt/state/var/log/rabbitmq(/.*)?"
restorecon -Rv /mnt/state/var/log/rabbitmq
semanage fcontext -a -t rabbitmq_var_log_t "/var/log/rabbitmq(/.*)?"
restorecon -Rv /var/log/rabbitmq

View File

@ -1,101 +0,0 @@
#!/bin/bash
set -eux
# Ensure the state-partition log directory exists before (re)starting the service.
[ -d /mnt/state/var/log/rabbitmq ] || install -d -D -m 0770 -o rabbitmq -g rabbitmq /mnt/state/var/log/rabbitmq
os-svc-enable -n rabbitmq-server
os-svc-restart -n rabbitmq-server
# Cluster setup
# Why not using auto-configuration of cluster (specifying 'cluster_nodes' in
# rabbitmq.conf):
# 1) This is not robust because when joining a node, it iterates
# through all nodes and joins to first available node, if no suitable node is
# found, joining node is started standalone.
# 2) This is done only for fresh nodes (first start, or reset db).
# 3) You might end up with multiple different clusters A joins with B, C joins
# with D
#
# When joining a node into rabbitmq cluster:
# - if this node is already in cluster with current master[1] node, do nothing
# - iterate through all nodes and check if there is a node which is in a
# cluster[2], if such node exists, join to this node
# - if no existing cluster is found:
# - if this is master node, start this node standalone
# - if it's not master node, try to join with master node otherwise fail (if
# fail we retry on next os-refresh-config run)
#
# [1] master node is first node in alphabetically sorted list of 'rabbit.nodes'
# [2] cluster is any cluster with at least 2 running nodes
# True when the given node reports a "running_nodes" list containing at
# least two members in its cluster_status output, i.e. it is genuinely
# clustered with at least one other node.
is_in_cluster() {
    local target="$1"
    rabbitmqctl -n "rabbit@${target}" cluster_status | grep -q "running_nodes,\[[^]]\+,"
}
# Join the local node into the cluster of the given remote node.
# The app must be stopped around join_cluster. Returns 1 when the join
# fails (start_app is deliberately not re-run in that case; the caller
# retries with another node or on the next os-refresh-config run).
join_with() {
    local remote="$1"
    rabbitmqctl stop_app
    if ! rabbitmqctl join_cluster "rabbit@${remote}"; then
        return 1
    fi
    rabbitmqctl start_app
}
LOCAL=$(hostname -s)
# TODO - nodes are comma-separated hostnames; there is probably no
# os-apply-config type for this, hence --type raw.
NODES=$(os-apply-config --key rabbit.nodes --type raw --key-default '' | sed 's/,/\n/g')
# Master is the first node in the sorted node list.
MASTER=$(echo "$NODES" | sort -n | head -1)
# Heat can return hostnames with capital letters while cloud-init converts
# them to lowercase. Lowercase everything so comparisons are case-insensitive.
LOCAL=${LOCAL,,}
NODES=${NODES,,}
MASTER=${MASTER,,}

if [ -n "$NODES" ]; then
    if os-is-bootstrap-host; then
        # If this is the master node and it is already clustered, do nothing.
        if is_in_cluster "$LOCAL"; then
            exit 0
        fi
    else
        # If this node is already in a cluster with the current master node,
        # do nothing.
        if rabbitmqctl cluster_status | grep -q "$MASTER"; then
            exit 0
        fi
    fi

    JOINED_WITH=''
    # Find another node which is already clustered and try to join with it.
    for NODE in $NODES; do
        if [ ! "$NODE" = "$LOCAL" ] && is_in_cluster "$NODE"; then
            if join_with "$NODE"; then
                JOINED_WITH=$NODE
                break
            fi
        fi
    done

    # BUG FIX: this previously read '[ -z "$JOINED_WITH"]' (no space before
    # the closing bracket), so test(1) failed with "missing ]" whenever a
    # node had successfully joined above.
    if [ -z "$JOINED_WITH" ]; then
        # If there is no existing cluster yet and this is the master node,
        # start this node standalone (other nodes will join to this one).
        if os-is-bootstrap-host; then
            rabbitmqctl start_app
        else
            if ! join_with "$MASTER"; then
                echo "failed to join this node into cluster"
                exit 1
            fi
        fi
    fi

    # Wait until the rabbitmq node is up.
    timeout 60 rabbitmqctl wait /var/run/rabbitmq/pid

    # Make sure that all queues (except those with auto-generated names) are
    # mirrored across all running nodes.
    rabbitmqctl set_policy HA '^(?!amq\.).*' '{"ha-mode": "all"}'
fi

View File

@ -0,0 +1,167 @@
#!/bin/bash
set -eux
set -o pipefail
# Local node identity and cluster membership come from os-apply-config meta-data.
LOCAL_RABBIT_HOST="$(os-apply-config --key bootstrap_host.nodeid --type netaddress --key-default '')"
# Sorted array of all RabbitMQ hostnames (comma-separated in meta-data).
NODES=($(os-apply-config --key rabbit.nodes --type raw --key-default '' | sed 's/,/\n/g' | sort))
TOTAL_NODES=${#NODES[@]}
# Insufficient meta-data to attempt to start-up RabbitMQ.
if [ -z "${LOCAL_RABBIT_HOST}" ]; then
echo "RabbitMQ bootstrap_host.nodeid is not defined in meta-data, aborting."
exit 255
fi
os-svc-enable -n rabbitmq-server
## Non-cluster configuration set-up. ##
if [ ${TOTAL_NODES} -le 1 ]; then
# Single node: a plain restart is safe — no split-brain risk.
os-svc-restart -n rabbitmq-server
echo "RabbitMQ non-cluster configuration complete..."
exit 0
fi
## Cluster configuration set-up. ##
# Succeeds when the local node's cluster_status lists at least two
# entries after "running_nodes", i.e. we are clustered with someone else.
is_in_cluster() {
    rabbitmqctl cluster_status 2>/dev/null \
        | grep -q "running_nodes,\[[^]]\+,"
}
# Print the number of nodes in the cluster according to remote node $1.
# Prints 0 when $1 is not in a cluster, or is in a cluster by itself
# (the sed range only matches a running_nodes list with 2+ members).
cluster_size() {
    local host="${1}"
    rabbitmqctl -n "rabbit@${host}" cluster_status 2>/dev/null \
        | sed -n '/{running_nodes,\[[^]]\+,/,/\]\},/p' \
        | wc -l
}
# Gracefully remove the local node from the cluster.
function leave_cluster() {
rabbitmqctl stop_app
# This syncs all data into the cluster, then removes this node, cleaning local mnesia.
rabbitmqctl reset
}
# Exported so it can be invoked via 'timeout ... bash -c leave_cluster' below.
export -f leave_cluster
# Attempt to join the local node ($2, used only for logging) into the
# cluster of remote node $1; verify membership afterwards and return 1
# on failure so the caller can retry with another node.
join_cluster_with() {
    local target="${1}"
    local self_name="${2}"
    rabbitmqctl stop_app
    # join_cluster can fail harmlessly (e.g. already a member); ignore the
    # status and let the is_in_cluster check below decide success.
    rabbitmqctl join_cluster "rabbit@${target}" 2>/dev/null || true
    rabbitmqctl start_app
    if is_in_cluster; then
        return 0
    fi
    echo "Failed to join node [${self_name}] with [${target}]..."
    return 1
}
# Identify the bootstrap node and this node's index in the sorted NODES list.
BOOTSTRAP_NODE="$(os-apply-config --key bootstrap_host.bootstrap_nodeid --type netaddress --key-default '')"
NODE_INDEX=""
# Find the nodes being worked on in the NODES array.
for (( index = 0; index < ${TOTAL_NODES}; index++ )); do
    if [ "${NODES[$index]}" == "${LOCAL_RABBIT_HOST}" ]; then
        NODE_INDEX=${index}
    fi
done
# Use [[ ... || ... ]] rather than the deprecated, ambiguity-prone 'test -o'.
if [[ -z "${BOOTSTRAP_NODE}" || ${TOTAL_NODES} -lt 3 || -z "${NODE_INDEX}" ]]; then
    # We do not know who the bootstrap is, why are we attempting to bring up a Rabbit cluster?
    # -OR- we do not have sufficient nodes to support HA so lets abort.
    # -OR- we did not find our node in the array and hence did not set NODE_INDEX.
    echo "bootstrap_host.bootstrap_nodeid: ${BOOTSTRAP_NODE}, TOTAL_NODES: ${TOTAL_NODES}, NODE_INDEX: ${NODE_INDEX}"
    echo "RabbitMQ cluster configuration prerequisites not met, aborting."
    exit 255
fi
# Verify every cluster member is reachable before touching the cluster;
# report all unreachable hosts first, then abort if any were found.
for (( index = 0; index < ${TOTAL_NODES}; index++ )); do
    if ! ping -c1 "${NODES[$index]}"; then
        echo "RabbitMQ host unreachable: ${NODES[$index]}"
        HOST_UNREACHABLE=1
    fi
done
[ -z "${HOST_UNREACHABLE:-}" ] || exit 1
# Refuse to stop unless all nodes are running, this avoids pause_minority.
# From the RabbitMQ docs: pause_minority
#     Your network is maybe less reliable. You have clustered across 3 AZs
#     in EC2, and you assume that only one AZ will fail at once. In that
#     scenario you want the remaining two AZs to continue working and the
#     nodes from the failed AZ to rejoin automatically and without fuss when
#     the AZ comes back.
# (See: os-apply-config/etc/rabbitmq/rabbitmq.config)
#
# We want to orchestrate nodes leaving the cluster. We'll do this using a
# metronome. For example, if we have 3 nodes, there will be six periods.
# The first node may leave in period 0. The second node may leave in period
# 1. The third node may leave in period 2.
#
# Metronome:   0 .. 1 .. 2 .. 3 .. 4 .. 5 ..
# Node leaves: 0 ....... 1 ....... 2 .......
#
# The dead periods in between allow for $PERIOD seconds of clock
# desynchronization. PERIOD should be about the half the length of time it
# takes for a node to join the cluster.
PERIOD=10
NODE_LEAVES_AT=$(( ${NODE_INDEX} * 2 ))
while is_in_cluster; do
    NODES_IN_CLUSTER=$(cluster_size "${BOOTSTRAP_NODE}")
    if [ ${NODES_IN_CLUSTER} -gt ${TOTAL_NODES} ]; then
        echo "A node we don't know about appears to have joined the cluster, aborting."
        exit 255
    fi
    # Current metronome period, derived from wall-clock time so that all
    # nodes agree on it without any coordination.
    METRONOME=$(( ($(date +%s) / ${PERIOD}) % (${TOTAL_NODES} * 2) ))
    # Use [[ ... && ... ]] rather than the deprecated, ambiguity-prone 'test -a'.
    if [[ ${NODES_IN_CLUSTER} -eq ${TOTAL_NODES} &&
          ${METRONOME} -eq ${NODE_LEAVES_AT} ]]; then
        # All other nodes are in the cluster and it's our allotted time,
        # safe to leave. Tell other nodes we're about to leave the cluster.
        echo "Leaving cluster..."
        # leave_cluster is exported above; run it under timeout so a hung
        # sync cannot block forever. On failure, restart the app and bail.
        timeout 300 bash -c leave_cluster || { rabbitmqctl start_app && exit 1; }
    else
        echo "Refusing to allow node to leave cluster..."
    fi
    sleep 2
done
# Restart RabbitMQ. We need to have left the cluster first or we risk data loss.
os-svc-restart -n rabbitmq-server
# We're the bootstrap node
if [ "${LOCAL_RABBIT_HOST}" == "${BOOTSTRAP_NODE}" ]; then
# If we are not in a cluster keep trying to join a node.
# Note: This loop is required as the BOOTSTRAP_NODE may have left a running
# cluster and it therefore must re-join.
while ! is_in_cluster; do
# Try to join with each node in turn.
COUNT=$(( (${COUNT:-0} + 1) % ${TOTAL_NODES} ))
if [ ${COUNT} -ne ${NODE_INDEX} ]; then
# Failures are tolerated here; the while loop retries indefinitely.
join_cluster_with "${NODES[${COUNT}]}" "${LOCAL_RABBIT_HOST}" || true
fi
done
# Check that we have not got a partition i.e. The case where we do not have
# synced clocks and hence we can get split in the clustering A+B C. If we
# get this we will wait as this is more favourable than a bad/broken
# cluster set-up.
while [[ $(cluster_size "${LOCAL_RABBIT_HOST}") -ne ${TOTAL_NODES} ]]; do
echo "Waiting for nodes to join [${BOOTSTRAP_NODE}]..."
sleep 10
done
else
# Wait until the BOOTSTRAP_NODE has at least formed a cluster with one node.
while [[ $(cluster_size "${BOOTSTRAP_NODE}") -lt 2 ]]; do
echo "Waiting for bootstrap node to initialise the cluster..."
sleep 10
done
# Non-bootstrap nodes always join via the bootstrap node; this prevents
# two disjoint clusters from forming.
is_in_cluster || join_cluster_with "${BOOTSTRAP_NODE}" "${LOCAL_RABBIT_HOST}"
fi
# Make sure that all queues (except those with auto-generated names) are
# mirrored across all nodes in the cluster running:
rabbitmqctl set_policy HA '^(?!amq\.).*' '{"ha-mode": "all"}'
echo "RabbitMQ cluster configuration complete..."

View File

@ -1,6 +1,4 @@
#!/bin/bash
set -eux
[ -d /mnt/state/var/lib/rabbitmq ] || install -d -D -m 0770 -o rabbitmq -g rabbitmq /mnt/state/var/lib/rabbitmq
install -m 600 -o rabbitmq -g rabbitmq /dev/null /mnt/state/var/lib/rabbitmq/.erlang.cookie
[ -d /mnt/state/var/log/rabbitmq ] || install -d -D -m 0770 -o rabbitmq -g rabbitmq /mnt/state/var/log/rabbitmq
install -m 600 -o rabbitmq -g rabbitmq /dev/null /var/lib/rabbitmq/.erlang.cookie