Merge "Fix RabbitMQ element clustering start and stop"

This commit is contained in:
Jenkins 2014-11-05 16:13:47 +00:00 committed by Gerrit Code Review
commit a0033790ca
10 changed files with 210 additions and 137 deletions

View File

@ -6,3 +6,4 @@ os-svc-install
package-installs
sysctl
use-ephemeral
ntp

View File

@ -0,0 +1,27 @@
# Upstart job definition for the rabbitmq-server service.
# Starts the broker on the multi-user runlevels, waits for it to come up via
# its PID file, and on stop shuts it down and kills any remaining processes
# owned by the rabbitmq user.
start on runlevel [2345]
stop on runlevel [016]
respawn
# The default post-start of 1 second sleep delays respawning enough to
# not hit the default of 10 times in 5 seconds. Make it 2 times in 5s.
respawn limit 2 5
# Process will exit from SIGTERM due to post-stop pkill, prevent this
# causing a respawn
normal exit 0 TERM
env RUN_DIR=/var/run/rabbitmq
# NOTE(review): this relies on $RUN_DIR being expanded inside an `env`
# stanza. Confirm that upstart performs that expansion; otherwise PID_FILE
# would hold the literal text "$RUN_DIR/pid".
env PID_FILE=$RUN_DIR/pid
env OS_SVC_ENABLE_CONTROL=1
export OS_SVC_ENABLE_CONTROL
# Create the run directory (owned by rabbitmq, mode 0755) if it is missing.
pre-start script
[ -d "$RUN_DIR" ] || install -d -D -m 0755 -o rabbitmq -g rabbitmq $RUN_DIR
end script
# Run the broker, sending its stdout and stderr to separate start-up logs.
exec /usr/sbin/rabbitmq-server > /var/log/rabbitmq/startup_log \
2> /var/log/rabbitmq/startup_err
# Block the "started" state on rabbitmqctl's wait against the PID file.
post-start exec /usr/sbin/rabbitmqctl wait $PID_FILE >/dev/null 2>&1
pre-stop exec /usr/sbin/rabbitmqctl stop $PID_FILE >/dev/null 2>&1
# Get the Erlang nameserver too.
post-stop exec /usr/bin/pkill -u rabbitmq >/dev/null 2>&1

View File

@ -0,0 +1,4 @@
# Location settings (home, log base, Mnesia base and PID file), all under the
# rabbitmq directories in /var.
HOME=/var/lib/rabbitmq
LOG_BASE=/var/log/rabbitmq
MNESIA_BASE=/var/lib/rabbitmq/mnesia
PID_FILE=/var/run/rabbitmq/pid

View File

@ -8,6 +8,8 @@ install-packages rabbitmq-server
register-state-path --leave-symlink /var/lib/rabbitmq
register-state-path --leave-symlink /var/log/rabbitmq
FILES="$(dirname $0)/../files"
# Note(jang): the rabbitmq-server service is installed, but not started, since
# the first run of os-collect-config is required to configure it properly.
@ -20,28 +22,8 @@ if [ "$DISTRO_NAME" = "ubuntu" ] || [ "$DISTRO_NAME" = "debian" -a "$DIB_INIT_SY
# that it'll be running a venv-based service to use directly. Install an upstart
# configuration that's compatible with os-svc-enable and os-svc-restart
cat > /etc/init/rabbitmq-server.conf <<eof
start on runlevel [2345]
stop on runlevel [016]
respawn
# the default post-start of 1 second sleep delays respawning enough to
# not hit the default of 10 times in 5 seconds. Make it 2 times in 5s.
respawn limit 2 5
env OS_SVC_ENABLE_CONTROL=1
export OS_SVC_ENABLE_CONTROL
pre-start script
[ -d "/var/run/rabbitmq" ] || install -d -D -m 0755 -o rabbitmq -g rabbitmq /var/run/rabbitmq
end script
exec /usr/sbin/rabbitmq-server > /var/log/rabbitmq/startup_log 2> /var/log/rabbitmq/startup_err
post-start exec /usr/sbin/rabbitmqctl wait /var/run/rabbitmq/pid >/dev/null 2>&1
pre-stop exec /usr/sbin/rabbitmqctl stop /var/run/rabbitmq/pid >/dev/null 2>&1
# Get the Erlang nameserver too.
post-stop exec /usr/bin/pkill -u rabbitmq >/dev/null 2>&1
eof
FILE=/etc/init/rabbitmq-server.conf
install -g root -o root -m 0755 "${FILES}${FILE}" "${FILE}"
fi
if [ "$DIB_INIT_SYSTEM" = "systemd" ]; then
@ -52,13 +34,8 @@ if [ "$DIB_INIT_SYSTEM" = "systemd" ]; then
sed -i 's/\[Service\]/\[Service\]\nRestart=on-failure/g' /lib/systemd/system/rabbitmq-server.service
fi
cat > /etc/rabbitmq/rabbitmq-env.conf <<EOF
HOME=/mnt/state/var/lib/rabbitmq
LOG_BASE=/mnt/state/var/log/rabbitmq
MNESIA_BASE=/mnt/state/var/lib/rabbitmq/mnesia
PID_FILE=/var/run/rabbitmq/pid
EOF
FILE=/etc/rabbitmq/rabbitmq-env.conf
install -g root -o root -m 0755 "${FILES}${FILE}" "${FILE}"
# Enable ulimits in pam if needed
PAM_FILE=/etc/pam.d/su

View File

@ -3,8 +3,8 @@ set -eu
[ -x /usr/sbin/semanage ] || exit 0
semanage fcontext -a -t rabbitmq_var_lib_t "/mnt/state/var/lib/rabbitmq(/.*)?"
restorecon -Rv /mnt/state/var/lib/rabbitmq
semanage fcontext -a -t rabbitmq_var_lib_t "/var/lib/rabbitmq(/.*)?"
restorecon -Rv /var/lib/rabbitmq
semanage fcontext -a -t rabbitmq_var_log_t "/mnt/state/var/log/rabbitmq(/.*)?"
restorecon -Rv /mnt/state/var/log/rabbitmq
semanage fcontext -a -t rabbitmq_var_log_t "/var/log/rabbitmq(/.*)?"
restorecon -Rv /var/log/rabbitmq

View File

@ -1,101 +0,0 @@
#!/bin/bash
# Enable and restart the rabbitmq-server service; the rest of this script then
# joins the node into a RabbitMQ cluster when rabbit.nodes is configured.
set -eux
# Create the log directory (owned by rabbitmq, mode 0770) if it is missing.
[ -d /mnt/state/var/log/rabbitmq ] || install -d -D -m 0770 -o rabbitmq -g rabbitmq /mnt/state/var/log/rabbitmq
os-svc-enable -n rabbitmq-server
os-svc-restart -n rabbitmq-server
# Cluster setup
# Why not using auto-configuration of cluster (specifying 'cluster_nodes' in
# rabbitmq.conf):
# 1) This is not robust because when joining a node, it iterates
# through all nodes and joins to first available node, if no suitable node is
# found, joining node is started standalone.
# 2) This is done only for fresh nodes (first start, or reset db).
# 3) You might end up with multiple different clusters A joins with B, C joins
# with D
#
# When joining a node into rabbitmq cluster:
# - if this node is already in cluster with current master[1] node, do nothing
# - iterate through all nodes and check if there is a node which is in a
# cluster[2], if such node exists, join to this node
# - if no existing cluster is found:
# - if this is master node, start this node standalone
# - if it's not master node, try to join with master node otherwise fail (if
# fail we retry on next os-refresh-config run)
#
# [1] master node is first node in alphabetically sorted list of 'rabbit.nodes'
# [2] cluster is any cluster with at least 2 running nodes
# Check whether a given node reports being part of a multi-node cluster.
# Arguments: $1 - host name of the node to query (addressed as rabbit@<host>)
# Returns:   0 when the list following "running_nodes" in
#            `rabbitmqctl cluster_status` contains at least two nodes,
#            non-zero otherwise.
function is_in_cluster() {
    local node=$1
    # The node name is quoted so it always reaches rabbitmqctl as a single
    # argument, whatever characters the metadata contains.
    rabbitmqctl -n "rabbit@${node}" cluster_status |
        grep -q "running_nodes,\[[^]]\+,"
}
# Join the local node to the cluster that the given node belongs to.
# Arguments: $1 - host name of the node to join (addressed as rabbit@<host>)
# Returns:   1 when `rabbitmqctl join_cluster` fails (the local app is left
#            stopped in that case), otherwise the status of `start_app`.
function join_with() {
    local node=$1
    rabbitmqctl stop_app
    # Quoted so the node name is always passed as one argument.
    rabbitmqctl join_cluster "rabbit@${node}" || return 1
    rabbitmqctl start_app
}
# Short host name of this machine.
LOCAL=$(hostname -s)
# TODO - nodes are comma separated hostnames, there is probably no type for this
# The comma-separated rabbit.nodes value is turned into one host name per line.
NODES=$(os-apply-config --key rabbit.nodes --type raw --key-default '' | sed 's/,/\n/g')
# The master is the first entry of the sorted node list.
# NOTE(review): `sort -n` is a numeric sort and runs before the names are
# lower-cased below; confirm this yields the intended "alphabetically first"
# node on every host.
MASTER=$(echo "$NODES"|sort -n|head -1)
# Heat can return hostnames with capital letters, while cloud-init converts
# them to lowercase. Lower-case everything so that the names can be compared
# in a case-insensitive manner:
LOCAL=${LOCAL,,}
NODES=${NODES,,}
MASTER=${MASTER,,}
# Cluster set-up: only attempted when rabbit.nodes lists at least one node.
if [ -n "$NODES" ]; then
    if os-is-bootstrap-host; then
        # if this is master node which is already clustered, do nothing
        if is_in_cluster "$LOCAL"; then
            exit 0
        fi
    else
        # if this node is already in cluster with current master node, do nothing
        if rabbitmqctl cluster_status | grep -q "$MASTER"; then
            exit 0
        fi
    fi

    JOINED_WITH=''
    # find another node which is already clustered and try join with it
    # ($NODES is deliberately unquoted: it is a whitespace-separated list.)
    for NODE in $NODES; do
        if [ "$NODE" != "$LOCAL" ] && is_in_cluster "$NODE"; then
            if join_with "$NODE"; then
                JOINED_WITH=$NODE
                break
            fi
        fi
    done

    # The closing bracket must be its own word: without the space the test is
    # malformed whenever JOINED_WITH is non-empty and only "works" because the
    # resulting `[` error happens to be treated as false.
    if [ -z "$JOINED_WITH" ]; then
        # if there is no existing cluster yet and this is master node, start this
        # node standalone (other nodes will join to this one)
        if os-is-bootstrap-host; then
            rabbitmqctl start_app
        else
            if ! join_with "$MASTER"; then
                echo "failed to join this node into cluster"
                exit 1
            fi
        fi
    fi

    # wait until rabbitmq node is up
    timeout 60 rabbitmqctl wait /var/run/rabbitmq/pid

    # make sure that all queues (except those with auto-generated names) are
    # mirrored across all nodes in running:
    rabbitmqctl set_policy HA '^(?!amq\.).*' '{"ha-mode": "all"}'
fi

View File

@ -0,0 +1,167 @@
#!/bin/bash
# Start RabbitMQ on this host: read the node metadata, enable the service and,
# when at most one node is configured, restart it stand-alone and finish.
set -euxo pipefail

# This host's node id and the sorted list of all RabbitMQ nodes, from metadata.
LOCAL_RABBIT_HOST="$(os-apply-config --key bootstrap_host.nodeid --type netaddress --key-default '')"
NODES=($(os-apply-config --key rabbit.nodes --type raw --key-default '' | sed 's/,/\n/g' | sort))
TOTAL_NODES=${#NODES[@]}

# Without our own node id there is not enough meta-data to start RabbitMQ.
if [[ -z ${LOCAL_RABBIT_HOST} ]]; then
    echo "RabbitMQ bootstrap_host.nodeid is not defined in meta-data, aborting."
    exit 255
fi

os-svc-enable -n rabbitmq-server

## Non-cluster configuration set-up. ##
if (( TOTAL_NODES <= 1 )); then
    os-svc-restart -n rabbitmq-server
    echo "RabbitMQ non-cluster configuration complete..."
    exit 0
fi
## Cluster configuration set-up. ##
# Report whether the local RabbitMQ node is part of a multi-node cluster.
# Returns: 0 when the list following "running_nodes" in
#          `rabbitmqctl cluster_status` contains at least two nodes;
#          non-zero otherwise, including when rabbitmqctl itself fails.
function is_in_cluster() {
    local status
    # Capture the output before matching instead of piping into `grep -q`:
    # with pipefail enabled, grep exiting on its first match can leave
    # rabbitmqctl writing to a closed pipe, and the resulting SIGPIPE status
    # would turn a successful match into a failure.
    status="$(rabbitmqctl cluster_status 2>/dev/null)" || return 1
    grep -q "running_nodes,\[[^]]\+," <<< "${status}"
}
# Size of the cluster as seen by remote node $1.
# Prints the number of lines spanned by the multi-node "running_nodes" entry
# in that node's `rabbitmqctl cluster_status` output. When $1 is not in a
# cluster, or is in a cluster by itself, the printed value is 0.
cluster_size() {
    local peer="${1}"
    local range_start='{running_nodes,\[[^]]\+,'
    local range_end='\]\},'
    rabbitmqctl -n "rabbit@${peer}" cluster_status 2>/dev/null \
        | sed -n "/${range_start}/,/${range_end}/p" \
        | wc -l
}
# Remove the local node from its cluster.
# Stops the RabbitMQ application and then resets the node.
# Returns: the status of `stop_app` if it fails (reset is then skipped),
#          otherwise the status of `reset`.
function leave_cluster() {
    # Fail explicitly instead of relying on errexit: the function is exported
    # and run through `bash -c` in a child shell, where the parent's `set -e`
    # is not in effect. Resetting after a failed stop is never wanted.
    rabbitmqctl stop_app || return
    # This syncs all data into the cluster, then removes this node, cleaning local mnesia.
    rabbitmqctl reset
}
export -f leave_cluster
# Try to cluster the local node with a remote node.
# Arguments: $1 - remote node to join (addressed as rabbit@<host>)
#            $2 - local node name, used only in the failure message
# The app is stopped, a join is attempted (its failure and stderr output are
# deliberately ignored), and the app is started again. Success is judged
# solely by is_in_cluster afterwards.
# Returns:   0 when the local node ends up in a cluster, 1 otherwise.
join_cluster_with() {
    local peer="${1}"
    local self="${2}"

    rabbitmqctl stop_app
    rabbitmqctl join_cluster "rabbit@${peer}" 2>/dev/null || true
    rabbitmqctl start_app

    is_in_cluster && return 0

    echo "Failed to join node [${self}] with [${peer}]..."
    return 1
}
# Cluster prerequisites: identify the bootstrap node, locate this host in the
# node list, and make sure every configured node answers a ping.
BOOTSTRAP_NODE="$(os-apply-config --key bootstrap_host.bootstrap_nodeid --type netaddress --key-default '')"
NODE_INDEX=""
# Find the node being worked on in the NODES array.
for (( index = 0; index < ${TOTAL_NODES}; index++ )); do
    if [[ "${NODES[$index]}" == "${LOCAL_RABBIT_HOST}" ]]; then
        NODE_INDEX=${index}
    fi
done

if [[ -z "${BOOTSTRAP_NODE}" || ${TOTAL_NODES} -lt 3 || -z "${NODE_INDEX}" ]]; then
    # We do not know who the bootstrap is, why are we attempting to bring up a Rabbit cluster?
    # -OR- we do not have sufficient nodes to support HA so let's abort.
    # -OR- we did not find our node in the array and hence did not set NODE_INDEX.
    echo "bootstrap_host.bootstrap_nodeid: ${BOOTSTRAP_NODE}, TOTAL_NODES: ${TOTAL_NODES}, NODE_INDEX: ${NODE_INDEX}"
    echo "RabbitMQ cluster configuration prerequisites not met, aborting."
    exit 255
fi

# Start from a known-empty flag so that a value inherited from the environment
# cannot make the script abort.
HOST_UNREACHABLE=""
# Probe every node, reporting all unreachable hosts before giving up.
for node in "${NODES[@]}"; do
    if ! ping -c1 "${node}"; then
        echo "RabbitMQ host unreachable: ${node}"
        HOST_UNREACHABLE=1
    fi
done
[[ -z "${HOST_UNREACHABLE}" ]] || exit 1
# Orchestrated restart and re-clustering of the local RabbitMQ node:
#   1. while in a cluster, wait for this node's time slot and leave it,
#   2. restart the rabbitmq-server service,
#   3. re-form the cluster (the bootstrap node joins any peer, every other
#      node joins the bootstrap node),
#   4. apply the HA mirroring policy.
#
# Refuse to stop unless all nodes are running, this avoids pause_minority.
# From the RabbitMQ docs: pause_minority
# Your network is maybe less reliable. You have clustered across 3 AZs
# in EC2, and you assume that only one AZ will fail at once. In that
# scenario you want the remaining two AZs to continue working and the
# nodes from the failed AZ to rejoin automatically and without fuss when
# the AZ comes back.
# (See: os-apply-config/etc/rabbitmq/rabbitmq.config)
#
# We want to orchestrate nodes leaving the cluster. We'll do this using a
# metronome. For example, if we have 3 nodes, there will be six periods.
# The first node may leave in period 0. The second node may leave in period
# 1. The third node may leave in period 2.
#
# Metronome: 0 .. 1 .. 2 .. 3 .. 4 .. 5 ..
# Node leaves: 0 ....... 1 ....... 2 .......
#
# The dead periods in between allow for $PERIOD seconds of clock
# desynchronization. PERIOD should be about half the length of time it
# takes for a node to join the cluster.
PERIOD=10
# Each node owns one even-numbered metronome slot, derived from its index.
NODE_LEAVES_AT=$(( ${NODE_INDEX} * 2 ))
while is_in_cluster; do
NODES_IN_CLUSTER=$(cluster_size "${BOOTSTRAP_NODE}")
if [ ${NODES_IN_CLUSTER} -gt ${TOTAL_NODES} ]; then
echo "A node we don't know about appears to have joined the cluster, aborting."
exit 255
fi
# Current slot: wall-clock seconds divided into PERIOD-second ticks, wrapped
# to twice the number of nodes.
METRONOME=$(( ($(date +%s) / ${PERIOD}) % (${TOTAL_NODES} * 2) ))
if [ ${NODES_IN_CLUSTER} -eq ${TOTAL_NODES} -a \
${METRONOME} -eq ${NODE_LEAVES_AT} ]; then
# All other nodes are in the cluster and it's our allotted time,
# safe to leave. Tell other nodes we're about to leave the cluster.
echo "Leaving cluster..."
# leave_cluster runs in a child bash with a 300 second limit. On failure the
# app is started again; `exit 1` only runs when that start_app succeeds.
# NOTE(review): confirm the intended behaviour when start_app itself fails.
timeout 300 bash -c leave_cluster || { rabbitmqctl start_app && exit 1; }
else
echo "Refusing to allow node to leave cluster..."
fi
sleep 2
done
# Restart RabbitMQ. We need to have left the cluster first or we risk data loss.
os-svc-restart -n rabbitmq-server
# We're the bootstrap node
if [ "${LOCAL_RABBIT_HOST}" == "${BOOTSTRAP_NODE}" ]; then
# If we are not in a cluster keep trying to join a node.
# Note: This loop is required as the BOOTSTRAP_NODE may have left a running
# cluster and it therefore must re-join.
while ! is_in_cluster; do
# Try to join with each node in turn.
# COUNT advances before use, so it cycles 1, 2, ..., TOTAL_NODES-1, 0, ...;
# our own index is skipped.
COUNT=$(( (${COUNT:-0} + 1) % ${TOTAL_NODES} ))
if [ ${COUNT} -ne ${NODE_INDEX} ]; then
join_cluster_with "${NODES[${COUNT}]}" "${LOCAL_RABBIT_HOST}" || true
fi
done
# Check that we have not got a partition, i.e. the case where we do not have
# synced clocks and hence we can get split in the clustering A+B C. If we
# get this we will wait as this is more favourable than a bad/broken
# cluster set-up.
while [[ $(cluster_size "${LOCAL_RABBIT_HOST}") -ne ${TOTAL_NODES} ]]; do
echo "Waiting for nodes to join [${BOOTSTRAP_NODE}]..."
sleep 10
done
else
# Wait until the BOOTSTRAP_NODE has at least formed a cluster with one node.
while [[ $(cluster_size "${BOOTSTRAP_NODE}") -lt 2 ]]; do
echo "Waiting for bootstrap node to initialise the cluster..."
sleep 10
done
# Join the bootstrap node unless this node is already clustered.
is_in_cluster || join_cluster_with "${BOOTSTRAP_NODE}" "${LOCAL_RABBIT_HOST}"
fi
# Make sure that all queues (except those with auto-generated names) are
# mirrored across all nodes in the cluster running:
rabbitmqctl set_policy HA '^(?!amq\.).*' '{"ha-mode": "all"}'
echo "RabbitMQ cluster configuration complete..."

View File

@ -1,6 +1,4 @@
#!/bin/bash
set -eux
[ -d /mnt/state/var/lib/rabbitmq ] || install -d -D -m 0770 -o rabbitmq -g rabbitmq /mnt/state/var/lib/rabbitmq
install -m 600 -o rabbitmq -g rabbitmq /dev/null /mnt/state/var/lib/rabbitmq/.erlang.cookie
[ -d /mnt/state/var/log/rabbitmq ] || install -d -D -m 0770 -o rabbitmq -g rabbitmq /mnt/state/var/log/rabbitmq
install -m 600 -o rabbitmq -g rabbitmq /dev/null /var/lib/rabbitmq/.erlang.cookie