From b4f59ef86bdaa09554c6741661a13a8f3fb12cba Mon Sep 17 00:00:00 2001
From: Nicholas Randon
Date: Mon, 18 Aug 2014 19:18:31 +0100
Subject: [PATCH] Fix RabbitMQ element clustering start and stop

Prevent upstart respawning from SIGTERM due to post-stop pkill running.

Separate config files out of the install.d script into files to help
readability.

Renumber 40-rabbitmq to 51-rabbitmq and 50-rabbitmq-passwords to
52-rabbitmq-passwords so that ntp runs before these scripts.

Graceful start and stop, to prevent split-brain issues.
In the non-cluster case: just restart.
In the cluster case: stop everything gracefully. Start everything.
Join bootstrap node if not bootstrap, otherwise join any node.
This prevents getting two disjoint clusters.

"graceful" means RAM nodes sync with disk nodes before they stop.
If they are stopped unceremoniously, they lose data.

Closes-Bug: #1334314
Change-Id: Ic758256481fdd31d10f4e4a341ae93cb372a0766
---
 elements/rabbitmq-server/element-deps         |   1 +
 .../files/etc/init/rabbitmq-server.conf       |  27 +++
 .../files/etc/rabbitmq/rabbitmq-env.conf      |   4 +
 .../install.d/20-rabbitmq-server              |  35 +---
 .../var/lib/rabbitmq/.erlang.cookie           |   0
 .../configure.d/20-rabbitmq-server-selinux    |   8 +-
 .../post-configure.d/40-rabbitmq              | 101 -----------
 .../post-configure.d/51-rabbitmq              | 167 ++++++++++++++++++
 ...bbitmq-passwords => 52-rabbitmq-passwords} |   0
 .../pre-configure.d/80-rabbitmq-cluster       |   4 +-
 10 files changed, 210 insertions(+), 137 deletions(-)
 create mode 100644 elements/rabbitmq-server/files/etc/init/rabbitmq-server.conf
 create mode 100644 elements/rabbitmq-server/files/etc/rabbitmq/rabbitmq-env.conf
 rename elements/rabbitmq-server/os-apply-config/{mnt/state => }/var/lib/rabbitmq/.erlang.cookie (100%)
 delete mode 100755 elements/rabbitmq-server/os-refresh-config/post-configure.d/40-rabbitmq
 create mode 100755 elements/rabbitmq-server/os-refresh-config/post-configure.d/51-rabbitmq
 rename
elements/rabbitmq-server/os-refresh-config/post-configure.d/{50-rabbitmq-passwords => 52-rabbitmq-passwords} (100%)

diff --git a/elements/rabbitmq-server/element-deps b/elements/rabbitmq-server/element-deps
index a2ba69110..49da280bc 100644
--- a/elements/rabbitmq-server/element-deps
+++ b/elements/rabbitmq-server/element-deps
@@ -5,3 +5,4 @@ os-refresh-config
 os-svc-install
 sysctl
 use-ephemeral
+ntp
diff --git a/elements/rabbitmq-server/files/etc/init/rabbitmq-server.conf b/elements/rabbitmq-server/files/etc/init/rabbitmq-server.conf
new file mode 100644
index 000000000..3d419001c
--- /dev/null
+++ b/elements/rabbitmq-server/files/etc/init/rabbitmq-server.conf
@@ -0,0 +1,27 @@
+start on runlevel [2345]
+stop on runlevel [016]
+respawn
+# The default post-start of 1 second sleep delays respawning enough to
+# not hit the default of 10 times in 5 seconds. Make it 2 times in 5s.
+respawn limit 2 5
+
+# Process will exit from SIGTERM due to post-stop pkill, prevent this
+# causing a respawn
+normal exit 0 TERM
+
+env RUN_DIR=/var/run/rabbitmq
+env PID_FILE=/var/run/rabbitmq/pid
+env OS_SVC_ENABLE_CONTROL=1
+export OS_SVC_ENABLE_CONTROL
+
+pre-start script
+    [ -d "$RUN_DIR" ] || install -d -D -m 0755 -o rabbitmq -g rabbitmq "$RUN_DIR"
+end script
+exec /usr/sbin/rabbitmq-server > /var/log/rabbitmq/startup_log \
+    2> /var/log/rabbitmq/startup_err
+
+post-start exec /usr/sbin/rabbitmqctl wait $PID_FILE >/dev/null 2>&1
+pre-stop exec /usr/sbin/rabbitmqctl stop $PID_FILE >/dev/null 2>&1
+
+# Get the Erlang nameserver too.
+post-stop exec /usr/bin/pkill -u rabbitmq >/dev/null 2>&1
diff --git a/elements/rabbitmq-server/files/etc/rabbitmq/rabbitmq-env.conf b/elements/rabbitmq-server/files/etc/rabbitmq/rabbitmq-env.conf
new file mode 100644
index 000000000..aadde2db9
--- /dev/null
+++ b/elements/rabbitmq-server/files/etc/rabbitmq/rabbitmq-env.conf
@@ -0,0 +1,4 @@
+HOME=/var/lib/rabbitmq
+LOG_BASE=/var/log/rabbitmq
+MNESIA_BASE=/var/lib/rabbitmq/mnesia
+PID_FILE=/var/run/rabbitmq/pid
diff --git a/elements/rabbitmq-server/install.d/20-rabbitmq-server b/elements/rabbitmq-server/install.d/20-rabbitmq-server
index f7e33d819..41bb0c4a6 100755
--- a/elements/rabbitmq-server/install.d/20-rabbitmq-server
+++ b/elements/rabbitmq-server/install.d/20-rabbitmq-server
@@ -8,6 +8,8 @@ install-packages rabbitmq-server
 register-state-path --leave-symlink /var/lib/rabbitmq
 register-state-path --leave-symlink /var/log/rabbitmq

+FILES="$(dirname "$0")/../files"
+
 # Note(jang): the rabbitmq-server service is installed, but not started, since
 # the first run of os-collect-config is required to configure it properly.

@@ -20,28 +22,8 @@ if [ "$DISTRO_NAME" = "ubuntu" ] || [ "$DISTRO_NAME" = "debian" -a "$DIB_INIT_SY
 # that it'll be running a venv-based service to use directly. Install an upstart
 # configuration that's compatible with os-svc-enable and os-svc-restart

-    cat > /etc/init/rabbitmq-server.conf < /var/log/rabbitmq/startup_log 2> /var/log/rabbitmq/startup_err
-post-start exec /usr/sbin/rabbitmqctl wait /var/run/rabbitmq/pid >/dev/null 2>&1
-pre-stop exec /usr/sbin/rabbitmqctl stop /var/run/rabbitmq/pid >/dev/null 2>&1
-
-# Get the Erlang nameserver too.
-post-stop exec /usr/bin/pkill -u rabbitmq >/dev/null 2>&1
-
-eof
+    FILE=/etc/init/rabbitmq-server.conf
+    install -g root -o root -m 0644 "${FILES}${FILE}" "${FILE}"
 fi

 if [ "$DIB_INIT_SYSTEM" = "systemd" ]; then
@@ -52,13 +34,8 @@ if [ "$DIB_INIT_SYSTEM" = "systemd" ]; then
     sed -i 's/\[Service\]/\[Service\]\nRestart=on-failure/g' /lib/systemd/system/rabbitmq-server.service
 fi

-
-cat > /etc/rabbitmq/rabbitmq-env.conf </dev/null |
+        grep -q "running_nodes,\[[^]]\+,"
+}
+
+# Number of nodes in the cluster according to remote node $1.
+# If $1 isn't in a cluster or it's in a cluster by itself, then this will
+# print 0.
+function cluster_size() {
+    local remote_node="${1}"
+    rabbitmqctl -n "rabbit@${remote_node}" cluster_status 2>/dev/null |
+        sed -n '/{running_nodes,\[[^]]\+,/,/\]\},/p' |
+        wc -l
+}
+
+function leave_cluster() {
+    rabbitmqctl stop_app
+    # This syncs all data into the cluster, then removes this node, cleaning local mnesia.
+    rabbitmqctl reset
+}
+export -f leave_cluster
+
+function join_cluster_with() {
+    local remote_node="${1}"
+    local local_node="${2}"
+    rabbitmqctl stop_app
+    rabbitmqctl join_cluster "rabbit@${remote_node}" 2>/dev/null || true
+    rabbitmqctl start_app
+
+    if ! is_in_cluster; then
+        echo "Failed to join node [${local_node}] with [${remote_node}]..."
+        return 1
+    fi
+}
+
+BOOTSTRAP_NODE="$(os-apply-config --key bootstrap_host.bootstrap_nodeid --type netaddress --key-default '')"
+NODE_INDEX=""
+# Find the nodes being worked on in the NODES array.
+for (( index = 0; index < ${TOTAL_NODES}; index++ )); do
+    if [ "${NODES[$index]}" == "${LOCAL_RABBIT_HOST}" ]; then
+        NODE_INDEX=${index}
+    fi
+done
+
+if [[ -z "${BOOTSTRAP_NODE}" || ${TOTAL_NODES} -lt 3 || -z "${NODE_INDEX}" ]]; then
+    # We do not know who the bootstrap is, why are we attempting to bring up a Rabbit cluster?
+    # -OR- we do not have sufficient nodes to support HA so let's abort.
+    # -OR- we did not find our node in the array and hence did not set NODE_INDEX.
+    echo "bootstrap_host.bootstrap_nodeid: ${BOOTSTRAP_NODE}, TOTAL_NODES: ${TOTAL_NODES}, NODE_INDEX: ${NODE_INDEX}"
+    echo "RabbitMQ cluster configuration prerequisites not met, aborting."
+    exit 255
+fi
+
+for (( index = 0; index < ${TOTAL_NODES}; index++ )); do
+    if ! ping -c1 "${NODES[$index]}"; then
+        echo "RabbitMQ host unreachable: ${NODES[$index]}"
+        HOST_UNREACHABLE=1
+    fi
+done
+[ -z "${HOST_UNREACHABLE:-}" ] || exit 1
+
+# Refuse to stop unless all nodes are running, this avoids pause_minority.
+# From the RabbitMQ docs: pause_minority
+#   Your network is maybe less reliable. You have clustered across 3 AZs
+#   in EC2, and you assume that only one AZ will fail at once. In that
+#   scenario you want the remaining two AZs to continue working and the
+#   nodes from the failed AZ to rejoin automatically and without fuss when
+#   the AZ comes back.
+# (See: os-apply-config/etc/rabbitmq/rabbitmq.config)
+#
+# We want to orchestrate nodes leaving the cluster. We'll do this using a
+# metronome. For example, if we have 3 nodes, there will be six periods.
+# The first node may leave in period 0. The second node may leave in period
+# 2. The third node may leave in period 4.
+#
+#   Metronome:   0 .. 1 .. 2 .. 3 .. 4 .. 5 ..
+#   Node leaves: 0 ....... 1 ....... 2 .......
+#
+# The dead periods in between allow for $PERIOD seconds of clock
+# desynchronization. PERIOD should be about half the length of time it
+# takes for a node to join the cluster.
+PERIOD=10
+NODE_LEAVES_AT=$(( ${NODE_INDEX} * 2 ))
+while is_in_cluster; do
+    NODES_IN_CLUSTER=$(cluster_size "${BOOTSTRAP_NODE}")
+    if [ ${NODES_IN_CLUSTER} -gt ${TOTAL_NODES} ]; then
+        echo "A node we don't know about appears to have joined the cluster, aborting."
+        exit 255
+    fi
+
+    METRONOME=$(( ($(date +%s) / ${PERIOD}) % (${TOTAL_NODES} * 2) ))
+    if [[ ${NODES_IN_CLUSTER} -eq ${TOTAL_NODES} && \
+          ${METRONOME} -eq ${NODE_LEAVES_AT} ]]; then
+        # All other nodes are in the cluster and it's our allotted time,
+        # safe to leave. Tell other nodes we're about to leave the cluster.
+        echo "Leaving cluster..."
+        timeout 300 bash -c leave_cluster || { rabbitmqctl start_app || true; exit 1; }
+    else
+        echo "Refusing to allow node to leave cluster..."
+    fi
+    sleep 2
+done
+
+# Restart RabbitMQ. We need to have left the cluster first or we risk data loss.
+os-svc-restart -n rabbitmq-server
+
+# We're the bootstrap node
+if [ "${LOCAL_RABBIT_HOST}" == "${BOOTSTRAP_NODE}" ]; then
+    # If we are not in a cluster keep trying to join a node.
+    # Note: This loop is required as the BOOTSTRAP_NODE may have left a running
+    # cluster and it therefore must re-join.
+    while ! is_in_cluster; do
+        # Try to join with each node in turn.
+        COUNT=$(( (${COUNT:-0} + 1) % ${TOTAL_NODES} ))
+        if [ ${COUNT} -ne ${NODE_INDEX} ]; then
+            join_cluster_with "${NODES[${COUNT}]}" "${LOCAL_RABBIT_HOST}" || true
+        fi
+    done
+
+    # Check that we have not got a partition i.e. The case where we do not have
+    # synced clocks and hence we can get split in the clustering A+B C. If we
+    # get this we will wait as this is more favourable than a bad/broken
+    # cluster set-up.
+    while [[ $(cluster_size "${LOCAL_RABBIT_HOST}") -ne ${TOTAL_NODES} ]]; do
+        echo "Waiting for nodes to join [${BOOTSTRAP_NODE}]..."
+        sleep 10
+    done
+else
+    # Wait until the BOOTSTRAP_NODE has at least formed a cluster with one node.
+    while [[ $(cluster_size "${BOOTSTRAP_NODE}") -lt 2 ]]; do
+        echo "Waiting for bootstrap node to initialise the cluster..."
+        sleep 10
+    done
+    is_in_cluster || join_cluster_with "${BOOTSTRAP_NODE}" "${LOCAL_RABBIT_HOST}"
+fi
+
+# Make sure that all queues (except those with auto-generated names) are
+# mirrored across all nodes in the cluster running:
+rabbitmqctl set_policy HA '^(?!amq\.).*' '{"ha-mode": "all"}'
+
+echo "RabbitMQ cluster configuration complete..."
diff --git a/elements/rabbitmq-server/os-refresh-config/post-configure.d/50-rabbitmq-passwords b/elements/rabbitmq-server/os-refresh-config/post-configure.d/52-rabbitmq-passwords
similarity index 100%
rename from elements/rabbitmq-server/os-refresh-config/post-configure.d/50-rabbitmq-passwords
rename to elements/rabbitmq-server/os-refresh-config/post-configure.d/52-rabbitmq-passwords
diff --git a/elements/rabbitmq-server/os-refresh-config/pre-configure.d/80-rabbitmq-cluster b/elements/rabbitmq-server/os-refresh-config/pre-configure.d/80-rabbitmq-cluster
index 8dce997a7..2f1f8b723 100755
--- a/elements/rabbitmq-server/os-refresh-config/pre-configure.d/80-rabbitmq-cluster
+++ b/elements/rabbitmq-server/os-refresh-config/pre-configure.d/80-rabbitmq-cluster
@@ -1,6 +1,4 @@
 #!/bin/bash
 set -eux

-[ -d /mnt/state/var/lib/rabbitmq ] || install -d -D -m 0770 -o rabbitmq -g rabbitmq /mnt/state/var/lib/rabbitmq
-install -m 600 -o rabbitmq -g rabbitmq /dev/null /mnt/state/var/lib/rabbitmq/.erlang.cookie
-[ -d /mnt/state/var/log/rabbitmq ] || install -d -D -m 0770 -o rabbitmq -g rabbitmq /mnt/state/var/log/rabbitmq
+install -m 600 -o rabbitmq -g rabbitmq /dev/null /var/lib/rabbitmq/.erlang.cookie