RabbitMQ: Make clustering robust

This PS improves the robustness of the RabbitMQ clustering logic
to support reforming the cluster following recreation of all pods,
and to wait for the cluster to fully form before continuing in case
of an upgrade.

This ability was lost with the introduction of the following PS,
which prevented reformation of the cluster from scratch.
 * https://review.openstack.org/#/c/637337/

Change-Id: I99d32fbd3c56dde492717a7850b61001fa8f7fb5
Signed-off-by: Pete Birley <pete@port.direct>
This commit is contained in:
Pete Birley 2019-03-19 20:32:14 -05:00 committed by Pete Birley
parent e97faaaf0f
commit 9029dbe8dd
5 changed files with 121 additions and 2 deletions

View File

@ -0,0 +1,45 @@
#!/bin/bash
{{/*
Copyright 2017 The Openstack-Helm Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/}}
set -e

#######################################
# Split an admin connection URI of the form
#   scheme://user:password@host:port/path
# into its components using shell parameter expansion only.
#
# The user-info part is everything before the LAST '@' and the password is
# everything after the FIRST ':' inside it, so passwords containing ':', '/'
# or '@' are kept intact instead of being cut at the first separator.
# NOTE(review): no percent-decoding is performed; this assumes the URI carries
# the credentials verbatim - confirm against the template that renders it.
#
# Globals:   RABBIT_HOSTNAME, RABBIT_PORT, RABBITMQ_ADMIN_USERNAME,
#            RABBITMQ_ADMIN_PASSWORD (all written; empty when the
#            corresponding component is absent from the URI)
# Arguments: $1 - connection URI
#######################################
function parse_rabbit_admin_connection () {
  local uri="$1"
  # Drop the leading "scheme://" if there is one.
  local remainder="${uri#*://}"
  local userinfo=""
  local location="${remainder}"

  case "${remainder}" in
    *@*)
      userinfo="${remainder%@*}"
      location="${remainder##*@}"
      ;;
  esac

  # Anything from the first '/' onwards is the URI path, not host or port.
  local hostport="${location%%/*}"

  RABBIT_HOSTNAME="${hostport%%:*}"
  RABBIT_PORT=""
  case "${hostport}" in
    *:*) RABBIT_PORT="${hostport#*:}" ;;
  esac

  RABBITMQ_ADMIN_USERNAME="${userinfo%%:*}"
  RABBITMQ_ADMIN_PASSWORD=""
  case "${userinfo}" in
    *:*) RABBITMQ_ADMIN_PASSWORD="${userinfo#*:}" ;;
  esac
}

# Extract connection details and admin user credentials
parse_rabbit_admin_connection "${RABBITMQ_ADMIN_CONNECTION:-}"
#######################################
# Print the number of whitespace-separated words that
# `rabbitmqadmin list nodes -f bash` emits (presumably one word per node
# known to the management API - confirm against rabbitmqadmin's bash format).
# Globals:   RABBIT_HOSTNAME, RABBIT_PORT, RABBITMQ_ADMIN_USERNAME,
#            RABBITMQ_ADMIN_PASSWORD (all read)
# Outputs:   the word count on stdout (0 when rabbitmqadmin prints nothing)
#######################################
function active_rabbit_nodes () {
  local -a admin_args=(
    "--host=${RABBIT_HOSTNAME}"
    "--port=${RABBIT_PORT}"
    "--username=${RABBITMQ_ADMIN_USERNAME}"
    "--password=${RABBITMQ_ADMIN_PASSWORD}"
  )
  rabbitmqadmin "${admin_args[@]}" list nodes -f bash | wc -w
}
# Poll every 10 seconds until active_rabbit_nodes reports at least
# RABBIT_REPLICA_COUNT nodes. The count is evaluated inside the `if`
# condition so that a failing or non-numeric result just triggers another
# round instead of aborting the script under `set -e`.
while true; do
  if [ "$(active_rabbit_nodes)" -ge "$RABBIT_REPLICA_COUNT" ]; then
    break
  fi
  echo "Waiting for number of nodes in cluster to match number of desired pods ($RABBIT_REPLICA_COUNT)"
  sleep 10
done

View File

@ -32,4 +32,6 @@ data:
{{ tuple "bin/_rabbitmq-liveness.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
rabbitmq-start.sh: |
{{ tuple "bin/_rabbitmq-start.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
rabbitmq-wait-for-cluster.sh: |
{{ tuple "bin/_rabbitmq-wait-for-cluster.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
{{ end }}

View File

@ -0,0 +1,63 @@
{{/*
Copyright 2017 The Openstack-Helm Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/}}
{{/*
Job that runs /tmp/rabbitmq-wait-for-cluster.sh (mounted from the
<release>-rabbitmq-bin ConfigMap) with the admin connection URI and the
configured server replica count passed in as environment variables.
Rendered only when .Values.manifests.job_cluster_wait is set.
*/}}
{{- if .Values.manifests.job_cluster_wait }}
{{- $envAll := . }}

{{- $serviceAccountName := print .Release.Name "-cluster-wait" }}
{{ tuple $envAll "cluster_wait" $serviceAccountName | include "helm-toolkit.snippets.kubernetes_pod_rbac_serviceaccount" }}
---
apiVersion: batch/v1
kind: Job
metadata:
  name: "{{.Release.Name}}-cluster-wait"
  labels:
{{ tuple $envAll "rabbitmq" "cluster-wait" | include "helm-toolkit.snippets.kubernetes_metadata_labels" | indent 4 }}
  annotations:
    {{ tuple $envAll | include "helm-toolkit.snippets.release_uuid" }}
spec:
  template:
    metadata:
      labels:
{{ tuple $envAll "rabbitmq" "cluster-wait" | include "helm-toolkit.snippets.kubernetes_metadata_labels" | indent 8 }}
    spec:
      serviceAccountName: {{ $serviceAccountName }}
      restartPolicy: OnFailure
      nodeSelector:
        # Key and value both come from labels.jobs so the selector stays
        # coherent when the job and test labels are overridden separately.
        {{ $envAll.Values.labels.jobs.node_selector_key }}: {{ $envAll.Values.labels.jobs.node_selector_value | quote }}
      initContainers:
      # Use the same "cluster_wait" dependency key as the service account above.
{{ tuple $envAll "cluster_wait" list | include "helm-toolkit.snippets.kubernetes_entrypoint_init_container" | indent 8 }}
      containers:
        - name: {{.Release.Name}}-rabbitmq-cluster-wait
{{ tuple $envAll "scripted_test" | include "helm-toolkit.snippets.image" | indent 10 }}
          env:
            - name: RABBITMQ_ADMIN_CONNECTION
              value: {{ tuple "oslo_messaging" "internal" "user" "http" $envAll | include "helm-toolkit.endpoints.authenticated_endpoint_uri_lookup" | quote }}
            - name: RABBIT_REPLICA_COUNT
              value: {{ $envAll.Values.pod.replicas.server | quote }}
          command:
            - /tmp/rabbitmq-wait-for-cluster.sh
          volumeMounts:
            - name: rabbitmq-bin
              mountPath: /tmp/rabbitmq-wait-for-cluster.sh
              subPath: rabbitmq-wait-for-cluster.sh
              readOnly: true
      volumes:
        - name: rabbitmq-bin
          configMap:
            name: {{ printf "%s-%s" $envAll.Release.Name "rabbitmq-bin" | quote }}
            defaultMode: 0555
{{- end }}

View File

@ -138,8 +138,9 @@ spec:
readinessProbe:
initialDelaySeconds: 10
timeoutSeconds: 10
tcpSocket:
port: {{ tuple "oslo_messaging" "internal" "amqp" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}
exec:
command:
- /tmp/rabbitmq-liveness.sh
livenessProbe:
initialDelaySeconds: 30
timeoutSeconds: 10

View File

@ -27,6 +27,9 @@ labels:
test:
node_selector_key: openstack-control-plane
node_selector_value: enabled
jobs:
node_selector_key: openstack-control-plane
node_selector_value: enabled
images:
tags:
@ -150,6 +153,10 @@ dependencies:
services:
- endpoint: internal
service: oslo_messaging
cluster_wait:
services:
- endpoint: internal
service: oslo_messaging
image_repo_sync:
services:
- endpoint: internal
@ -281,6 +288,7 @@ manifests:
configmap_bin: true
configmap_etc: true
ingress_management: true
job_cluster_wait: true
job_image_repo_sync: true
pod_test: true
monitoring: