Add rabbitmq deployment with HA. (#123)

This work is based on http://github.com/openstack-fuel-ccp-rabbitmq and mostly adapts that solution
to a Helm-based deployment.
This commit is contained in:
Tomasz Paszkowski 2017-02-17 19:49:32 +01:00 committed by Alan Meadows
parent d63d344dc9
commit e3f1389f60
19 changed files with 428 additions and 38 deletions

View File

@ -1,12 +1,12 @@
.PHONY: ceph bootstrap mariadb postgresql keystone memcached rabbitmq helm-toolkit openstack neutron nova cinder heat maas all clean
.PHONY: ceph bootstrap mariadb etcd postgresql keystone memcached rabbitmq helm-toolkit openstack neutron nova cinder heat maas all clean
B64_DIRS := helm-toolkit/secrets
B64_EXCLUDE := $(wildcard helm-toolkit/secrets/*.b64)
CHARTS := ceph mariadb postgresql rabbitmq memcached keystone glance horizon neutron nova cinder heat maas openstack
CHARTS := ceph mariadb etcd postgresql rabbitmq memcached keystone glance horizon neutron nova cinder heat maas openstack
TOOLKIT_TPL := helm-toolkit/templates/_globals.tpl
all: helm-toolkit ceph bootstrap mariadb postgresql rabbitmq memcached keystone glance horizon neutron nova cinder heat maas openstack
all: helm-toolkit ceph bootstrap mariadb etcd postgresql rabbitmq memcached keystone glance horizon neutron nova cinder heat maas openstack
helm-toolkit: build-helm-toolkit
@ -17,6 +17,8 @@ bootstrap: build-bootstrap
mariadb: build-mariadb
etcd: build-etcd
postgresql: build-postgresql
keystone: build-keystone

3
etcd/Chart.yaml Normal file
View File

@ -0,0 +1,3 @@
# Helm chart metadata for the etcd chart.
description: Chart for etcd
name: etcd
version: 0.1.0

4
etcd/requirements.yaml Normal file
View File

@ -0,0 +1,4 @@
# Chart dependencies: the helm-toolkit chart, fetched from the local chart
# repository served on localhost:8879.
dependencies:
  - name: helm-toolkit
    repository: http://localhost:8879/charts
    version: 0.1.0

View File

@ -0,0 +1,36 @@
# Deployment running a single etcd container. The replica count, image,
# upgrade strategy, node selector and client port all come from the chart
# values.
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
  name: etcd
spec:
  replicas: {{ .Values.resources.etcd.replicas }}
  revisionHistoryLimit: {{ .Values.upgrades.revision_history }}
  strategy:
    type: {{ .Values.upgrades.pod_replacement_strategy }}
    # The rollingUpdate parameters are only emitted for the RollingUpdate
    # strategy.
    {{- if eq .Values.upgrades.pod_replacement_strategy "RollingUpdate" }}
    rollingUpdate:
      maxUnavailable: {{ .Values.upgrades.rolling_update.max_unavailable }}
      maxSurge: {{ .Values.upgrades.rolling_update.max_surge }}
    {{- end }}
  template:
    metadata:
      labels:
        # Must match the selector of the etcd Service.
        app: etcd
    spec:
      nodeSelector:
        # Quoted so that boolean-looking label values (e.g. true) stay strings.
        {{ .Values.labels.node_selector_key }}: {{ .Values.labels.node_selector_value | quote }}
      containers:
        - name: etcd
          image: {{ .Values.images.etcd }}
          imagePullPolicy: {{ .Values.images.pull_policy }}
          command:
            - etcd
            # Listen on every interface inside the pod.
            - --listen-client-urls
            - http://0.0.0.0:{{ .Values.network.port }}
            # Advertise the host name that the etcd Service is published under.
            - --advertise-client-urls
            - http://{{ .Values.network.host }}:{{ .Values.network.port }}
          ports:
            - containerPort: {{ .Values.network.port }}
          # Ready as soon as the client port accepts TCP connections.
          readinessProbe:
            tcpSocket:
              port: {{ .Values.network.port }}

View File

@ -0,0 +1,10 @@
# Service exposing the etcd client port. Its name is the host value from the
# chart values, which is also the host etcd advertises to clients.
apiVersion: v1
kind: Service
metadata:
  name: {{ .Values.network.host }}
spec:
  sessionAffinity: ClientIP
  ports:
    - port: {{ .Values.network.port }}
  selector:
    # Selects the pods created by the etcd Deployment.
    app: etcd

23
etcd/values.yaml Normal file
View File

@ -0,0 +1,23 @@
# Default values for the etcd chart.

images:
  etcd: gcr.io/google_containers/etcd-amd64:2.2.5
  pull_policy: "IfNotPresent"

# Deployment upgrade behaviour; rolling_update is only used when
# pod_replacement_strategy is RollingUpdate.
upgrades:
  revision_history: 3
  pod_replacement_strategy: RollingUpdate
  rolling_update:
    max_unavailable: 1
    max_surge: 3

# Node label (key/value) the etcd pod is scheduled onto.
labels:
  node_selector_key: openstack-control-plane
  node_selector_value: enabled

# Client port, and the host name used for the Service and the advertised URL.
network:
  port: 2379
  host: etcd

resources:
  etcd:
    # No HA support, don't change this.
    replicas: 1

View File

@ -0,0 +1,93 @@
#!/usr/bin/env bash
# This is taken from https://github.com/openstack/fuel-ccp-rabbitmq/blob/master/service/files/rabbitmq-check-helpers.sh.j2
# Helper functions sourced by the RabbitMQ liveness and readiness probe scripts.
# File holding the node start time in epoch seconds; read by marker-state.
MARKER_PATH=/tmp/rabbit-startup-marker
# How many seconds we give a node before successful liveness checks
# become mandatory. Filled in from the probes_delay chart value.
FRESH_NODE_TIMEOUT={{ .Values.probes_delay }}
# Prefix put in front of every log line; changed through set-log-prefix.
LP=""
# Remember the log prefix, wrapped in square brackets, in the global LP.
# $1 - prefix text; the shell aborts with an error if it is missing or empty.
set-log-prefix() {
    local tag="${1:?}"
    LP="[${tag}]"
}
# Print all arguments on one line, preceded by the current log prefix LP.
log-it() {
    local -a words=("$LP" "$@")
    echo "${words[@]}"
}
# Filter: copy stdin to stdout, putting the current log prefix LP and a space
# in front of every line.
prepend-log-prefix() {
    awk -v prefix="$LP" '{ printf "%s %s\n", prefix, $0 }'
}
# Classify the startup marker file and print the verdict on stdout:
#   missing - MARKER_PATH does not exist
#   fresh   - the timestamp in the marker is at most FRESH_NODE_TIMEOUT
#             seconds old
#   stale   - the marker is older than that
# Always returns 0.
marker-state() {
    local verdict started_at deadline now

    if [[ -f $MARKER_PATH ]]; then
        started_at="$(cat $MARKER_PATH)"
        deadline=$((FRESH_NODE_TIMEOUT + $started_at))
        now=$(date +%s)
        if [[ $now -le $deadline ]]; then
            verdict="fresh"
        else
            verdict="stale"
        fi
    else
        verdict="missing"
    fi

    echo "$verdict"
    return 0
}
# Ask the local node to evaluate the Erlang term 'ok.' through rabbitmqctl.
# Returns 0 when the combined stdout/stderr is exactly "ok"; otherwise logs
# the output and returns 1.
ping-node() {
    local reply
    reply="$(rabbitmqctl eval 'ok.' 2>&1)"

    if [[ "$reply" != "ok" ]]; then
        log-it "ping-node error:"
        echo "$reply" | prepend-log-prefix
        return 1
    fi

    return 0
}
# Check whether a process registered as rabbit_boot exists on the node.
# Returns 0 when rabbitmqctl answers "true" and 1 when it answers "false".
# Any other output is logged and also yields 1.
is-node-booting() {
    local answer
    answer="$(rabbitmqctl eval 'is_pid(erlang:whereis(rabbit_boot)).' 2>&1)"

    if [[ "$answer" == "true" ]]; then
        return 0
    elif [[ "$answer" == "false" ]]; then
        return 1
    fi

    log-it "is-node-booting error:"
    echo "$answer" | prepend-log-prefix
    return 1
}
# Run "rabbitmqctl node_health_check -t 30". Returns 0 when the output
# contains the text "Health check passed"; otherwise logs the output and
# returns 1.
is-node-healthy() {
    local report
    report=$(rabbitmqctl node_health_check -t 30 2>&1)

    if [[ "$report" != *"Health check passed"* ]]; then
        echo "$report" | prepend-log-prefix
        return 1
    fi

    return 0
}
# Run the autocluster plugin's cluster health check through rabbitmqctl.
# Returns 0 when the output starts with "SUCCESS:", and 1 (after logging the
# output) when it starts with "FAILURE:". Any other output is logged but the
# node is still treated as correctly clustered (returns 0).
is-node-properly-clustered() {
    # Keep the output local, like the sibling helpers do, so it does not
    # overwrite a variable named "result" in the calling script.
    local result
    result="$(rabbitmqctl eval 'autocluster:cluster_health_check().' 2>&1)"
    if [[ $result =~ ^SUCCESS: ]]; then
        return 0
    elif [[ $result =~ ^FAILURE: ]]; then
        echo "$result" | prepend-log-prefix
        return 1
    fi
    log-it "Unexpected health-check output, giving the node the benefit of the doubt"
    echo "$result" | prepend-log-prefix
    return 0
}

View File

@ -0,0 +1,63 @@
#!/usr/bin/env bash
# This is taken from https://github.com/openstack/fuel-ccp-rabbitmq/blob/master/service/files/rabbitmq-liveness.sh.j2
# Liveness probe for the RabbitMQ container.
# Abort on failing commands, unset variables and failing pipeline stages.
set -eu
set -o pipefail
# Send this script's stdout and stderr to the stderr of PID 1
# (presumably so probe output ends up in the container log - TODO confirm).
exec 1>/proc/1/fd/2 2>/proc/1/fd/2
# Load the shared helpers from the directory this script is located in.
source $(readlink -f $(dirname $0))/rabbitmq-check-helpers.sh
# Tag every log line with the probe type and this process id.
set-log-prefix "liveness:$$"
log-it "Starting liveness probe at $(date +'%Y-%m-%d %H:%M:%S')"
# Liveness decision, driven by the state of the startup marker:
#   missing - the probe ran before the marker was written; report alive.
#   fresh   - the node started recently; tolerate a VM that is not up yet or
#             a node that is still booting, otherwise require a passing
#             health check and cluster check.
#   stale   - the node has been up for a while; require a passing health
#             check and cluster check.
# Returns 0 when the node is considered alive, 1 otherwise.
main() {
    local marker_state
    marker_state="$(marker-state)"
    case $marker_state in
        missing)
            log-it "Startup marker missing, probably probe was executed too early"
            return 0
            ;;
        fresh) # node has recently started - it can still be booting
            if ! ping-node; then
                log-it "Fresh node, erlang VM hasn't started yet - giving it another chance"
                # Erlang VM hasn't started yet
                return 0
            fi
            if is-node-booting; then
                log-it "Node is still booting, giving it some time to finish"
                return 0
            fi
            if ! is-node-healthy; then
                log-it "Node is unhealthy"
                return 1
            fi
            if ! is-node-properly-clustered; then
                log-it "Found clustering inconsistency, giving up"
                return 1
            fi
            return 0
            ;;
        stale) # node has started long ago - it should be either ready or dead
            if ! is-node-healthy; then
                log-it "Long-running node become unhealthy"
                return 1
            fi
            if ! is-node-properly-clustered; then
                # Logged through log-it, like every other message, so the
                # line carries the probe's log prefix.
                log-it "Long-running node became inconsistent with the rest of the cluster"
                return 1
            fi
            return 0
            ;;
        *)
            # Braces are required: "$marker-state" would expand the unset
            # variable "marker" and abort the script under "set -u".
            log-it "Unexpected marker-state '${marker_state}'"
            return 1
            ;;
    esac
}
# main is called as an "if" condition so that a non-zero return does not
# trigger "set -e" before the result has been logged.
if main; then
rc=0
else
rc=$?
fi
log-it "Ready to return $rc"
# The exit status of the probe is the status returned by main.
exit $rc

View File

@ -0,0 +1,33 @@
#!/usr/bin/env bash
# This is taken from https://github.com/openstack/fuel-ccp-rabbitmq/blob/master/service/files/rabbitmq-readiness.sh.j2
# Readiness probe for the RabbitMQ container.
# Abort on failing commands, unset variables and failing pipeline stages.
set -eu
set -o pipefail
# Send this script's stdout and stderr to the stderr of PID 1
# (presumably so probe output ends up in the container log - TODO confirm).
exec 1>/proc/1/fd/2 2>/proc/1/fd/2
# Load the shared helpers from the directory this script is located in.
source $(readlink -f $(dirname $0))/rabbitmq-check-helpers.sh
# Tag every log line with the probe type and this process id.
set-log-prefix "readiness:$$"
log-it "Starting readiness probe at $(date +'%Y-%m-%d %H:%M:%S')"
# Readiness decision. The node is ready (return 0) only when the startup
# marker exists, the node health check passes and the cluster check passes.
# Each failing step logs its reason and returns 1.
main() {
    case "$(marker-state)" in
        missing)
            log-it "Startup marker missing, probably probe was executed too early"
            return 1
            ;;
    esac

    is-node-healthy || {
        log-it "Node is unhealthy"
        return 1
    }

    is-node-properly-clustered || {
        log-it "Node is inconsistent with the rest of the cluster"
        return 1
    }

    return 0
}
# main is called as an "if" condition so that a non-zero return does not
# trigger "set -e" before the result has been logged.
if main; then
rc=0
else
rc=$?
fi
log-it "Ready to return $rc"
# The exit status of the probe is the status returned by main.
exit $rc

View File

@ -0,0 +1,12 @@
#!/bin/bash
# Container entrypoint: installs the Erlang cookie, writes the startup marker
# and then replaces itself with rabbitmq-server.
# Abort on errors, unset variables and failing pipeline stages; trace commands.
set -eux
set -o pipefail
# Copy the cookie from /etc/rabbitmq into the RabbitMQ data directory and
# restrict it to its owner.
cp /etc/rabbitmq/erlang.cookie /var/lib/rabbitmq/.erlang.cookie
chmod 600 /var/lib/rabbitmq/.erlang.cookie
# The marker should be written after rabbitmq-server has started, but in the
# current design we don't have any other way of doing this. A PostStart hook
# (the original comment said "PreStart", which Kubernetes does not offer)
# could not be used here because:
# - it is executed just after container creation (not after the entrypoint)
# - currently, there are (hopefully rare) scenarios where PostStart hooks may not be delivered.
# Startup marker is used by liveness and readiness probes.
date +%s > /tmp/rabbit-startup-marker
# exec so that rabbitmq-server takes over this process.
exec /usr/lib/rabbitmq/bin/rabbitmq-server

View File

@ -1,18 +0,0 @@
chown -R rabbitmq:rabbitmq /var/lib/rabbitmq
/etc/init.d/rabbitmq-server start
rabbitmq-plugins enable rabbitmq_tracing
rabbitmqctl trace_on
rabbitmqctl add_user {{ .Values.auth.default_user }} {{ .Values.auth.default_pass }} || true
rabbitmqctl set_permissions {{ .Values.auth.default_user }} ".*" ".*" ".*" || true
rabbitmqctl add_user {{ .Values.auth.admin_user }} {{ .Values.auth.admin_pass }}|| true
rabbitmqctl set_permissions {{ .Values.auth.admin_user }} ".*" ".*" ".*" || true
rabbitmqctl set_user_tags {{ .Values.auth.admin_user }} administrator || true
rabbitmqctl change_password guest {{ .Values.auth.default_pass }} || true
rabbitmqctl set_user_tags guest monitoring || true
/etc/init.d/rabbitmq-server stop
exec rabbitmq-server

View File

@ -7,5 +7,11 @@ metadata:
type: configuration
component: messaging
data:
start_rabbitmq.sh: |
{{ tuple "bin/_start_rabbit.tpl" . | include "helm-toolkit.template" | indent 4 }}
rabbitmq-liveness.sh: |
{{ tuple "bin/_rabbitmq-liveness.sh.tpl" . | include "helm-toolkit.template" | indent 4 }}
rabbitmq-readiness.sh: |
{{ tuple "bin/_rabbitmq-readiness.sh.tpl" . | include "helm-toolkit.template" | indent 4 }}
rabbitmq-check-helpers.sh: |
{{ tuple "bin/_rabbitmq-check-helpers.sh.tpl" . | include "helm-toolkit.template" | indent 4 }}
start.sh: |
{{ tuple "bin/_rabbitmq-start.sh.tpl" . | include "helm-toolkit.template" | indent 4 }}

View File

@ -0,0 +1,18 @@
# ConfigMap holding the RabbitMQ configuration files. Each entry is rendered
# from a template under etc/ and indented by four spaces so that it sits
# inside its literal block scalar.
apiVersion: v1
kind: ConfigMap
metadata:
  name: rabbitmq-etc
  labels:
    system: openstack
    type: configuration
    component: messaging
data:
  enabled_plugins: |
{{ tuple "etc/_enabled_plugins.tpl" . | include "helm-toolkit.template" | indent 4 }}
  erlang.cookie: |
{{ tuple "etc/_erlang.cookie.tpl" . | include "helm-toolkit.template" | indent 4 }}
  rabbitmq-env.conf: |
{{ tuple "etc/_rabbitmq-env.conf.tpl" . | include "helm-toolkit.template" | indent 4 }}
  rabbitmq.config: |
{{ tuple "etc/_rabbitmq.config.tpl" . | include "helm-toolkit.template" | indent 4 }}

View File

@ -1,3 +1,5 @@
{{- $envAll := . }}
{{- $dependencies := .Values.dependencies }}
kind: Deployment
apiVersion: extensions/v1beta1
metadata:
@ -18,6 +20,27 @@ spec:
app: rabbitmq
annotations:
configmap-bin-hash: {{ tuple "configmap-bin.yaml" . | include "helm-toolkit.hash" }}
configmap-etc-hash: {{ tuple "configmap-etc.yaml" . | include "helm-toolkit.hash" }}
pod.beta.kubernetes.io/init-containers: '[
{{ tuple $envAll $dependencies | include "helm-toolkit.kubernetes_entrypoint_init_container" | indent 10 }}
]'
# TODO: this needs to be moved to common.
scheduler.alpha.kubernetes.io/affinity: >
{
"podAntiAffinity": {
"preferredDuringSchedulingIgnoredDuringExecution": [{
"labelSelector": {
"matchExpressions": [{
"key": "app",
"operator": "In",
"values":["rabbitmq"]
}]
},
"topologyKey": "kubernetes.io/hostname",
"weight": 10
}]
}
}
spec:
nodeSelector:
{{ .Values.labels.node_selector_key }}: {{ .Values.labels.node_selector_value }}
@ -27,6 +50,9 @@ spec:
- name: rabbitmq-bin
configMap:
name: rabbitmq-bin
- name: rabbitmq-etc
configMap:
name: rabbitmq-etc
containers:
- name: rabbitmq
image: {{ .Values.images.rabbitmq }}
@ -40,21 +66,40 @@ spec:
memory: {{ .Values.resources.api.requests.memory | quote }}
{{- end }}
command:
- bash
- /tmp/start_rabbitmq.sh
- bash
- /scripts/start.sh
env:
- name: RABBITMQ_POD_IP
valueFrom:
fieldRef:
fieldPath: status.podIP
readinessProbe:
timeoutSeconds: {{ .Values.probes_timeout }}
exec:
command:
- bash
- /scripts/rabbitmq-readiness.sh
livenessProbe:
tcpSocket:
port: {{.Values.network.port.public}}
initialDelaySeconds: 60
timeoutSeconds: 5
ports:
- name: rabbitmq
containerPort: {{.Values.network.port.public}}
- name: management
containerPort: {{.Values.network.port.management}}
initialDelaySeconds: {{ .Values.probes_delay }}
timeoutSeconds: {{ .Values.probes_timeout }}
exec:
command:
- bash
- /scripts/rabbitmq-liveness.sh
volumeMounts:
- name: rabbitmq-emptydir
mountPath: /var/lib/rabbitmq
- name: rabbitmq-bin
mountPath: /tmp/start_rabbitmq.sh
subPath: start_rabbitmq.sh
mountPath: /scripts
- name: rabbitmq-etc
mountPath: /etc/rabbitmq/enabled_plugins
subPath: enabled_plugins
- name: rabbitmq-etc
mountPath: /etc/rabbitmq/erlang.cookie
subPath: erlang.cookie
- name: rabbitmq-etc
mountPath: /etc/rabbitmq/rabbitmq-env.conf
subPath: rabbitmq-env.conf
- name: rabbitmq-etc
mountPath: /etc/rabbitmq/rabbitmq.config
subPath: rabbitmq.config

View File

@ -0,0 +1 @@
[{{ include "helm-toolkit.joinListWithComma" .Values.enabled_plugins }}].

View File

@ -0,0 +1 @@
{{ .Values.erlang_cookie }}

View File

@ -0,0 +1,7 @@
# Environment settings for the RabbitMQ server; the delay and log level are
# filled in from the autocluster section of the chart values.
# NOTE(review): "-" presumably sends the logs to standard output - confirm
# against the RabbitMQ documentation.
RABBITMQ_LOGS=-
RABBITMQ_SASL_LOGS=-
# Autocluster plugin: use the etcd backend.
AUTOCLUSTER_TYPE=etcd
AUTOCLUSTER_DELAY={{ .Values.autocluster.delay }}
RABBITMQ_USE_LONGNAME=true
AUTOCLUSTER_LOG_LEVEL={{ .Values.autocluster.log_level }}
# The node name is built from the RABBITMQ_POD_IP environment variable.
NODENAME="rabbit@${RABBITMQ_POD_IP}"

View File

@ -0,0 +1,27 @@
% RabbitMQ configuration (Erlang terms) for the rabbit and autocluster
% applications; values in template actions come from the chart values.
% Each list opens with a dummy entry so that every real entry can start with
% a comma and be added or removed without touching its neighbours.
[
  {rabbit, [
    {dummy_param_without_comma, true}
    ,{tcp_listeners, [
      {"0.0.0.0", {{ .Values.network.port.public }} }
    ]}
    ,{default_user, <<"{{ .Values.auth.default_user }}">>}
    ,{default_pass, <<"{{ .Values.auth.default_pass }}">>}
    ,{loopback_users, []}
    ,{cluster_partition_handling, ignore}
    ,{queue_master_locator, <<"random">>}
  ]}
  ,{autocluster, [
    {dummy_param_without_comma, true}
    ,{backend, etcd}
    ,{autocluster_log_level,{{ .Values.autocluster.log_level }}}
    ,{autocluster_failure, stop}
    ,{cleanup_interval, 30}
    ,{cluster_cleanup, true}
    ,{cleanup_warn_only, false}
    ,{etcd_node_ttl, 15}
    ,{etcd_scheme, http}
    % The host is rendered as a quoted string: a bare host name containing
    % dots or hyphens is not a valid Erlang atom and would break this file.
    ,{etcd_host, {{ .Values.endpoints.etcd.hosts.default | quote }}}
    ,{etcd_port, {{ .Values.endpoints.etcd.port }}}
  ]}
].
% EOF

View File

@ -3,7 +3,7 @@
# Declare name/value pairs to be passed into your templates.
# name: value
replicas: "1" # this must be quoted to deal with atoi
replicas: 3
resources:
enabled: false
@ -37,4 +37,28 @@ network:
management: '15672'
images:
rabbitmq: "rabbitmq:3-management"
rabbitmq: "registry.mcp.fuel-infra.org/mcp/rabbitmq:ocata-unstable"
dep_check: "quay.io/stackanetes/kubernetes-entrypoint:v0.1.1"
pull_policy: "IfNotPresent"
enabled_plugins:
- autocluster
erlang_cookie: openstack-cookie
endpoints:
etcd:
hosts:
default: etcd
port: 2379
autocluster:
log_level: info
delay: 15
probes_delay: 180
probes_timeout: 10
dependencies:
service:
- etcd