Add ability to disable HA for RabbitMQ queues
Add two flags: * enable_rpc_ha which enables queue mirroring for RPC queues * enable_notifications_ha which enables queue mirroring for Ceilometer queues Since the feature is experimental, both flags are set to true by default to preserve current behaviour. The change is implemented in several steps: * the upstream script is changed so that it allows extending the list of parameters and uses a policy file to define RabbitMQ policies. * we add our own version of the OCF script which wraps around the upstream one. It defines new enable_rpc_ha and enable_notifications_ha parameters and passes their values to the upstream script. * we add our policy file, where we use the introduced parameters to decide which policies we should set. So we will have two OCF scripts for RabbitMQ in our deployment: * rabbitmq-server-upstream - the upstream version * rabbitmq-server - our extension, which will be used in the environment The upstream version of the script is pushed upstream along with an empty policy file, so that other users can define their own policies or extend the script if needed. Here are the corresponding pull requests: https://github.com/rabbitmq/rabbitmq-server/pull/480 https://github.com/rabbitmq/rabbitmq-server/pull/482 (both are already merged) Text for Operations Guide: It is possible to significantly reduce the load that OpenStack puts on RabbitMQ by disabling queue mirroring. This can be done separately for RPC queues and Ceilometer ones. To disable mirroring for RPC queues, execute the following command on one of the controllers: crm_resource --resource p_rabbitmq-server --set-parameter \ enable_rpc_ha --parameter-value false To disable mirroring for Ceilometer queues, execute the following command on one of the controllers: crm_resource --resource p_rabbitmq-server --set-parameter \ enable_notifications_ha --parameter-value false In order for any of the changes to take effect, the RabbitMQ service must be restarted. 
To do that, first execute pcs resource disable master_p_rabbitmq-server Then monitor RabbitMQ state using command pcs resource until it shows that all RabbitMQ nodes are stopped. Once they are, execute the following command to start RabbitMQ: pcs resource enable master_p_rabbitmq-server Beware: during restart all messages accumulated in RabbitMQ will be lost. Also, OpenStack will stop functioning until RabbitMQ is up again, so plan accordingly. Note that it is not yet well tested how this configuration affects failover when some cluster nodes go down. Hence it is experimental, use at your own risk! DocImpact: ops-guide Implements: blueprint rabbitmq-disable-mirroring-for-rpc Change-Id: I80ae231ca64e2a903b0968d36ba0e85ca9cc9891
This commit is contained in:
parent
1c74ab1f16
commit
7e96ef47ca
|
@ -9,7 +9,9 @@ override_dh_fixperms:
|
|||
|
||||
override_dh_install:
|
||||
dh_install
|
||||
mv debian/fuel-ha-utils/usr/lib/ocf/resource.d/fuel/rabbitmq debian/fuel-ha-utils/usr/lib/ocf/resource.d/fuel/rabbitmq-server
|
||||
#TODO(dmitryme): remove rabbitmq-server-upstream once we switch to rabbitmq-3.5.7, as it will be included here
|
||||
mv debian/fuel-ha-utils/usr/lib/ocf/resource.d/fuel/rabbitmq debian/fuel-ha-utils/usr/lib/ocf/resource.d/fuel/rabbitmq-server-upstream
|
||||
mv debian/fuel-ha-utils/usr/lib/ocf/resource.d/fuel/rabbitmq-fuel debian/fuel-ha-utils/usr/lib/ocf/resource.d/fuel/rabbitmq-server
|
||||
mv debian/fuel-misc/usr/bin/logrotate debian/fuel-misc/usr/bin/fuel-logrotate
|
||||
mv debian/fuel-umm/usr/lib/umm/umm_svc.u1404 debian/fuel-umm/usr/lib/umm/umm_svc.local
|
||||
|
||||
|
|
|
@ -47,6 +47,8 @@ if $queue_provider == 'rabbitmq' {
|
|||
$mnesia_table_loading_timeout = hiera('mnesia_table_loading_timeout', '10000')
|
||||
$rabbitmq_bind_ip_address = pick(get_network_role_property('mgmt/messaging', 'ipaddr'), 'UNSET')
|
||||
$management_bind_ip_address = hiera('management_bind_ip_address', '127.0.0.1')
|
||||
$enable_rpc_ha = hiera('enable_rpc_ha', 'true')
|
||||
$enable_notifications_ha = hiera('enable_notifications_ha', 'true')
|
||||
|
||||
# NOTE(mattymo) UNSET is a puppet ref, but would break real configs
|
||||
if $rabbitmq_bind_ip_address == 'UNSET' {
|
||||
|
@ -154,13 +156,15 @@ if $queue_provider == 'rabbitmq' {
|
|||
|
||||
if ($use_pacemaker) {
|
||||
class { 'pacemaker_wrappers::rabbitmq':
|
||||
command_timeout => $command_timeout,
|
||||
debug => $debug,
|
||||
erlang_cookie => $erlang_cookie,
|
||||
admin_user => $rabbit_hash['user'],
|
||||
admin_pass => $rabbit_hash['password'],
|
||||
host_ip => $rabbitmq_bind_ip_address,
|
||||
before => Class['nova::rabbitmq'],
|
||||
command_timeout => $command_timeout,
|
||||
debug => $debug,
|
||||
erlang_cookie => $erlang_cookie,
|
||||
admin_user => $rabbit_hash['user'],
|
||||
admin_pass => $rabbit_hash['password'],
|
||||
host_ip => $rabbitmq_bind_ip_address,
|
||||
before => Class['nova::rabbitmq'],
|
||||
enable_rpc_ha => $enable_rpc_ha,
|
||||
enable_notifications_ha => $enable_notifications_ha,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -50,17 +50,27 @@
|
|||
# definitions from a backup as part of a recovery action.
|
||||
# Defaults to undef
|
||||
#
|
||||
# [*enable_rpc_ha*]
|
||||
# Boolean. Set ha-mode=all policy for RPC queues. Note that
|
||||
# Ceilometer queues are not affected by this flag.
|
||||
#
|
||||
# [*enable_notifications_ha*]
|
||||
# Boolean. Set ha-mode=all policy for Ceilometer queues. Note
|
||||
# that RPC queues are not affected by this flag.
|
||||
#
|
||||
class pacemaker_wrappers::rabbitmq (
|
||||
$primitive_type = 'rabbitmq-server',
|
||||
$service_name = $::rabbitmq::service_name,
|
||||
$port = $::rabbitmq::port,
|
||||
$host_ip = '127.0.0.1',
|
||||
$debug = false,
|
||||
$ocf_script_file = 'cluster/ocf/rabbitmq',
|
||||
$command_timeout = '',
|
||||
$erlang_cookie = 'EOKOWXQREETZSHFNTPEY',
|
||||
$admin_user = undef,
|
||||
$admin_pass = undef,
|
||||
$primitive_type = 'rabbitmq-server',
|
||||
$service_name = $::rabbitmq::service_name,
|
||||
$port = $::rabbitmq::port,
|
||||
$host_ip = '127.0.0.1',
|
||||
$debug = false,
|
||||
$ocf_script_file = 'cluster/ocf/rabbitmq',
|
||||
$command_timeout = '',
|
||||
$erlang_cookie = 'EOKOWXQREETZSHFNTPEY',
|
||||
$admin_user = undef,
|
||||
$admin_pass = undef,
|
||||
$enable_rpc_ha = true,
|
||||
$enable_notifications_ha = true,
|
||||
) inherits ::rabbitmq::service {
|
||||
|
||||
if $host_ip == 'UNSET' or $host_ip == '0.0.0.0' {
|
||||
|
@ -70,13 +80,15 @@ class pacemaker_wrappers::rabbitmq (
|
|||
}
|
||||
|
||||
$parameters = {
|
||||
'host_ip' => $real_host_ip,
|
||||
'node_port' => $port,
|
||||
'debug' => $debug,
|
||||
'command_timeout' => $command_timeout,
|
||||
'erlang_cookie' => $erlang_cookie,
|
||||
'admin_user' => $admin_user,
|
||||
'admin_password' => $admin_pass,
|
||||
'host_ip' => $real_host_ip,
|
||||
'node_port' => $port,
|
||||
'debug' => $debug,
|
||||
'command_timeout' => $command_timeout,
|
||||
'erlang_cookie' => $erlang_cookie,
|
||||
'admin_user' => $admin_user,
|
||||
'admin_password' => $admin_pass,
|
||||
'enable_rpc_ha' => $enable_rpc_ha,
|
||||
'enable_notifications_ha' => $enable_notifications_ha,
|
||||
}
|
||||
|
||||
$metadata = {
|
||||
|
|
|
@ -13,6 +13,9 @@
|
|||
#
|
||||
# See usage() function below for more details ...
|
||||
#
|
||||
# Note that the script uses set_rabbitmq_policy.sh script located in the
|
||||
# same directory to setup RabbitMQ policies.
|
||||
#
|
||||
#######################################################################
|
||||
# Initialization:
|
||||
|
||||
|
@ -92,6 +95,11 @@ UEND
|
|||
}
|
||||
|
||||
meta_data() {
|
||||
# The EXTENDED_OCF_PARAMS parameter below does not exist by default
|
||||
# and hence converted to an empty string unless overridden. It
|
||||
# could be used by an extension script to add new parameters. For
|
||||
# example see https://review.openstack.org/#/c/249180/10
|
||||
|
||||
cat <<END
|
||||
<?xml version="1.0"?>
|
||||
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
|
||||
|
@ -268,6 +276,8 @@ If too many timeouts happen in a raw, the monitor call will return with error.
|
|||
<content type="string" default="${OCF_RESKEY_max_rabbitmqctl_timeouts_default}" />
|
||||
</parameter>
|
||||
|
||||
$EXTENDED_OCF_PARAMS
|
||||
|
||||
</parameters>
|
||||
|
||||
<actions>
|
||||
|
@ -1733,11 +1743,10 @@ action_promote() {
|
|||
ocf_log info "${LH} action end."
|
||||
exit $OCF_FAILED_MASTER
|
||||
fi
|
||||
ocf_log info "${LH} Setting HA policy for all queues"
|
||||
${OCF_RESKEY_ctl} set_policy ha-all "." '{"ha-mode":"all", "ha-sync-mode":"automatic"}' --apply-to all --priority 0
|
||||
${OCF_RESKEY_ctl} set_policy heat_rpc_expire "^heat-engine-listener\\." '{"expires":3600000,"ha-mode":"all","ha-sync-mode":"automatic"}' --apply-to all --priority 1
|
||||
${OCF_RESKEY_ctl} set_policy results_expire "^results\\." '{"expires":3600000,"ha-mode":"all","ha-sync-mode":"automatic"}' --apply-to all --priority 1
|
||||
${OCF_RESKEY_ctl} set_policy tasks_expire "^tasks\\." '{"expires":3600000,"ha-mode":"all","ha-sync-mode":"automatic"}' --apply-to all --priority 1
|
||||
|
||||
local set_policy_path="$(dirname $0)/set_rabbitmq_policy.sh"
|
||||
[ -f $set_policy_path ] && . $set_policy_path
|
||||
|
||||
# create timestamp file
|
||||
ocf_log info "${LH} Updating start timestamp"
|
||||
ocf_run crm_attribute -N $THIS_PCMK_NODE -l reboot --name 'rabbit-start-time' --update $(now)
|
||||
|
|
|
@ -0,0 +1,34 @@
|
|||
#!/bin/bash
#
# Wrapper around the upstream RabbitMQ OCF resource agent.
#
# It declares two extra resource parameters, enable_rpc_ha and
# enable_notifications_ha, publishes their metadata through
# EXTENDED_OCF_PARAMS (which the upstream agent expands inside its
# <parameters> section), and then hands control to the upstream agent by
# sourcing it.

# Both flags default to true. The "=" form of the expansion assigns the
# default only when the variable is unset, so a value supplied by the
# caller is kept. The expansions are quoted so that the resulting value is
# never subject to globbing or word splitting.
OCF_RESKEY_enable_rpc_ha_default=true
: "${OCF_RESKEY_enable_rpc_ha=${OCF_RESKEY_enable_rpc_ha_default}}"

OCF_RESKEY_enable_notifications_ha_default=true
: "${OCF_RESKEY_enable_notifications_ha=${OCF_RESKEY_enable_notifications_ha_default}}"

# Slurp the whole here-document into one variable. -r keeps backslashes
# literal. With an empty delimiter `read` always hits end of input and
# therefore returns non-zero; that is expected, hence `|| true`.
read -r -d '' EXTENDED_OCF_PARAMS << EOF || true
<parameter name="enable_rpc_ha" unique="0" required="0">
<longdesc lang="en">
Set ha-mode=all policy for RPC queues. Note that Ceilometer queues are not
affected by this flag.
</longdesc>
<shortdesc lang="en">Set ha-mode=all policy for RPC queues</shortdesc>
<content type="boolean" default="${OCF_RESKEY_enable_rpc_ha_default}" />
</parameter>

<parameter name="enable_notifications_ha" unique="0" required="0">
<longdesc lang="en">
Set ha-mode=all policy for Ceilometer queues. Note that RPC queues are not
affected by this flag.
</longdesc>
<shortdesc lang="en">Set ha-mode=all policy for Ceilometer queues</shortdesc>
<content type="boolean" default="${OCF_RESKEY_enable_notifications_ha_default}" />
</parameter>
EOF

# The upstream agent is the 'rabbitmq' file in the source tree; it is
# renamed to rabbitmq-server-upstream during packaging and installed next
# to this script.
#TODO(dmitryme): remove rabbitmq-server-upstream once we switch to
# rabbitmq-3.5.7, as it will be included here
# Quote both the dirname argument and the resulting path so that an
# installation directory containing whitespace still resolves correctly.
upstream_rabbitmq_ocf_script="$(dirname "$0")/rabbitmq-server-upstream"
. "$upstream_rabbitmq_ocf_script"
|
|
@ -0,0 +1,30 @@
|
|||
# This script is sourced by rabbitmq-server-ha.ocf during RabbitMQ
# cluster start up. It is a convenient place to set your cluster
# policy, for example:
# ${OCF_RESKEY_ctl} set_policy ha-all "." '{"ha-mode":"all", "ha-sync-mode":"automatic"}' --apply-to all --priority 0
#
# Values read from the sourcing agent's environment:
#   OCF_RESKEY_ctl                     - command used to run set_policy
#                                        (presumably rabbitmqctl; confirm
#                                        against the agent's defaults)
#   OCF_RESKEY_enable_rpc_ha           - mirror RPC queues when true
#   OCF_RESKEY_enable_notifications_ha - mirror Ceilometer queues when true
#   LH                                 - log line prefix passed to ocf_log
#
# Both flags are declared as OCF booleans, so they are evaluated with
# ocf_is_true instead of a literal comparison with 'true'; this way values
# such as 1, yes or on are honoured as well.
# NOTE(review): ocf_is_true is expected to come from the same OCF shell
# function library that provides ocf_log - confirm it is loaded by the agent.
#
# Regular expressions are single-quoted so that backslashes reach RabbitMQ
# exactly as written.
#
# Only one policy is applied to a queue at a time, so the priority 1
# "expire" policies have to carry the HA keys themselves whenever the
# matching queues are supposed to stay mirrored.

if ocf_is_true "${OCF_RESKEY_enable_rpc_ha}" ; then
    ${OCF_RESKEY_ctl} set_policy heat_rpc_expire '^heat-engine-listener\.' '{"expires":3600000, "ha-mode":"all", "ha-sync-mode":"automatic"}' --apply-to all --priority 1
    ${OCF_RESKEY_ctl} set_policy results_expire '^results\.' '{"expires":3600000, "ha-mode":"all", "ha-sync-mode":"automatic"}' --apply-to all --priority 1
    ${OCF_RESKEY_ctl} set_policy tasks_expire '^tasks\.' '{"expires":3600000, "ha-mode":"all", "ha-sync-mode":"automatic"}' --apply-to all --priority 1

    if ocf_is_true "${OCF_RESKEY_enable_notifications_ha}" ; then
        ocf_log info "${LH} Setting HA policy for all queues"
        ${OCF_RESKEY_ctl} set_policy ha-all '.' '{"ha-mode":"all", "ha-sync-mode":"automatic"}' --apply-to all --priority 0
    else
        ocf_log info "${LH} Setting HA policy for all queues except Ceilometer ones"
        # The regex below matches any string except the ones starting with either of 'event.', 'metering.' or 'notifications.'
        ${OCF_RESKEY_ctl} set_policy ha-all-except-notif '^((?!(event|metering|notifications)\.).)*$' '{"ha-mode":"all", "ha-sync-mode":"automatic"}' --apply-to all --priority 0
    fi

else
    ocf_log info "${LH} Do not set HA policy for all queues"
    # RPC mirroring is off: keep only the expiry setting on these queues.
    ${OCF_RESKEY_ctl} set_policy heat_rpc_expire '^heat-engine-listener\.' '{"expires":3600000}' --apply-to all --priority 1
    ${OCF_RESKEY_ctl} set_policy results_expire '^results\.' '{"expires":3600000}' --apply-to all --priority 1
    ${OCF_RESKEY_ctl} set_policy tasks_expire '^tasks\.' '{"expires":3600000}' --apply-to all --priority 1

    if ocf_is_true "${OCF_RESKEY_enable_notifications_ha}" ; then
        ocf_log info "${LH} Setting HA policy for Ceilometer queues"
        ${OCF_RESKEY_ctl} set_policy ha-notif '^(event|metering|notifications)\.' '{"ha-mode":"all", "ha-sync-mode":"automatic"}' --apply-to all --priority 0
    fi
fi
|
|
@ -91,7 +91,10 @@ install -m 0755 %{files_source}/fuel-ha-utils/ocf/ocf-neutron-ovs-agent %{buildr
|
|||
install -m 0755 %{files_source}/fuel-ha-utils/ocf/ocf-neutron-metadata-agent %{buildroot}/usr/lib/ocf/resource.d/fuel/ocf-neutron-metadata-agent
|
||||
install -m 0755 %{files_source}/fuel-ha-utils/ocf/ocf-neutron-dhcp-agent %{buildroot}/usr/lib/ocf/resource.d/fuel/ocf-neutron-dhcp-agent
|
||||
install -m 0755 %{files_source}/fuel-ha-utils/ocf/ocf-neutron-l3-agent %{buildroot}/usr/lib/ocf/resource.d/fuel/ocf-neutron-l3-agent
|
||||
install -m 0755 %{files_source}/fuel-ha-utils/ocf/rabbitmq %{buildroot}/usr/lib/ocf/resource.d/fuel/rabbitmq-server
|
||||
#TODO(dmitryme): remove rabbitmq-server-upstream once we switch to rabbitmq-3.5.7, as it will be included here
|
||||
install -m 0755 %{files_source}/fuel-ha-utils/ocf/rabbitmq %{buildroot}/usr/lib/ocf/resource.d/fuel/rabbitmq-server-upstream
|
||||
install -m 0755 %{files_source}/fuel-ha-utils/ocf/rabbitmq-fuel %{buildroot}/usr/lib/ocf/resource.d/fuel/rabbitmq-server
|
||||
install -m 0755 %{files_source}/fuel-ha-utils/ocf/set_rabbitmq_policy.sh %{buildroot}/usr/lib/ocf/resource.d/fuel/set_rabbitmq_policy.sh
|
||||
install -m 0755 %{files_source}/fuel-ha-utils/ocf/ns_IPaddr2 %{buildroot}/usr/lib/ocf/resource.d/fuel/ns_IPaddr2
|
||||
install -m 0755 %{files_source}/fuel-ha-utils/ocf/ceilometer-agent-central %{buildroot}/usr/lib/ocf/resource.d/fuel/ceilometer-agent-central
|
||||
install -m 0755 %{files_source}/fuel-ha-utils/ocf/ceilometer-alarm-evaluator %{buildroot}/usr/lib/ocf/resource.d/fuel/ceilometer-alarm-evaluator
|
||||
|
|
Loading…
Reference in New Issue