Make HAProxy check Swift proxy backends via the management VIP

Without this fix, when the management interface of a controller node
running a Swift proxy goes down, HAProxy fails to update the status of
that backend on the storage network.

This is a problem because we want Swift backends that cannot reach the
Swift endpoint via the management VIP to be marked down. Otherwise,
response times for any Swift request would grow drastically. The plain
httpchk option cannot solve this, because the Swift healthcheck reports
OK when contacted via the storage network.

To fix this, a simple healthcheck script is introduced. It runs as an
HTTP xinetd service on TCP port 49001 and is accessible only from
localhost, 240.0.0.2, and the storage and management networks. The
service verifies, for the node under check, that:
a) the management VIP is pingable via ICMP (3 packets)
b) the Swift endpoint is reachable by TCP connect via the local storage
   address within a 5-second connection timeout
c) the Swift healthcheck contacted via the local storage address
   endpoint reports OK

If all of the checks pass, the service reports HTTP 200 OK; otherwise
it reports HTTP 503 Error. The expected Swift control plane failover
time for a node is around 30 seconds; the Swift data plane is not
affected.
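
As a rough illustration, the check can be exercised by hand from a host
on the management or storage network; the controller storage address
below is only a placeholder:

  # query the swiftcheck xinetd service directly (placeholder address)
  curl -s -o /dev/null -w '%{http_code}\n' http://192.168.1.5:49001/
  # expected: 200 while the node is healthy, 503 otherwise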

DocImpact: Reference architecture, swift failover.

Closes-bug: #1459772
Related-bug: #1460623

Change-Id: I55a35b45257763a20f33bd47cb5c57de53558ccf
Signed-off-by: Bogdan Dobrelya <bdobrelia@mirantis.com>

@@ -2,4 +2,5 @@ files/fuel-ha-utils/ocf/* /usr/lib/ocf/resource.d/fuel
files/fuel-ha-utils/tools/q-agent-cleanup.py /usr/bin
files/fuel-ha-utils/tools/wsrepclustercheckrc /etc
files/fuel-ha-utils/tools/clustercheck /usr/bin
files/fuel-ha-utils/tools/swiftcheck /usr/bin

@@ -8,6 +8,7 @@ class openstack::firewall (
$mysql_gcomm_port = 4567,
$galera_ist_port = 4568,
$galera_clustercheck_port = 49000,
$swift_proxy_check_port = 49001,
$keystone_public_port = 5000,
$swift_proxy_port = 8080,
$swift_object_port = 6000,
@@ -104,7 +105,8 @@ class openstack::firewall (
}
firewall {'103 swift':
port => [$swift_proxy_port, $swift_object_port, $swift_container_port, $swift_account_port],
port => [$swift_proxy_port, $swift_object_port, $swift_container_port,
$swift_account_port, $swift_proxy_check_port],
proto => 'tcp',
action => 'accept',
}
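
A quick way to confirm the new rule on a deployed controller is a
sketch along these lines (the exact comment text of the rule may
differ per deployment):

  iptables -S | grep -E '49001|103 swift'
  # expect the '103 swift' ACCEPT rule to now include port 49001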

@@ -9,5 +9,9 @@ class openstack::ha::swift (
server_names => filter_hash($servers, 'name'),
ipaddresses => filter_hash($servers, 'storage_address'),
public => true,
haproxy_config_options => {
'option' => ['httpchk', 'httplog', 'httpclose'],
},
balancermember_options => 'check port 49001 inter 15s fastinter 2s downinter 8s rise 3 fall 3',
}
}
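
With these options HAProxy runs its HTTP check against port 49001
instead of the Swift proxy port. Backend state can be inspected through
the HAProxy stats socket; the socket path and backend name below are
assumptions and may differ per deployment:

  echo 'show stat' | socat stdio UNIX-CONNECT:/var/lib/haproxy/stats | \
    awk -F, '$1 ~ /swift|object-storage/ {print $1, $2, $18}'
  # prints proxy name, server name and status (UP/DOWN) per backend

With fall 3 and the timings above, a failing backend is marked down
after roughly 15 s (first missed check) plus two more checks at the 2 s
fastinter interval plus check timeouts, which is consistent with the
~30 s control-plane failover estimate in the commit message.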

@@ -16,7 +16,7 @@
# class { 'openstack::reserved_ports': }
#
class openstack::reserved_ports ( $ports = '49000,35357,41055,58882' ) {
class openstack::reserved_ports ( $ports = '49000,49001,35357,41055,58882' ) {
sysctl::value { 'net.ipv4.ip_local_reserved_ports': value => $ports }
}
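
A one-line sanity check on a converged node; the expected value matches
the defaults above:

  sysctl net.ipv4.ip_local_reserved_ports
  # expected: net.ipv4.ip_local_reserved_ports = 49000,49001,35357,41055,58882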

@@ -0,0 +1,71 @@
# == Class: openstack::swift::status
#
# Configures a script that checks the health of the Swift proxy backend
# via the given endpoint. Assumes the swift module is in the catalog.
#
# === Parameters:
#
# [*address*]
# (optional) xinetd bind address for swiftcheck
# Defaults to 0.0.0.0
#
# [*only_from*]
# (optional) xinetd only_from address for swiftcheck
# Defaults to 127.0.0.1
#
# [*port*]
# (optional) Port for swift check service
# Defaults to 49001
#
# [*endpoint*]
# (optional) The Swift endpoint host for swift healthcheck
# Defaults to http://127.0.0.1:8080
#
# [*vip*]
# (optional) The VIP address for the ICMP connectivity check
# Defaults to 127.0.0.1
#
# [*con_timeout*]
# (optional) The timeout for Swift endpoint connection for swift healthcheck
# Defaults to 5 seconds
#
class openstack::swift::status (
$address = '0.0.0.0',
$only_from = '127.0.0.1',
$port = '49001',
$endpoint = 'http://127.0.0.1:8080',
$vip = '127.0.0.1',
$con_timeout = '5',
) {
augeas { 'swiftcheck':
context => '/files/etc/services',
changes => [
"set /files/etc/services/service-name[port = '${port}']/port ${port}",
"set /files/etc/services/service-name[port = '${port}'] swiftcheck",
"set /files/etc/services/service-name[port = '${port}']/protocol tcp",
"set /files/etc/services/service-name[port = '${port}']/#comment 'Swift Health Check'",
],
}
$group = $::osfamily ? {
'redhat' => 'nobody',
'debian' => 'nogroup',
default => 'nobody',
}
include xinetd
xinetd::service { 'swiftcheck':
bind => $address,
port => $port,
only_from => $only_from,
cps => '512 10',
per_source => 'UNLIMITED',
server => '/usr/bin/swiftcheck',
server_args => "${endpoint} ${vip} ${con_timeout}",
user => 'nobody',
group => $group,
flags => 'IPv4',
}
}
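
On a node where this class has been applied, the managed pieces can be
verified roughly as follows (the xinetd module is assumed to render the
service file as /etc/xinetd.d/swiftcheck):

  grep -w swiftcheck /etc/services
  # expected: swiftcheck  49001/tcp  # Swift Health Check
  cat /etc/xinetd.d/swiftcheck
  # should show bind, only_from, port and server = /usr/bin/swiftcheck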

@@ -14,7 +14,7 @@ sysctl::value { 'net.ipv4.conf.all.arp_accept': value => '1' }
sysctl::value { 'net.ipv4.conf.default.arp_accept': value => '1' }
# setting kernel reserved ports
# defaults are 49000,35357,41055,58882
# defaults are 49000,49001,35357,41055,58882
class { 'openstack::reserved_ports': }
### TCP connections keepalives and failover related parameters ###

@@ -1,6 +1,8 @@
notice('MODULAR: swift.pp')
$swift_hash = hiera('swift_hash')
$proxy_port = hiera('proxy_port', '8080')
$network_scheme = hiera('network_scheme', {})
$storage_hash = hiera('storage_hash')
$mp_hash = hiera('mp')
$management_vip = hiera('management_vip')
@@ -50,6 +52,8 @@ if !($storage_hash['images_ceph'] and $storage_hash['objects_ceph']) and !$stora
}
$ring_part_power = calc_ring_part_power($controllers,$swift_hash['resize_value'])
$sto_net = $network_scheme['endpoints']['br-storage']['IP']
$man_net = $network_scheme['endpoints']['br-mgmt']['IP']
class { 'openstack::swift::proxy':
swift_user_password => $swift_hash[user_password],
@@ -59,11 +63,19 @@ if !($storage_hash['images_ceph'] and $storage_hash['objects_ceph']) and !$stora
controller_node_address => $management_vip,
swift_local_net_ip => $storage_address,
master_swift_proxy_ip => $master_swift_proxy_ip,
proxy_port => $proxy_port,
debug => $debug,
verbose => $verbose,
log_facility => 'LOG_SYSLOG',
ceilometer => hiera('use_ceilometer'),
ring_min_part_hours => $ring_min_part_hours,
} ->
class { 'openstack::swift::status':
endpoint => "http://${storage_address}:${proxy_port}",
vip => $management_vip,
only_from => "127.0.0.1 240.0.0.2 ${sto_net} ${man_net}",
con_timeout => 5
}
class { 'swift::keystone::auth':

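For reference, check (c) from the commit message maps to Swift's
healthcheck middleware; on a controller it can be queried directly
against the local storage address (placeholder address below):

  curl --connect-timeout 5 http://192.168.1.5:8080/healthcheck
  # expected body: OK
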
@@ -0,0 +1,55 @@
#!/bin/bash
#
# Script that allows HAProxy to monitor the status of the Swift proxy backends.
# This script checks the given management VIP via ICMP and also performs a Swift
# healthcheck via the given Swift endpoint with the given connect timeout.
# Reports HTTP 200 OK if all of the checks pass.
# If the healthcheck result is not OK or the Swift endpoint/VIP is not
# reachable, it reports HTTP 503 Error.
#
# Author: Bogdan Dobrelya <bdobrelia@mirantis.com>
#
if [[ $1 == '-h' || $1 == '--help' || "$#" -ne 3 ]];then
echo "Usage: $0 <local_swift_endpoint> <management_vip> <connect_timeout>"
exit
fi
# Remove trailing slash
url=`echo $1 | sed 's#/*$##'`
# Check that the management VIP is reachable
ping -c 3 "$2" >/dev/null 2>&1
rc=$?
rc2=1
result="ERROR"
# Also check the Swift healthcheck report via the given endpoint URL
if [[ $rc == 0 ]]; then
result=`/usr/bin/curl --connect-timeout ${3} -XGET ${url}/healthcheck 2>/dev/null`
rc2=$?
fi
if [[ $result == "OK" && $rc2 == 0 ]] ; then
# Swift healthcheck is OK and endpoint is reachable
# return HTTP 200. Shell return-code is 0
echo -en "HTTP/1.1 200 OK\r\n"
echo -en "Content-Type: text/plain\r\n"
echo -en "Connection: close\r\n"
echo -en "Content-Length: 5\r\n"
echo -en "\r\n"
echo -en "OK.\r\n"
sleep 0.1
exit 0
else
# Swift healthcheck failed or endpoint was not reachable,
# return HTTP 503. Shell return-code is 1
echo -en "HTTP/1.1 503 Service Unavailable\r\n"
echo -en "Content-Type: text/plain\r\n"
echo -en "Connection: close\r\n"
echo -en "Content-Length: 8\r\n"
echo -en "\r\n"
echo -en "Error.\r\n"
sleep 0.1
exit 1
fi
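
A minimal usage sketch with placeholder addresses (the local Swift
endpoint and the management VIP below are examples only):

  /usr/bin/swiftcheck http://192.168.1.5:8080 10.20.0.2 5
  echo $?   # 0 with an HTTP 200 response when healthy, 1 with HTTP 503 otherwise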

@@ -72,6 +72,7 @@ install -m 0755 %{files_source}/fuel-ha-utils/ocf/ceilometer-agent-central %{bui
install -m 0755 %{files_source}/fuel-ha-utils/ocf/ceilometer-alarm-evaluator %{buildroot}/usr/lib/ocf/resource.d/fuel/ceilometer-alarm-evaluator
install -m 0755 %{files_source}/fuel-ha-utils/tools/q-agent-cleanup.py %{buildroot}/usr/bin/q-agent-cleanup.py
install -m 0755 %{files_source}/fuel-ha-utils/tools/clustercheck %{buildroot}/usr/bin/clustercheck
install -m 0755 %{files_source}/fuel-ha-utils/tools/swiftcheck %{buildroot}/usr/bin/swiftcheck
install -m 0644 %{files_source}/fuel-ha-utils/tools/wsrepclustercheckrc %{buildroot}/etc/wsrepclustercheckrc
install -m 0755 %{files_source}/rabbit-fence/rabbit-fence.py %{buildroot}/usr/bin/rabbit-fence.py
install -m 0755 %{files_source}/rabbit-fence/rabbit-fence.init %{buildroot}/etc/init.d/rabbit-fence
@@ -171,6 +172,7 @@ For further information go to http://wiki.openstack.org/Fuel
/usr/lib/ocf/resource.d/fuel
/usr/bin/q-agent-cleanup.py
/usr/bin/clustercheck
/usr/bin/swiftcheck
%config(noreplace) /etc/wsrepclustercheckrc
#