Make HAProxy check of swift proxy backends via management VIP
Without this fix, when the management interface on the controller node running a Swift proxy is down, HAProxy would fail to update its backend status at the storage network. This is a problem because Swift backends that are not able to reach the Swift endpoint via the management VIP should be marked down. Otherwise, response times for any requested Swift commands would be drastically longer. The simple httpchk option cannot resolve this because the Swift healthcheck reports OK when contacted via the storage network. In order to fix this, a simple healthcheck script is implemented. This script runs as an HTTP xinetd service at TCP port 49001 and is accessible only from the localhost, 240.0.0.2, and the storage plus management networks. The service verifies, for the node under check, that: a) the management VIP is pingable via ICMP (by 3 packets); b) the Swift endpoint is reachable by TCP connect via the local storage address within a 5-second connection timeout; c) the Swift healthcheck report via the local storage address endpoint is OK. It reports an HTTP 200 OK if all of the results are OK. Otherwise, it reports an HTTP 503 Error. Expected Swift node control plane failover time will be around 30 seconds. Swift data plane is not affected. DocImpact: Reference architecture, swift failover. Closes-bug: #1459772 Related-bug: #1460623 Change-Id: I55a35b45257763a20f33bd47cb5c57de53558ccf Signed-off-by: Bogdan Dobrelya <bdobrelia@mirantis.com>
This commit is contained in:
parent
ff5c0a2849
commit
42df864217
|
@ -2,4 +2,5 @@ files/fuel-ha-utils/ocf/* /usr/lib/ocf/resource.d/fuel
|
|||
files/fuel-ha-utils/tools/q-agent-cleanup.py /usr/bin
|
||||
files/fuel-ha-utils/tools/wsrepclustercheckrc /etc
|
||||
files/fuel-ha-utils/tools/clustercheck /usr/bin
|
||||
files/fuel-ha-utils/tools/swiftcheck /usr/bin
|
||||
|
||||
|
|
|
@ -8,6 +8,7 @@ class openstack::firewall (
|
|||
$mysql_gcomm_port = 4567,
|
||||
$galera_ist_port = 4568,
|
||||
$galera_clustercheck_port = 49000,
|
||||
$swift_proxy_check_port = 49001,
|
||||
$keystone_public_port = 5000,
|
||||
$swift_proxy_port = 8080,
|
||||
$swift_object_port = 6000,
|
||||
|
@ -104,7 +105,8 @@ class openstack::firewall (
|
|||
}
|
||||
|
||||
firewall {'103 swift':
|
||||
port => [$swift_proxy_port, $swift_object_port, $swift_container_port, $swift_account_port],
|
||||
port => [$swift_proxy_port, $swift_object_port, $swift_container_port,
|
||||
$swift_account_port, $swift_proxy_check_port],
|
||||
proto => 'tcp',
|
||||
action => 'accept',
|
||||
}
|
||||
|
|
|
@ -9,5 +9,9 @@ class openstack::ha::swift (
|
|||
server_names => filter_hash($servers, 'name'),
|
||||
ipaddresses => filter_hash($servers, 'storage_address'),
|
||||
public => true,
|
||||
haproxy_config_options => {
|
||||
'option' => ['httpchk', 'httplog', 'httpclose'],
|
||||
},
|
||||
balancermember_options => 'check port 49001 inter 15s fastinter 2s downinter 8s rise 3 fall 3',
|
||||
}
|
||||
}
|
||||
|
|
|
@ -16,7 +16,7 @@
|
|||
# class { 'openstack::reserved_ports': }
|
||||
#
|
||||
|
||||
class openstack::reserved_ports ( $ports = '49000,35357,41055,58882' ) {
|
||||
class openstack::reserved_ports ( $ports = '49000,49001,35357,41055,58882' ) {
|
||||
sysctl::value { 'net.ipv4.ip_local_reserved_ports': value => $ports }
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,71 @@
|
|||
# == Class: openstack::swift::status
#
# Sets up the swiftcheck health probe for a swift proxy backend:
# registers the check port in /etc/services and publishes
# /usr/bin/swiftcheck as an xinetd service.
# Assumes the swift module is in the catalog.
#
# === Parameters:
#
# [*address*]
#   (optional) Address the xinetd swiftcheck service binds to
#   Defaults to 0.0.0.0
#
# [*only_from*]
#   (optional) xinetd only_from value for the swiftcheck service
#   Defaults to 127.0.0.1
#
# [*port*]
#   (optional) Port the swiftcheck service listens on
#   Defaults to 49001
#
# [*endpoint*]
#   (optional) Swift endpoint passed to swiftcheck as its first argument
#   Defaults to http://127.0.0.1:8080
#
# [*vip*]
#   (optional) VIP address passed to swiftcheck for the ICMP connectivity check
#   Defaults to 127.0.0.1
#
# [*con_timeout*]
#   (optional) Connection timeout, in seconds, passed to swiftcheck
#   Defaults to 5 seconds
#

class openstack::swift::status (
  $address     = '0.0.0.0',
  $only_from   = '127.0.0.1',
  $port        = '49001',
  $endpoint    = 'http://127.0.0.1:8080',
  $vip         = '127.0.0.1',
  $con_timeout = '5',
) {

  # Augeas path of the /etc/services entry selected by the check port
  $services_entry = "/files/etc/services/service-name[port = '${port}']"

  augeas { 'swiftcheck':
    context => '/files/etc/services',
    changes => [
      "set ${services_entry}/port ${port}",
      "set ${services_entry} swiftcheck",
      "set ${services_entry}/protocol tcp",
      "set ${services_entry}/#comment 'Swift Health Check'",
    ],
  }

  # Only the debian family uses 'nogroup'; every other family gets 'nobody'
  case $::osfamily {
    'debian': { $group = 'nogroup' }
    default:  { $group = 'nobody' }
  }

  include xinetd

  xinetd::service { 'swiftcheck':
    server      => '/usr/bin/swiftcheck',
    server_args => "${endpoint} ${vip} ${con_timeout}",
    bind        => $address,
    port        => $port,
    only_from   => $only_from,
    user        => 'nobody',
    group       => $group,
    flags       => 'IPv4',
    cps         => '512 10',
    per_source  => 'UNLIMITED',
  }
}
|
|
@ -14,7 +14,7 @@ sysctl::value { 'net.ipv4.conf.all.arp_accept': value => '1' }
|
|||
sysctl::value { 'net.ipv4.conf.default.arp_accept': value => '1' }
|
||||
|
||||
# setting kernel reserved ports
|
||||
# defaults are 49000,35357,41055,58882
|
||||
# defaults are 49000,49001,35357,41055,58882
|
||||
class { 'openstack::reserved_ports': }
|
||||
|
||||
### TCP connections keepalives and failover related parameters ###
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
notice('MODULAR: swift.pp')
|
||||
|
||||
$swift_hash = hiera('swift_hash')
|
||||
$proxy_port = hiera('proxy_port', '8080')
|
||||
$network_scheme = hiera('network_scheme', {})
|
||||
$storage_hash = hiera('storage_hash')
|
||||
$mp_hash = hiera('mp')
|
||||
$management_vip = hiera('management_vip')
|
||||
|
@ -50,6 +52,8 @@ if !($storage_hash['images_ceph'] and $storage_hash['objects_ceph']) and !$stora
|
|||
}
|
||||
|
||||
$ring_part_power = calc_ring_part_power($controllers,$swift_hash['resize_value'])
|
||||
$sto_net = $network_scheme['endpoints']['br-storage']['IP']
|
||||
$man_net = $network_scheme['endpoints']['br-mgmt']['IP']
|
||||
|
||||
class { 'openstack::swift::proxy':
|
||||
swift_user_password => $swift_hash[user_password],
|
||||
|
@ -59,11 +63,19 @@ if !($storage_hash['images_ceph'] and $storage_hash['objects_ceph']) and !$stora
|
|||
controller_node_address => $management_vip,
|
||||
swift_local_net_ip => $storage_address,
|
||||
master_swift_proxy_ip => $master_swift_proxy_ip,
|
||||
proxy_port => $proxy_port,
|
||||
debug => $debug,
|
||||
verbose => $verbose,
|
||||
log_facility => 'LOG_SYSLOG',
|
||||
ceilometer => hiera('use_ceilometer'),
|
||||
ring_min_part_hours => $ring_min_part_hours,
|
||||
} ->
|
||||
|
||||
class { 'openstack::swift::status':
|
||||
endpoint => "http://${storage_address}:${proxy_port}",
|
||||
vip => $management_vip,
|
||||
only_from => "127.0.0.1 240.0.0.2 ${sto_net} ${man_net}",
|
||||
con_timeout => 5
|
||||
}
|
||||
|
||||
class { 'swift::keystone::auth':
|
||||
|
|
|
@ -0,0 +1,55 @@
|
|||
#!/bin/bash
#
# Script to make a HAProxy capable of monitoring the Swift proxy backends status.
# This script checks the given management VIP via ICMP and also performs a Swift
# healthcheck via the given Swift endpoint with the given connect timeout.
# Reports an HTTP 200 OK, if all of the results are OK.
# If the healthcheck result was not OK or the Swift endpoint/VIP was not reachable,
# it would report an HTTP 503 Error.
#
# Usage: swiftcheck <local_swift_endpoint> <management_vip> <connect_timeout>
#   $1 - Swift endpoint URL; "/healthcheck" is appended to it
#   $2 - management VIP to ping
#   $3 - curl connect timeout, in seconds
#
# Exit status: 0 when HTTP 200 was reported, 1 when HTTP 503 was reported.
#
# Author: Bogdan Dobrelya <bdobrelia@mirantis.com>
#

if [[ $1 == '-h' || $1 == '--help' || "$#" -ne 3 ]]; then
  echo "Usage: $0 <local_swift_endpoint> <management_vip> <connect_timeout>"
  exit
fi

# Remove trailing slash(es) so that "/healthcheck" can be appended safely
url=$(printf '%s\n' "$1" | sed 's#/*$##')

# Check for the management VIP avail.
# Redirect stdout first and only then duplicate stderr onto it: with the
# reverse order ("2>&1 >/dev/null") stderr keeps pointing at the original
# stdout, so ping diagnostics would be written to the client ahead of the
# HTTP status line.
ping -c3 "$2" >/dev/null 2>&1
rc=$?
rc2=1
result="ERROR"

# Also check for the swift healthcheck report via given endpoint url,
# but only when the VIP answered; otherwise keep the failure defaults above
if [[ $rc == 0 ]]; then
  result=$(/usr/bin/curl --connect-timeout "${3}" -XGET "${url}/healthcheck" 2>/dev/null)
  rc2=$?
fi

if [[ $result == "OK" && $rc2 == 0 ]]; then
  # Swift healthcheck is OK and endpoint is reachable
  # return HTTP 200. Shell return-code is 0
  echo -en "HTTP/1.1 200 OK\r\n"
  echo -en "Content-Type: text/plain\r\n"
  echo -en "Connection: close\r\n"
  # Body "OK.\r\n" is 5 bytes long
  echo -en "Content-Length: 5\r\n"
  echo -en "\r\n"
  echo -en "OK.\r\n"
  sleep 0.1
  exit 0
else
  # Swift healthcheck failed or endpoint was not reachable,
  # return HTTP 503. Shell return-code is 1
  echo -en "HTTP/1.1 503 Service Unavailable\r\n"
  echo -en "Content-Type: text/plain\r\n"
  echo -en "Connection: close\r\n"
  # Body "Error.\r\n" is 8 bytes long
  echo -en "Content-Length: 8\r\n"
  echo -en "\r\n"
  echo -en "Error.\r\n"
  sleep 0.1
  exit 1
fi
|
|
@ -72,6 +72,7 @@ install -m 0755 %{files_source}/fuel-ha-utils/ocf/ceilometer-agent-central %{bui
|
|||
install -m 0755 %{files_source}/fuel-ha-utils/ocf/ceilometer-alarm-evaluator %{buildroot}/usr/lib/ocf/resource.d/fuel/ceilometer-alarm-evaluator
|
||||
install -m 0755 %{files_source}/fuel-ha-utils/tools/q-agent-cleanup.py %{buildroot}/usr/bin/q-agent-cleanup.py
|
||||
install -m 0755 %{files_source}/fuel-ha-utils/tools/clustercheck %{buildroot}/usr/bin/clustercheck
|
||||
install -m 0755 %{files_source}/fuel-ha-utils/tools/swiftcheck %{buildroot}/usr/bin/swiftcheck
|
||||
install -m 0644 %{files_source}/fuel-ha-utils/tools/wsrepclustercheckrc %{buildroot}/etc/wsrepclustercheckrc
|
||||
install -m 0755 %{files_source}/rabbit-fence/rabbit-fence.py %{buildroot}/usr/bin/rabbit-fence.py
|
||||
install -m 0755 %{files_source}/rabbit-fence/rabbit-fence.init %{buildroot}/etc/init.d/rabbit-fence
|
||||
|
@ -171,6 +172,7 @@ For further information go to http://wiki.openstack.org/Fuel
|
|||
/usr/lib/ocf/resource.d/fuel
|
||||
/usr/bin/q-agent-cleanup.py
|
||||
/usr/bin/clustercheck
|
||||
/usr/bin/swiftcheck
|
||||
%config(noreplace) /etc/wsrepclustercheckrc
|
||||
#
|
||||
|
||||
|
|
Loading…
Reference in New Issue