From 42df864217042cd2be03fde25ba6c235835d4835 Mon Sep 17 00:00:00 2001
From: Vladimir Kuklin
Date: Mon, 1 Jun 2015 17:57:41 +0300
Subject: [PATCH] Make HAProxy check of swift proxy backends via management VIP

Without this fix, when the management interface on a controller node
running a Swift proxy is down, HAProxy fails to update that backend's
status on the storage network. This is a problem because we want Swift
backends that cannot reach the Swift endpoint via the management VIP
to be marked down. Otherwise, response times for any requested Swift
commands would be drastically longer.

The plain httpchk option cannot solve this because the Swift
healthcheck reports OK when contacted via the storage network.

To fix this, a simple healthcheck script is introduced. The script
runs as an HTTP xinetd service on TCP port 49001 and is accessible
only from localhost, 240.0.0.2, and the storage and management
networks. The service verifies that, on the node under check:
a) the management VIP is pingable via ICMP (3 packets)
b) the Swift endpoint is reachable by TCP connect via the local
   storage address within a 5 second connection timeout
c) the Swift healthcheck report via the local storage address
   endpoint is OK
It reports HTTP 200 OK if all of the checks pass; otherwise it
reports an HTTP 503 error.

The expected Swift node control plane failover time is around
30 seconds. The Swift data plane is not affected.

DocImpact: Reference architecture, swift failover.
Closes-bug: #1459772
Related-bug: #1460623

Change-Id: I55a35b45257763a20f33bd47cb5c57de53558ccf
Signed-off-by: Bogdan Dobrelya
---
 debian/fuel-ha-utils.install                  |  1 +
 .../puppet/openstack/manifests/firewall.pp    |  4 +-
 .../puppet/openstack/manifests/ha/swift.pp    |  4 ++
 .../openstack/manifests/reserved_ports.pp     |  2 +-
 .../openstack/manifests/swift/status.pp       | 71 +++++++++++++++++++
 .../modular/netconfig/netconfig.pp            |  2 +-
 .../osnailyfacter/modular/swift/swift.pp      | 12 ++++
 files/fuel-ha-utils/tools/swiftcheck          | 55 ++++++++++++++
 specs/fuel-library6.1.spec                    |  2 +
 9 files changed, 150 insertions(+), 3 deletions(-)
 create mode 100644 deployment/puppet/openstack/manifests/swift/status.pp
 create mode 100644 files/fuel-ha-utils/tools/swiftcheck

diff --git a/debian/fuel-ha-utils.install b/debian/fuel-ha-utils.install
index f8e3e75ec6..14972f4f50 100644
--- a/debian/fuel-ha-utils.install
+++ b/debian/fuel-ha-utils.install
@@ -2,4 +2,5 @@ files/fuel-ha-utils/ocf/* /usr/lib/ocf/resource.d/fuel
 files/fuel-ha-utils/tools/q-agent-cleanup.py /usr/bin
 files/fuel-ha-utils/tools/wsrepclustercheckrc /etc
 files/fuel-ha-utils/tools/clustercheck /usr/bin
+files/fuel-ha-utils/tools/swiftcheck /usr/bin
diff --git a/deployment/puppet/openstack/manifests/firewall.pp b/deployment/puppet/openstack/manifests/firewall.pp
index 3bfecd0256..a8d2ee0649 100644
--- a/deployment/puppet/openstack/manifests/firewall.pp
+++ b/deployment/puppet/openstack/manifests/firewall.pp
@@ -8,6 +8,7 @@ class openstack::firewall (
   $mysql_gcomm_port         = 4567,
   $galera_ist_port          = 4568,
   $galera_clustercheck_port = 49000,
+  $swift_proxy_check_port   = 49001,
   $keystone_public_port     = 5000,
   $swift_proxy_port         = 8080,
   $swift_object_port        = 6000,
@@ -104,7 +105,8 @@ class openstack::firewall (
   }
 
   firewall {'103 swift':
-    port   => [$swift_proxy_port, $swift_object_port, $swift_container_port, $swift_account_port],
+    port   => [$swift_proxy_port, $swift_object_port, $swift_container_port,
+               $swift_account_port, $swift_proxy_check_port],
     proto  => 'tcp',
     action => 'accept',
   }
diff --git a/deployment/puppet/openstack/manifests/ha/swift.pp b/deployment/puppet/openstack/manifests/ha/swift.pp
index dfc50f2f34..d8a0e43f2c 100644
--- a/deployment/puppet/openstack/manifests/ha/swift.pp
+++ b/deployment/puppet/openstack/manifests/ha/swift.pp
@@ -9,5 +9,9 @@ class openstack::ha::swift (
     server_names => filter_hash($servers, 'name'),
     ipaddresses  => filter_hash($servers, 'storage_address'),
     public       => true,
+    haproxy_config_options => {
+      'option' => ['httpchk', 'httplog', 'httpclose'],
+    },
+    balancermember_options => 'check port 49001 inter 15s fastinter 2s downinter 8s rise 3 fall 3',
   }
 }
diff --git a/deployment/puppet/openstack/manifests/reserved_ports.pp b/deployment/puppet/openstack/manifests/reserved_ports.pp
index d234f2055a..f320f6a6ca 100644
--- a/deployment/puppet/openstack/manifests/reserved_ports.pp
+++ b/deployment/puppet/openstack/manifests/reserved_ports.pp
@@ -16,7 +16,7 @@
 #   class { 'openstack::reserved_ports': }
 #
-class openstack::reserved_ports ( $ports = '49000,35357,41055,58882' ) {
+class openstack::reserved_ports ( $ports = '49000,49001,35357,41055,58882' ) {
   sysctl::value { 'net.ipv4.ip_local_reserved_ports':
     value => $ports
   }
 }
diff --git a/deployment/puppet/openstack/manifests/swift/status.pp b/deployment/puppet/openstack/manifests/swift/status.pp
new file mode 100644
index 0000000000..2493d3a427
--- /dev/null
+++ b/deployment/puppet/openstack/manifests/swift/status.pp
@@ -0,0 +1,71 @@
+# == Class: openstack::swift::status
+#
+# Configures a script that will check the health
+# of the swift proxy backend via the given endpoint; assumes the swift module is in the catalog
+#
+# === Parameters:
+#
+# [*address*]
+#   (optional) xinet.d bind address for swiftcheck
+#   Defaults to 0.0.0.0
+#
+# [*only_from*]
+#   (optional) xinet.d only_from address for swiftcheck
+#   Defaults to 127.0.0.1
+#
+# [*port*]
+#   (optional) Port for the swift check service
+#   Defaults to 49001
+#
+# [*endpoint*]
+#   (optional) The Swift endpoint host for the swift healthcheck
+#   Defaults to http://127.0.0.1:8080
+#
+# [*vip*]
+#   (optional) The VIP address for the ICMP connectivity check
+#   Defaults to 127.0.0.1
+#
+# [*con_timeout*]
+#   (optional) The timeout for the Swift endpoint connection for the swift healthcheck
+#   Defaults to 5 seconds
+#
+
+class openstack::swift::status (
+  $address     = '0.0.0.0',
+  $only_from   = '127.0.0.1',
+  $port        = '49001',
+  $endpoint    = 'http://127.0.0.1:8080',
+  $vip         = '127.0.0.1',
+  $con_timeout = '5',
+) {
+
+  augeas { 'swiftcheck':
+    context => '/files/etc/services',
+    changes => [
+      "set /files/etc/services/service-name[port = '${port}']/port ${port}",
+      "set /files/etc/services/service-name[port = '${port}'] swiftcheck",
+      "set /files/etc/services/service-name[port = '${port}']/protocol tcp",
+      "set /files/etc/services/service-name[port = '${port}']/#comment 'Swift Health Check'",
+    ],
+  }
+
+  $group = $::osfamily ? {
+    'redhat' => 'nobody',
+    'debian' => 'nogroup',
+    default  => 'nobody',
+  }
+
+  include xinetd
+  xinetd::service { 'swiftcheck':
+    bind        => $address,
+    port        => $port,
+    only_from   => $only_from,
+    cps         => '512 10',
+    per_source  => 'UNLIMITED',
+    server      => '/usr/bin/swiftcheck',
+    server_args => "${endpoint} ${vip} ${con_timeout}",
+    user        => 'nobody',
+    group       => $group,
+    flags       => 'IPv4',
+  }
+}
diff --git a/deployment/puppet/osnailyfacter/modular/netconfig/netconfig.pp b/deployment/puppet/osnailyfacter/modular/netconfig/netconfig.pp
index 7719ef25bf..3db7a8129a 100644
--- a/deployment/puppet/osnailyfacter/modular/netconfig/netconfig.pp
+++ b/deployment/puppet/osnailyfacter/modular/netconfig/netconfig.pp
@@ -14,7 +14,7 @@ sysctl::value { 'net.ipv4.conf.all.arp_accept': value => '1' }
 sysctl::value { 'net.ipv4.conf.default.arp_accept': value => '1' }
 
 # setting kernel reserved ports
-# defaults are 49000,35357,41055,58882
+# defaults are 49000,49001,35357,41055,58882
 class { 'openstack::reserved_ports': }
 
 ### TCP connections keepalives and failover related parameters ###
diff --git a/deployment/puppet/osnailyfacter/modular/swift/swift.pp b/deployment/puppet/osnailyfacter/modular/swift/swift.pp
index 5066352855..55fbad2b34 100644
--- a/deployment/puppet/osnailyfacter/modular/swift/swift.pp
+++ b/deployment/puppet/osnailyfacter/modular/swift/swift.pp
@@ -1,6 +1,8 @@
 notice('MODULAR: swift.pp')
 
 $swift_hash     = hiera('swift_hash')
+$proxy_port     = hiera('proxy_port', '8080')
+$network_scheme = hiera('network_scheme', {})
 $storage_hash   = hiera('storage_hash')
 $mp_hash        = hiera('mp')
 $management_vip = hiera('management_vip')
@@ -50,6 +52,8 @@ if !($storage_hash['images_ceph'] and $storage_hash['objects_ceph']) and !$stora
   }
 
   $ring_part_power = calc_ring_part_power($controllers,$swift_hash['resize_value'])
+  $sto_net = $network_scheme['endpoints']['br-storage']['IP']
+  $man_net = $network_scheme['endpoints']['br-mgmt']['IP']
 
   class { 'openstack::swift::proxy':
     swift_user_password     => $swift_hash[user_password],
@@ -59,11 +63,19 @@ if !($storage_hash['images_ceph'] and $storage_hash['objects_ceph']) and !$stora
     controller_node_address => $management_vip,
     swift_local_net_ip      => $storage_address,
     master_swift_proxy_ip   => $master_swift_proxy_ip,
+    proxy_port              => $proxy_port,
     debug                   => $debug,
     verbose                 => $verbose,
     log_facility            => 'LOG_SYSLOG',
     ceilometer              => hiera('use_ceilometer'),
     ring_min_part_hours     => $ring_min_part_hours,
+  } ->
+
+  class { 'openstack::swift::status':
+    endpoint    => "http://${storage_address}:${proxy_port}",
+    vip         => $management_vip,
+    only_from   => "127.0.0.1 240.0.0.2 ${sto_net} ${man_net}",
+    con_timeout => 5,
   }
 
   class { 'swift::keystone::auth':
diff --git a/files/fuel-ha-utils/tools/swiftcheck b/files/fuel-ha-utils/tools/swiftcheck
new file mode 100644
index 0000000000..d4a23292c1
--- /dev/null
+++ b/files/fuel-ha-utils/tools/swiftcheck
@@ -0,0 +1,55 @@
+#!/bin/bash
+#
+# Script to make HAProxy capable of monitoring the Swift proxy backends status.
+# This script checks the given management VIP via ICMP and also performs a Swift
+# healthcheck via the given Swift endpoint with the given connect timeout.
+# Reports an HTTP 200 OK if all of the results are OK.
+# If the healthcheck result was not OK or the Swift endpoint/VIP was not reachable,
+# it reports an HTTP 503 Error.
+#
+# Author: Bogdan Dobrelya
+#
+
+if [[ $1 == '-h' || $1 == '--help' || "$#" -ne 3 ]]; then
+  echo "Usage: $0 <endpoint_url> <vip_address> <conn_timeout>"
+  exit
+fi
+
+# Remove the trailing slash
+url=$(echo $1 | sed 's#/*$##')
+
+# Check the management VIP availability via ICMP
+ping -c3 $2 >/dev/null 2>&1
+rc=$?
+rc2=1
+result="ERROR"
+
+# Also check the swift healthcheck report via the given endpoint url
+if [[ $rc == 0 ]]; then
+  result=$(/usr/bin/curl --connect-timeout ${3} -XGET ${url}/healthcheck 2>/dev/null)
+  rc2=$?
+fi
+
+if [[ $result == "OK" && $rc2 == 0 ]]; then
+  # Swift healthcheck is OK and the endpoint is reachable,
+  # return HTTP 200; the shell return code is 0
+  echo -en "HTTP/1.1 200 OK\r\n"
+  echo -en "Content-Type: text/plain\r\n"
+  echo -en "Connection: close\r\n"
+  echo -en "Content-Length: 5\r\n"
+  echo -en "\r\n"
+  echo -en "OK.\r\n"
+  sleep 0.1
+  exit 0
+else
+  # Swift healthcheck failed or the endpoint was not reachable,
+  # return HTTP 503; the shell return code is 1
+  echo -en "HTTP/1.1 503 Service Unavailable\r\n"
+  echo -en "Content-Type: text/plain\r\n"
+  echo -en "Connection: close\r\n"
+  echo -en "Content-Length: 8\r\n"
+  echo -en "\r\n"
+  echo -en "Error.\r\n"
+  sleep 0.1
+  exit 1
+fi
diff --git a/specs/fuel-library6.1.spec b/specs/fuel-library6.1.spec
index a0f4cce914..8bb5abfbbf 100644
--- a/specs/fuel-library6.1.spec
+++ b/specs/fuel-library6.1.spec
@@ -72,6 +72,7 @@ install -m 0755 %{files_source}/fuel-ha-utils/ocf/ceilometer-agent-central %{bui
 install -m 0755 %{files_source}/fuel-ha-utils/ocf/ceilometer-alarm-evaluator %{buildroot}/usr/lib/ocf/resource.d/fuel/ceilometer-alarm-evaluator
 install -m 0755 %{files_source}/fuel-ha-utils/tools/q-agent-cleanup.py %{buildroot}/usr/bin/q-agent-cleanup.py
 install -m 0755 %{files_source}/fuel-ha-utils/tools/clustercheck %{buildroot}/usr/bin/clustercheck
+install -m 0755 %{files_source}/fuel-ha-utils/tools/swiftcheck %{buildroot}/usr/bin/swiftcheck
 install -m 0644 %{files_source}/fuel-ha-utils/tools/wsrepclustercheckrc %{buildroot}/etc/wsrepclustercheckrc
 install -m 0755 %{files_source}/rabbit-fence/rabbit-fence.py %{buildroot}/usr/bin/rabbit-fence.py
 install -m 0755 %{files_source}/rabbit-fence/rabbit-fence.init %{buildroot}/etc/init.d/rabbit-fence
@@ -171,6 +172,7 @@ For further information go to http://wiki.openstack.org/Fuel
 /usr/lib/ocf/resource.d/fuel
 /usr/bin/q-agent-cleanup.py
 /usr/bin/clustercheck
+/usr/bin/swiftcheck
 %config(noreplace) /etc/wsrepclustercheckrc
 #
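Note (not part of the patch): once deployed, the new check can be exercised
by hand from an address listed in only_from. A minimal sketch, assuming a
real controller's storage address and the management VIP in place of the
placeholders:

  # Query the xinetd service the same way HAProxy's httpchk does;
  # expect "HTTP/1.1 200 OK" while Swift and the VIP are healthy
  curl -i http://<storage_address>:49001/

  # Or invoke the script directly with the same arguments xinetd passes;
  # it prints the HTTP response and exits 0 (healthy) or 1 (failed)
  /usr/bin/swiftcheck http://<storage_address>:8080 <management_vip> 5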