Check keepalived health in the amphora
The health manager did not detect a keepalived failure inside the amphora. With this patch, the amphora health daemon no longer sends a health heartbeat when keepalived is configured but not running. This patch also allows the health checks to continue after an initial failure. Change-Id: Id21310bd5ded3747218d3872ab3c966e5ddf5356 Closes-Bug: #1695090
This commit is contained in:
parent
29219e4345
commit
c7a2babf54
|
@ -6,8 +6,9 @@ Wants=network-online.target
|
|||
[Service]
|
||||
Type=forking
|
||||
KillMode=process
|
||||
ExecStart=/sbin/ip netns exec {{ amphora_nsname }} {{ keepalived_cmd }} -D -d -f {{ keepalived_cfg }}
|
||||
ExecStart=/sbin/ip netns exec {{ amphora_nsname }} {{ keepalived_cmd }} -D -d -f {{ keepalived_cfg }} -p {{ keepalived_pid }}
|
||||
ExecReload=/bin/kill -HUP $MAINPID
|
||||
PIDFile={{ keepalived_pid }}
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
|
|
|
@ -18,7 +18,7 @@ DAEMON="ip netns exec {{ amphora_nsname }} {{ keepalived_cmd }}"
|
|||
NAME=octavia-keepalived
|
||||
DESC=octavia-keepalived
|
||||
TMPFILES="/tmp/.vrrp /tmp/.healthcheckers"
|
||||
DAEMON_ARGS="-D -d -f {{ keepalived_cfg }}"
|
||||
DAEMON_ARGS="-D -d -f {{ keepalived_cfg }} -p {{ keepalived_pid }}"
|
||||
|
||||
#includes lsb functions
|
||||
. /lib/lsb/init-functions
|
||||
|
@ -36,7 +36,7 @@ case "$1" in
|
|||
do
|
||||
test -e $file && test ! -L $file && rm $file
|
||||
done
|
||||
if start-stop-daemon --start --quiet --pidfile /var/run/$NAME.pid \
|
||||
if start-stop-daemon --start --quiet --pidfile {{ keepalived_pid }} \
|
||||
--exec $DAEMON -- $DAEMON_ARGS; then
|
||||
log_end_msg 0
|
||||
else
|
||||
|
@ -45,7 +45,7 @@ case "$1" in
|
|||
;;
|
||||
stop)
|
||||
log_daemon_msg "Stopping $DESC" "$NAME"
|
||||
if start-stop-daemon --oknodo --stop --quiet --pidfile /var/run/$NAME.pid \
|
||||
if start-stop-daemon --oknodo --stop --quiet --pidfile {{ keepalived_pid }} \
|
||||
--exec $DAEMON; then
|
||||
log_end_msg 0
|
||||
else
|
||||
|
@ -55,7 +55,7 @@ case "$1" in
|
|||
reload|force-reload)
|
||||
log_action_begin_msg "Reloading $DESC configuration..."
|
||||
if start-stop-daemon --stop --quiet --signal 1 --pidfile \
|
||||
/var/run/$NAME.pid --exec $DAEMON; then
|
||||
{{ keepalived_pid }} --exec $DAEMON; then
|
||||
log_end_msg 0
|
||||
else
|
||||
log_action_end_msg 1
|
||||
|
@ -65,10 +65,10 @@ case "$1" in
|
|||
log_action_begin_msg "Restarting $DESC" "$NAME"
|
||||
|
||||
start-stop-daemon --stop --quiet --pidfile \
|
||||
/var/run/$NAME.pid --exec $DAEMON || true
|
||||
{{ keepalived_pid }} --exec $DAEMON || true
|
||||
sleep 1
|
||||
if start-stop-daemon --start --quiet --pidfile \
|
||||
/var/run/$NAME.pid --exec $DAEMON -- $DAEMON_ARGS; then
|
||||
{{ keepalived_pid }} --exec $DAEMON -- $DAEMON_ARGS; then
|
||||
log_end_msg 0
|
||||
else
|
||||
log_end_msg 1
|
||||
|
|
|
@ -22,4 +22,4 @@ stop on runlevel [!2345]
|
|||
|
||||
respawn
|
||||
|
||||
exec /sbin/ip netns exec {{ amphora_nsname }} {{ keepalived_cmd }} -n -D -d -f {{ keepalived_cfg }}
|
||||
exec /sbin/ip netns exec {{ amphora_nsname }} {{ keepalived_cmd }} -n -D -d -f {{ keepalived_cfg }} -p {{ keepalived_pid }}
|
||||
|
|
|
@ -14,6 +14,7 @@
|
|||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
import errno
|
||||
import os
|
||||
import time
|
||||
|
||||
|
@ -49,9 +50,48 @@ def list_sock_stat_files(hadir=None):
|
|||
def run_sender(cmd_queue):
|
||||
LOG.info('Health Manager Sender starting.')
|
||||
sender = health_sender.UDPStatusSender()
|
||||
|
||||
keepalived_cfg_path = util.keepalived_cfg_path()
|
||||
keepalived_pid_path = util.keepalived_pid_path()
|
||||
|
||||
while True:
|
||||
message = build_stats_message()
|
||||
sender.dosend(message)
|
||||
|
||||
try:
|
||||
# If the keepalived config file is present check
|
||||
# that it is running, otherwise don't send the health
|
||||
# heartbeat
|
||||
if os.path.isfile(keepalived_cfg_path):
|
||||
# Is there a pid file for keepalived?
|
||||
with open(keepalived_pid_path, 'r') as pid_file:
|
||||
pid = int(pid_file.readline())
|
||||
os.kill(pid, 0)
|
||||
|
||||
message = build_stats_message()
|
||||
sender.dosend(message)
|
||||
|
||||
except IOError as e:
|
||||
# Missing PID file, skip health heartbeat
|
||||
if e.errno == errno.ENOENT:
|
||||
LOG.error('Missing keepalived PID file {0}, skipping '
|
||||
'health heartbeat.'.format(keepalived_pid_path))
|
||||
else:
|
||||
LOG.error('Failed to check keepalived and haproxy status '
|
||||
'due to exception {0}, skipping health '
|
||||
'heartbeat.'.format(str(e)))
|
||||
except OSError as e:
|
||||
# Keepalived is not running, skip health heartbeat
|
||||
if e.errno == errno.ESRCH:
|
||||
LOG.error('Keepalived is configured but not running, skipping '
|
||||
'health heartbeat.'.format(keepalived_pid_path))
|
||||
else:
|
||||
LOG.error('Failed to check keepalived and haproxy status '
|
||||
'due to exception {0}, skipping health '
|
||||
'heartbeat.'.format(str(e)))
|
||||
except Exception as e:
|
||||
LOG.error('Failed to check keepalived and haproxy status '
|
||||
'due to exception {0}, skipping health '
|
||||
'heartbeat.'.format(str(e)))
|
||||
|
||||
try:
|
||||
cmd = cmd_queue.get_nowait()
|
||||
if cmd is 'reload':
|
||||
|
|
|
@ -153,6 +153,8 @@ class TestHealthDaemon(base.TestCase):
|
|||
LISTENER_ID2 + '.sock'}
|
||||
self.assertEqual(files, expected_files)
|
||||
|
||||
@mock.patch('os.kill')
|
||||
@mock.patch('os.path.isfile')
|
||||
@mock.patch('octavia.amphorae.backends.health_daemon.'
|
||||
'health_daemon.time.sleep')
|
||||
@mock.patch('oslo_config.cfg.CONF.reload_config_files')
|
||||
|
@ -161,25 +163,31 @@ class TestHealthDaemon(base.TestCase):
|
|||
@mock.patch('octavia.amphorae.backends.health_daemon.'
|
||||
'health_sender.UDPStatusSender')
|
||||
def test_run_sender(self, mock_UDPStatusSender, mock_build_msg,
|
||||
mock_reload_cfg, mock_sleep):
|
||||
mock_reload_cfg, mock_sleep, mock_isfile, mock_kill):
|
||||
sender_mock = mock.MagicMock()
|
||||
dosend_mock = mock.MagicMock()
|
||||
sender_mock.dosend = dosend_mock
|
||||
mock_UDPStatusSender.return_value = sender_mock
|
||||
mock_build_msg.side_effect = ['TEST', Exception('break')]
|
||||
mock_build_msg.side_effect = ['TEST']
|
||||
|
||||
mock_isfile.return_value = False
|
||||
|
||||
test_queue = queue.Queue()
|
||||
self.assertRaisesRegex(Exception, 'break',
|
||||
health_daemon.run_sender, test_queue)
|
||||
with mock.patch('time.sleep') as mock_sleep:
|
||||
mock_sleep.side_effect = Exception('break')
|
||||
self.assertRaisesRegex(Exception, 'break',
|
||||
health_daemon.run_sender, test_queue)
|
||||
|
||||
sender_mock.dosend.assert_called_once_with('TEST')
|
||||
|
||||
# Test a reload event
|
||||
mock_build_msg.reset_mock()
|
||||
mock_build_msg.side_effect = ['TEST', Exception('break')]
|
||||
mock_build_msg.side_effect = ['TEST']
|
||||
test_queue.put('reload')
|
||||
self.assertRaisesRegex(Exception, 'break',
|
||||
health_daemon.run_sender, test_queue)
|
||||
with mock.patch('time.sleep') as mock_sleep:
|
||||
mock_sleep.side_effect = Exception('break')
|
||||
self.assertRaisesRegex(Exception, 'break',
|
||||
health_daemon.run_sender, test_queue)
|
||||
mock_reload_cfg.assert_called_once_with()
|
||||
|
||||
# Test the shutdown path
|
||||
|
@ -193,10 +201,88 @@ class TestHealthDaemon(base.TestCase):
|
|||
|
||||
# Test an unknown command
|
||||
mock_build_msg.reset_mock()
|
||||
mock_build_msg.side_effect = ['TEST', Exception('break')]
|
||||
mock_build_msg.side_effect = ['TEST']
|
||||
test_queue.put('bogus')
|
||||
self.assertRaisesRegex(Exception, 'break',
|
||||
health_daemon.run_sender, test_queue)
|
||||
with mock.patch('time.sleep') as mock_sleep:
|
||||
mock_sleep.side_effect = Exception('break')
|
||||
self.assertRaisesRegex(Exception, 'break',
|
||||
health_daemon.run_sender, test_queue)
|
||||
|
||||
# Test keepalived config, but no PID
|
||||
mock_build_msg.reset_mock()
|
||||
dosend_mock.reset_mock()
|
||||
mock_isfile.return_value = True
|
||||
with mock.patch('octavia.amphorae.backends.health_daemon.'
|
||||
'health_daemon.open', mock.mock_open()) as mock_open:
|
||||
mock_open.side_effect = FileNotFoundError
|
||||
test_queue.put('shutdown')
|
||||
health_daemon.run_sender(test_queue)
|
||||
mock_build_msg.assert_not_called()
|
||||
dosend_mock.assert_not_called()
|
||||
|
||||
# Test keepalived config, but PID file error
|
||||
mock_build_msg.reset_mock()
|
||||
dosend_mock.reset_mock()
|
||||
mock_isfile.return_value = True
|
||||
with mock.patch('octavia.amphorae.backends.health_daemon.'
|
||||
'health_daemon.open', mock.mock_open()) as mock_open:
|
||||
mock_open.side_effect = IOError
|
||||
test_queue.put('shutdown')
|
||||
health_daemon.run_sender(test_queue)
|
||||
mock_build_msg.assert_not_called()
|
||||
dosend_mock.assert_not_called()
|
||||
|
||||
# Test keepalived config, but bogus PID
|
||||
mock_build_msg.reset_mock()
|
||||
dosend_mock.reset_mock()
|
||||
mock_isfile.return_value = True
|
||||
with mock.patch('octavia.amphorae.backends.health_daemon.'
|
||||
'health_daemon.open',
|
||||
mock.mock_open(read_data='foo')) as mock_open:
|
||||
test_queue.put('shutdown')
|
||||
health_daemon.run_sender(test_queue)
|
||||
mock_build_msg.assert_not_called()
|
||||
dosend_mock.assert_not_called()
|
||||
|
||||
# Test keepalived config, but not running
|
||||
mock_build_msg.reset_mock()
|
||||
dosend_mock.reset_mock()
|
||||
mock_isfile.return_value = True
|
||||
with mock.patch('octavia.amphorae.backends.health_daemon.'
|
||||
'health_daemon.open',
|
||||
mock.mock_open(read_data='999999')) as mock_open:
|
||||
mock_kill.side_effect = ProccessNotFoundError
|
||||
test_queue.put('shutdown')
|
||||
health_daemon.run_sender(test_queue)
|
||||
mock_build_msg.assert_not_called()
|
||||
dosend_mock.assert_not_called()
|
||||
|
||||
# Test keepalived config, but process error
|
||||
mock_build_msg.reset_mock()
|
||||
dosend_mock.reset_mock()
|
||||
mock_isfile.return_value = True
|
||||
with mock.patch('octavia.amphorae.backends.health_daemon.'
|
||||
'health_daemon.open',
|
||||
mock.mock_open(read_data='999999')) as mock_open:
|
||||
mock_kill.side_effect = OSError
|
||||
test_queue.put('shutdown')
|
||||
health_daemon.run_sender(test_queue)
|
||||
mock_build_msg.assert_not_called()
|
||||
dosend_mock.assert_not_called()
|
||||
|
||||
# Test with happy keepalive
|
||||
sender_mock.reset_mock()
|
||||
dosend_mock.reset_mock()
|
||||
mock_kill.side_effect = [True]
|
||||
mock_build_msg.reset_mock()
|
||||
mock_build_msg.side_effect = ['TEST', 'TEST']
|
||||
mock_isfile.return_value = True
|
||||
test_queue.put('shutdown')
|
||||
with mock.patch('octavia.amphorae.backends.health_daemon.'
|
||||
'health_daemon.open',
|
||||
mock.mock_open(read_data='999999')) as mock_open:
|
||||
health_daemon.run_sender(test_queue)
|
||||
sender_mock.dosend.assert_called_once_with('TEST')
|
||||
|
||||
@mock.patch('octavia.amphorae.backends.utils.haproxy_query.HAProxyQuery')
|
||||
def test_get_stats(self, mock_query):
|
||||
|
@ -266,3 +352,11 @@ class TestHealthDaemon(base.TestCase):
|
|||
msg = health_daemon.build_stats_message()
|
||||
|
||||
self.assertEqual(msg['listeners'][LISTENER_ID1]['pools'], {})
|
||||
|
||||
|
||||
class FileNotFoundError(IOError):
|
||||
errno = 2
|
||||
|
||||
|
||||
class ProccessNotFoundError(OSError):
|
||||
errno = 3
|
||||
|
|
Loading…
Reference in New Issue