Change the galera health check for better cluster health

The current galera cluster health check simply logs into a cluster node
but does not check whether the node is synced. This can lead to an issue
where a node is placed back into the pool before it is ready. If this
happens, it can lead to a broken OpenStack environment until the wsrep
received queue is processed. This is especially true if the out-of-sync
node happens to be the primary.

Combined backport of:
- https://review.openstack.org/520673
- https://review.openstack.org/523854
- https://review.openstack.org/524107

Closes-Bug: #1665667
Change-Id: I49e371a2743618a0b5544a23e892aa28bb8567eb
Depends-On: I81c924464aa4b19c2a62f37b5bf26c3c0453786a
Depends-On: Ie1b3b9724dd33de1d90634166e585ecceb1f4c96
Signed-off-by: Kevin Carter <kevin.carter@rackspace.com>
(cherry picked from commit 8c0ce1c62f)
This commit is contained in:
Kevin Carter 2017-11-16 11:59:21 -06:00 committed by Jesse Pretorius
parent 0340445e08
commit 32230e8cd9
3 changed files with 10 additions and 3 deletions

View File

@ -17,7 +17,7 @@
- name: galera_server
scm: git
src: https://git.openstack.org/openstack/openstack-ansible-galera_server
version: 91d463b9b54ed777738350b0540d0502bfb5ee20
version: f46436b376d1f21cd541c62dd010a6cc5ff5c8bf
- name: ceph_client
scm: git
src: https://git.openstack.org/openstack/openstack-ansible-ceph_client
@ -25,7 +25,7 @@
- name: haproxy_server
scm: git
src: https://git.openstack.org/openstack/openstack-ansible-haproxy_server
version: 79151d4187b55ec066e43c5be115ff9726ad772c
version: 44ba7a0e96c65a96d3241726f3d9ccfa9b391b63
- name: keepalived
scm: git
src: https://github.com/evrardjp/ansible-keepalived

View File

@ -17,3 +17,9 @@ galera_client_drop_config_file: true
# Ensure that the package state matches the global setting
galera_server_package_state: "{{ package_state }}"
# By default the galera_monitoring xinetd app is open to 0.0.0.0/0.
# This makes sure the monitoring is restricted to only the necessary nodes:
# the load balancers and the galera nodes.
galera_monitoring_allowed_source: "{% for node in groups['galera_all'] + groups['haproxy_all'] %}{{ hostvars[node]['ansible_host'] }} {% endfor %} 127.0.0.1"

View File

@ -19,11 +19,12 @@ haproxy_default_services:
haproxy_backup_nodes: "{{ groups['galera_all'][1:] | default([]) }}"
haproxy_bind: "{{ [internal_lb_vip_address] }}"
haproxy_port: 3306
haproxy_check_port: 9200
haproxy_balance_type: tcp
haproxy_timeout_client: 5000s
haproxy_timeout_server: 5000s
haproxy_backend_options:
- "mysql-check user {{ galera_monitoring_user }}"
- "httpchk HEAD /"
haproxy_whitelist_networks: "{{ haproxy_galera_whitelist_networks }}"
- service:
haproxy_service_name: repo_git