Change the galera health check for better cluster health

The current galera cluster health check simply logs into a cluster node but does not check whether the node is synced. As a result, a node can be placed back into the pool before it is ready. If this happens, the OpenStack environment can be broken until the wsrep received queue is processed, especially if the out-of-sync node happens to be the primary. This change switches the HAProxy galera backend from the mysql-check login test to an HTTP check ("httpchk HEAD /") on port 9200, and restricts the galera monitoring endpoint to the galera nodes, the load balancers, and localhost. Combined backport of: - https://review.openstack.org/520673 - https://review.openstack.org/523854 - https://review.openstack.org/524107 Closes-Bug: #1665667 Change-Id: I49e371a2743618a0b5544a23e892aa28bb8567eb Depends-On: I81c924464aa4b19c2a62f37b5bf26c3c0453786a Depends-On: Ie1b3b9724dd33de1d90634166e585ecceb1f4c96 Signed-off-by: Kevin Carter <kevin.carter@rackspace.com> (cherry picked from commit 8c0ce1c62f)
2017-11-16 11:59:21 -06:00 · 2017-11-16 11:59:21 -06:00 · 7e7aa69f7e
parent 6a43a775ea
commit 7e7aa69f7e
3 changed files with 9 additions and 3 deletions
--- a/ansible-role-requirements.yml
+++ b/ansible-role-requirements.yml
@@ -17,7 +17,7 @@
 - name: galera_server
  scm: git
  src: https://git.openstack.org/openstack/openstack-ansible-galera_server
-  version: b586b9fa1ee7964e0a192b5ea9682d7bcbb14f1e
+  version: 515ab131fa27b7e66f75abb4c886b5b57ca9d41b
 - name: ceph_client
  scm: git
  src: https://git.openstack.org/openstack/openstack-ansible-ceph_client
@@ -25,7 +25,7 @@
 - name: haproxy_server
  scm: git
  src: https://git.openstack.org/openstack/openstack-ansible-haproxy_server
-  version: 177aa76c75468aca594e867a7ab54595358ecce8
+  version: 43d4290787c2ee600346182c9cda8ccc77d5e4c6
 - name: keepalived
  scm: git
  src: https://github.com/evrardjp/ansible-keepalived
--- a/playbooks/inventory/group_vars/galera_all.yml
+++ b/playbooks/inventory/group_vars/galera_all.yml
@@ -21,3 +21,8 @@ galera_server_package_state: "{{ package_state }}"
 # Disable PrivateDevices for MariaDB on CentOS 7
 # See https://bugs.launchpad.net/openstack-ansible/+bug/1697531 for details.
 galera_disable_privatedevices: "{{ ((properties.is_metal | default(false)) | bool) | ternary('false', 'true') }}"
+
+# By default the galera_monitoring xinetd app is open to 0.0.0.0/0.
+# This restricts monitoring access to only the necessary nodes:
+# the load balancers, the galera nodes, and localhost (127.0.0.1).
+galera_monitoring_allowed_source: "{% for node in groups['galera_all'] + groups['haproxy_all'] %}{{ hostvars[node]['ansible_host'] }} {% endfor %} 127.0.0.1"
--- a/playbooks/vars/configs/haproxy_config.yml
+++ b/playbooks/vars/configs/haproxy_config.yml
@@ -19,11 +19,12 @@ haproxy_default_services:
      haproxy_backup_nodes: "{{ groups['galera_all'][1:] | default([]) }}"
      haproxy_bind: "{{ [internal_lb_vip_address] }}"
      haproxy_port: 3306
+      haproxy_check_port: 9200
      haproxy_balance_type: tcp
      haproxy_timeout_client: 5000s
      haproxy_timeout_server: 5000s
      haproxy_backend_options:
-        - "mysql-check user {{ galera_monitoring_user }}"
+        - "httpchk HEAD /"
      haproxy_whitelist_networks: "{{ haproxy_galera_whitelist_networks }}"
  - service:
      haproxy_service_name: repo_git