From 32230e8cd982367940a9b68f65f1e73d6f14d2c4 Mon Sep 17 00:00:00 2001 From: Kevin Carter Date: Thu, 16 Nov 2017 11:59:21 -0600 Subject: [PATCH] Change the galera health check for better cluster health The current galera cluster health check simply logs into a cluster node but does not check if the node is synced. This can lead to an issue where a node is placed back into the pool before it is ready. If this happens, it can lead to a broken OpenStack environment until the wsrep received queue is processed; this is especially true if the out-of-sync node happens to be the primary. Combined backport of: - https://review.openstack.org/520673 - https://review.openstack.org/523854 - https://review.openstack.org/524107 Closes-Bug: #1665667 Change-Id: I49e371a2743618a0b5544a23e892aa28bb8567eb Depends-On: I81c924464aa4b19c2a62f37b5bf26c3c0453786a Depends-On: Ie1b3b9724dd33de1d90634166e585ecceb1f4c96 Signed-off-by: Kevin Carter (cherry picked from commit 8c0ce1c62f73f880ed255b20ea932852288d23e9) --- ansible-role-requirements.yml | 4 ++-- playbooks/inventory/group_vars/galera_all.yml | 6 ++++++ playbooks/vars/configs/haproxy_config.yml | 3 ++- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/ansible-role-requirements.yml b/ansible-role-requirements.yml index 04f08345fb..badaad4d35 100644 --- a/ansible-role-requirements.yml +++ b/ansible-role-requirements.yml @@ -17,7 +17,7 @@ - name: galera_server scm: git src: https://git.openstack.org/openstack/openstack-ansible-galera_server - version: 91d463b9b54ed777738350b0540d0502bfb5ee20 + version: f46436b376d1f21cd541c62dd010a6cc5ff5c8bf - name: ceph_client scm: git src: https://git.openstack.org/openstack/openstack-ansible-ceph_client @@ -25,7 +25,7 @@ - name: haproxy_server scm: git src: https://git.openstack.org/openstack/openstack-ansible-haproxy_server - version: 79151d4187b55ec066e43c5be115ff9726ad772c + version: 44ba7a0e96c65a96d3241726f3d9ccfa9b391b63 - name: keepalived scm: git src: 
https://github.com/evrardjp/ansible-keepalived diff --git a/playbooks/inventory/group_vars/galera_all.yml b/playbooks/inventory/group_vars/galera_all.yml index 0de04f0846..386546cd8c 100644 --- a/playbooks/inventory/group_vars/galera_all.yml +++ b/playbooks/inventory/group_vars/galera_all.yml @@ -17,3 +17,9 @@ galera_client_drop_config_file: true # Ensure that the package state matches the global setting galera_server_package_state: "{{ package_state }}" + +# By default galera_monitoring xinetd app is open to 0.0.0.0/0 +# This makes sure the monitoring is only restricted to the necessary nodes: +# the load balancers, and the galera nodes. +galera_monitoring_allowed_source: "{% for node in groups['galera_all'] + groups['haproxy_all'] %}{{ hostvars[node]['ansible_host'] }} {% endfor %} 127.0.0.1" + diff --git a/playbooks/vars/configs/haproxy_config.yml b/playbooks/vars/configs/haproxy_config.yml index 6ab9099b4f..cd75fad70e 100644 --- a/playbooks/vars/configs/haproxy_config.yml +++ b/playbooks/vars/configs/haproxy_config.yml @@ -19,11 +19,12 @@ haproxy_default_services: haproxy_backup_nodes: "{{ groups['galera_all'][1:] | default([]) }}" haproxy_bind: "{{ [internal_lb_vip_address] }}" haproxy_port: 3306 + haproxy_check_port: 9200 haproxy_balance_type: tcp haproxy_timeout_client: 5000s haproxy_timeout_server: 5000s haproxy_backend_options: - - "mysql-check user {{ galera_monitoring_user }}" + - "httpchk HEAD /" haproxy_whitelist_networks: "{{ haproxy_galera_whitelist_networks }}" - service: haproxy_service_name: repo_git