From b25c0ee4772fdf04fe6f263a8e81571204f706ff Mon Sep 17 00:00:00 2001
From: Mark Goddard
Date: Sun, 17 Mar 2019 17:48:54 +0000
Subject: [PATCH] Fix MariaDB 10.3 upgrade

Upgrading MariaDB from Rocky to Stein currently fails, with the new
container left continually restarting. The problem is that the Rocky
container does not shut down cleanly, leaving behind state that the new
container cannot recover. The container does not shut down cleanly
because we run dumb-init with a --single-child argument, causing it to
forward signals only to the process executed by dumb-init. In our case
this is mysqld_safe, which ignores various signals, including SIGTERM.
After a (default 10 second) timeout, Docker then kills the container.

A Kolla change [1] removes the --single-child argument from dumb-init
for the MariaDB container; however, we still need to support upgrading
from Rocky images that don't have this change. To do that, we add new
handlers that execute 'mysqladmin shutdown' to cleanly shut down the
service.

A second issue with the current upgrade approach is that we don't
execute mysql_upgrade after starting the new service. This can leave
the database state in the format of the previous release. This patch
also adds handlers to execute mysql_upgrade.

[1] https://review.openstack.org/644244

Depends-On: https://review.openstack.org/644244
Depends-On: https://review.openstack.org/645990
Change-Id: I08a655a359ff9cfa79043f2166dca59199c7d67f
Closes-Bug: #1820325
---
 ansible/roles/mariadb/handlers/main.yml       | 152 +++++++++++++++---
 .../roles/mariadb/tasks/bootstrap_cluster.yml |   2 +-
 ansible/roles/mariadb/tasks/config.yml        |  16 +-
 ansible/roles/mariadb/templates/galera.cnf.j2 |   5 +
 4 files changed, 138 insertions(+), 37 deletions(-)

diff --git a/ansible/roles/mariadb/handlers/main.yml b/ansible/roles/mariadb/handlers/main.yml
index d2b16b5aec..d378839824 100644
--- a/ansible/roles/mariadb/handlers/main.yml
+++ b/ansible/roles/mariadb/handlers/main.yml
@@ -20,11 +20,9 @@
   when:
     - bootstrap_host is defined
     - bootstrap_host == inventory_hostname
+  listen: Bootstrap MariaDB cluster
   notify:
-    - wait first mariadb container
-    - restart slave mariadb
-    - restart master mariadb
-
+    - restart mariadb

 # TODO(jeffrey4l), remove the task check when the wait_for bug is fixed
 # https://github.com/ansible/ansible-modules-core/issues/2788
@@ -42,12 +40,45 @@
   when:
     - bootstrap_host is defined
     - bootstrap_host == inventory_hostname
+  listen: Bootstrap MariaDB cluster
+
+# NOTE(mgoddard): In Rocky the MariaDB image had an issue where it would not
+# stop on demand, and would result in Docker forcibly killing the container.
+# This could lead to a failed upgrade if the new image is unable to recover
+# from the crash. See https://bugs.launchpad.net/kolla-ansible/+bug/1820325.
+# TODO(mgoddard): Remove this task in Train.
+- name: shutdown slave mariadb
+  vars:
+    service_name: "mariadb"
+    service: "{{ mariadb_services[service_name] }}"
+  become: true
+  kolla_docker:
+    action: "start_container"
+    command: >-
+      bash -c '
+      sudo -E kolla_set_configs &&
+      mysqladmin shutdown --host={{ api_interface_address }} --user=root --password={{ database_password }}
+      '
+    common_options: "{{ docker_common_options }}"
+    detach: False
+    name: "mariadb_shutdown"
+    image: "{{ service.image }}"
+    volumes: "{{ service.volumes }}"
+    dimensions: "{{ service.dimensions }}"
+    labels:
+      UPGRADE:
+    restart_policy: "never"
+  no_log: true
+  when:
+    - kolla_action != "config"
+    - has_cluster | bool
+    - inventory_hostname != master_host
+  listen: restart mariadb

 - name: restart slave mariadb
   vars:
     service_name: "mariadb"
     service: "{{ mariadb_services[service_name] }}"
-    mariadb_container: "{{ check_mariadb_containers.results|selectattr('item.key', 'equalto', service_name)|first }}"
   become: true
   kolla_docker:
     action: "recreate_or_restart_container"
@@ -59,15 +90,7 @@
   when:
     - kolla_action != "config"
     - inventory_hostname != master_host
-    - inventory_hostname in groups[service.group]
-    - service.enabled | bool
-    - mariadb_config_json.changed | bool
-      or mariadb_galera_conf.changed | bool
-      or mariadb_wsrep_notify.changed | bool
-      or mariadb_container.changed | bool
-      or bootstrap_host is defined
-  notify:
-    - wait for slave mariadb
+  listen: restart mariadb

 # TODO(jeffrey4l), remove the task check when the wait_for bug is fixed
 # https://github.com/ansible/ansible-modules-core/issues/2788
@@ -85,12 +108,72 @@
   when:
     - kolla_action != "config"
     - inventory_hostname != master_host
+  listen: restart mariadb
+
+- name: run upgrade on slave
+  vars:
+    service_name: "mariadb"
+    service: "{{ mariadb_services[service_name] }}"
+  become: true
+  kolla_docker:
+    action: "start_container"
+    common_options: "{{ docker_common_options }}"
+    detach: False
+    dimensions: "{{ service.dimensions }}"
+    environment:
+      KOLLA_UPGRADE:
+      KOLLA_CONFIG_STRATEGY: "{{ config_strategy }}"
+      DB_HOST: "{{ api_interface_address }}"
+      DB_PORT: "{{ mariadb_port }}"
+      DB_ROOT_PASSWORD: "{{ database_password }}"
+    image: "{{ service.image }}"
+    labels:
+      UPGRADE:
+    name: "upgrade_mariadb"
+    restart_policy: "never"
+    volumes: "{{ service.volumes }}"
+  no_log: true
+  when:
+    - kolla_action == "upgrade"
+    - inventory_hostname != master_host
+  listen: restart mariadb
+
+# NOTE(mgoddard): In Rocky the MariaDB image had an issue where it would not
+# stop on demand, and would result in Docker forcibly killing the container.
+# This could lead to a failed upgrade if the new image is unable to recover
+# from the crash. See https://bugs.launchpad.net/kolla-ansible/+bug/1820325.
+# TODO(mgoddard): Remove this task in Train.
+- name: shutdown master mariadb
+  vars:
+    service_name: "mariadb"
+    service: "{{ mariadb_services[service_name] }}"
+  become: true
+  kolla_docker:
+    action: "start_container"
+    command: >-
+      bash -c '
+      sudo -E kolla_set_configs &&
+      mysqladmin shutdown --host={{ api_interface_address }} --user=root --password={{ database_password }}
+      '
+    common_options: "{{ docker_common_options }}"
+    detach: False
+    name: "mariadb_shutdown"
+    image: "{{ service.image }}"
+    volumes: "{{ service.volumes }}"
+    dimensions: "{{ service.dimensions }}"
+    labels:
+      UPGRADE:
+    restart_policy: "never"
+  no_log: true
+  when:
+    - kolla_action != "config"
+    - inventory_hostname == master_host
+  listen: restart mariadb

 - name: restart master mariadb
   vars:
     service_name: "mariadb"
     service: "{{ mariadb_services[service_name] }}"
-    mariadb_container: "{{ check_mariadb_containers.results|selectattr('item.key', 'equalto', service_name)|first }}"
   become: true
   kolla_docker:
     action: "recreate_or_restart_container"
@@ -102,15 +185,7 @@
   when:
     - kolla_action != "config"
     - inventory_hostname == master_host
-    - inventory_hostname in groups[service.group]
-    - service.enabled | bool
-    - mariadb_config_json.changed | bool
-      or mariadb_galera_conf.changed | bool
-      or mariadb_wsrep_notify.changed | bool
-      or mariadb_container.changed | bool
-      or bootstrap_host is defined
-  notify:
-    - Waiting for master mariadb
+  listen: restart mariadb

 # TODO(jeffrey4l), remove the task check when the wait_for bug is fixed
 # https://github.com/ansible/ansible-modules-core/issues/2788
@@ -128,3 +203,32 @@
   when:
     - kolla_action != "config"
     - inventory_hostname == master_host
+  listen: restart mariadb
+
+- name: run upgrade on master
+  vars:
+    service_name: "mariadb"
+    service: "{{ mariadb_services[service_name] }}"
+  become: true
+  kolla_docker:
+    action: "start_container"
+    common_options: "{{ docker_common_options }}"
+    detach: False
+    dimensions: "{{ service.dimensions }}"
+    environment:
+      KOLLA_UPGRADE:
+      KOLLA_CONFIG_STRATEGY: "{{ config_strategy }}"
+      DB_HOST: "{{ api_interface_address }}"
+      DB_PORT: "{{ mariadb_port }}"
+      DB_ROOT_PASSWORD: "{{ database_password }}"
+    image: "{{ service.image }}"
+    labels:
+      UPGRADE:
+    name: "upgrade_mariadb"
+    restart_policy: "never"
+    volumes: "{{ service.volumes }}"
+  no_log: true
+  when:
+    - kolla_action == "upgrade"
+    - inventory_hostname == master_host
+  listen: restart mariadb
diff --git a/ansible/roles/mariadb/tasks/bootstrap_cluster.yml b/ansible/roles/mariadb/tasks/bootstrap_cluster.yml
index a5425e5e1f..aeb8bb7fa8 100644
--- a/ansible/roles/mariadb/tasks/bootstrap_cluster.yml
+++ b/ansible/roles/mariadb/tasks/bootstrap_cluster.yml
@@ -20,7 +20,7 @@
     restart_policy: "never"
     volumes: "{{ service.volumes }}"
   notify:
-    - Starting first MariaDB container
+    - Bootstrap MariaDB cluster

 - set_fact:
     bootstrap_host: "{{ inventory_hostname }}"
diff --git a/ansible/roles/mariadb/tasks/config.yml b/ansible/roles/mariadb/tasks/config.yml
index 83c1a273f9..cb05451974 100644
--- a/ansible/roles/mariadb/tasks/config.yml
+++ b/ansible/roles/mariadb/tasks/config.yml
@@ -48,13 +48,11 @@
     dest: "{{ node_config_directory }}/{{ service_name }}/config.json"
     mode: "0660"
   become: true
-  register: mariadb_config_json
   when:
     - inventory_hostname in groups[service.group]
     - service.enabled | bool
   notify:
-    - restart slave mariadb
-    - restart master mariadb
+    - restart mariadb

 - name: Copying over galera.cnf
   vars:
@@ -68,13 +66,11 @@
     dest: "{{ node_config_directory }}/{{ service_name }}/galera.cnf"
     mode: "0660"
   become: true
-  register: mariadb_galera_conf
   when:
     - inventory_hostname in groups[service.group]
     - service.enabled | bool
   notify:
-    - restart slave mariadb
-    - restart master mariadb
+    - restart mariadb

 - name: Copying over wsrep-notify.sh
   template:
@@ -82,14 +78,12 @@
     dest: "{{ node_config_directory }}/{{ item.key }}/wsrep-notify.sh"
     mode: "0770"
   become: true
-  register: mariadb_wsrep_notify
   when:
     - inventory_hostname in groups[item.value.group]
     - item.value.enabled | bool
   with_dict: "{{ mariadb_services }}"
   notify:
-    - restart slave mariadb
-    - restart master mariadb
+    - restart mariadb

 - name: Check mariadb containers
   become: true
@@ -100,12 +94,10 @@
     image: "{{ item.value.image }}"
     volumes: "{{ item.value.volumes }}"
     dimensions: "{{ item.value.dimensions }}"
-  register: check_mariadb_containers
   when:
     - kolla_action != "config"
     - inventory_hostname in groups[item.value.group]
     - item.value.enabled | bool
   with_dict: "{{ mariadb_services }}"
   notify:
-    - restart slave mariadb
-    - restart master mariadb
+    - restart mariadb
diff --git a/ansible/roles/mariadb/templates/galera.cnf.j2 b/ansible/roles/mariadb/templates/galera.cnf.j2
index 7e9611faa3..5ee209cd37 100644
--- a/ansible/roles/mariadb/templates/galera.cnf.j2
+++ b/ansible/roles/mariadb/templates/galera.cnf.j2
@@ -54,5 +54,10 @@ innodb_buffer_pool_size = '{{ dynamic_pool_size_mb }}M'
 innodb_buffer_pool_size = '8192M'
 {% endif %}

+# The default value for innodb_lock_schedule_algorithm is VATS, but this does
+# not work with galera. Set FCFS explicitly to avoid a warning.
+# https://mariadb.com/kb/en/library/innodb-system-variables/#innodb_lock_schedule_algorithm.
+innodb_lock_schedule_algorithm = FCFS
+
 [server]
 pid-file=/var/lib/mysql/mariadb.pid
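
Note: the handler consolidation above relies on Ansible's handler "listen" keyword.
Tasks notify a single topic, "restart mariadb", and every handler listening on that
topic runs, in the order the handlers are defined, which is how one notification
drives the shutdown, restart, wait and upgrade steps; each handler's own "when"
guard then limits it to the right hosts (master vs. slaves) and actions (deploy vs.
upgrade). The following is a minimal, self-contained sketch of that pattern only;
the playbook, task and messages are illustrative stand-ins and are not part of this
patch or of kolla-ansible.

  # listen-demo.yml: run with "ansible-playbook listen-demo.yml"
  - hosts: localhost
    gather_facts: false
    tasks:
      - name: Stand-in for a config change (e.g. templating galera.cnf)
        command: /bin/true
        changed_when: true
        notify:
          - restart mariadb

    handlers:
      # Handlers listening on the same topic run in the order they are
      # defined here, so a single notification drives the whole sequence.
      - name: shutdown mariadb (stand-in)
        debug:
          msg: "would run 'mysqladmin shutdown' in a one-off container"
        listen: restart mariadb

      - name: restart mariadb container (stand-in)
        debug:
          msg: "would recreate or restart the mariadb container"
        listen: restart mariadb

      - name: run mysql_upgrade (stand-in)
        debug:
          msg: "would start the image with KOLLA_UPGRADE set"
        listen: restart mariadb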