From e26b56adfa0dda60a3c8595dd249d902d0ed0c03 Mon Sep 17 00:00:00 2001 From: Dmitriy Rabotyagov Date: Wed, 12 Apr 2023 14:19:24 +0200 Subject: [PATCH] Stop gathering local software_versions for services With latest ansible-core, playbooks started failing on adding extra compute or controller nodes, when cinder/nova playbooks run with limits. This happens as we're trying to rely on local facts for hosts that are expired. At the same time, it's not always possible to collect them, as some computes can be down while adding another one. With that we're simplifying the flow and avoiding the old process of restarting services or executing migrations based on local facts. Depends-On: https://review.opendev.org/c/openstack/openstack-ansible-os_nova/+/880147 Depends-On: https://review.opendev.org/c/openstack/openstack-ansible-os_cinder/+/880210 Closes-Bug: #2009834 Change-Id: I44dc8567e9a93f91327202de1bf88a067266711d --- playbooks/common-tasks/restart-service.yml | 47 ------- playbooks/os-cinder-install.yml | 151 --------------------- playbooks/os-nova-install.yml | 136 ------------------- 3 files changed, 334 deletions(-) delete mode 100644 playbooks/common-tasks/restart-service.yml diff --git a/playbooks/common-tasks/restart-service.yml b/playbooks/common-tasks/restart-service.yml deleted file mode 100644 index 9410ca6cb0..0000000000 --- a/playbooks/common-tasks/restart-service.yml +++ /dev/null @@ -1,47 +0,0 @@ ---- -# Copyright 2017, Rackspace US, Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -# This is a generic task set which can be used to execute -# a service action on target hosts for any services. This -# is useful for executing a SIGHUP (reload) to load up any -# configuration changes or to restart services as required. -# -# Inputs: -# - service_name: Any service found matching this prefix will be acted on. -# - service_action: The action to execute [stop, start, restart, reload]. - -- name: Gather service list - shell: "systemctl list-unit-files --state=enabled --type=service | awk '/^{{ service_name }}.* enabled$/ {print $1}'" - args: - executable: "/bin/bash" - register: _enabled_services - changed_when: false - tags: - - skip_ansible_lint - -- name: Execute service action - service: - name: "{{ service_file }}" - state: "{{ service_action }}" - with_items: "{{ (_enabled_services.stdout_lines | difference(service_negate | default([]))) | list }}" - loop_control: - loop_var: service_file - -- name: Disable the service restart requirement - ini_file: - dest: "/etc/ansible/facts.d/openstack_ansible.fact" - section: "{{ service_fact | default(service_name) }}" - option: need_service_restart - value: False diff --git a/playbooks/os-cinder-install.yml b/playbooks/os-cinder-install.yml index 1d763673c9..26d973520a 100644 --- a/playbooks/os-cinder-install.yml +++ b/playbooks/os-cinder-install.yml @@ -42,154 +42,3 @@ vars: cinder_hosts: "cinder_api" cinder_serial: "{{ cinder_api_serial | default(['1', '100%']) }}" - - - -# These facts are set against the deployment host to ensure that -# they are fast to access. This is done in preference to setting -# them against each target as the hostvars extraction will take -# a long time if executed against a large inventory. 
-- name: Refresh local facts after all software changes are made - hosts: cinder_all - gather_facts: no - user: root - environment: "{{ deployment_environment_variables | default({}) }}" - vars_files: - - "defaults/{{ install_method }}_install.yml" - tags: - - cinder - tasks: - - name: refresh local facts - setup: - filter: ansible_local - gather_subset: "!all" - - # This variable contains the values of the local fact set for the cinder - # venv tag for all hosts in the 'cinder_all' host group. - - name: Gather software version list - set_fact: - cinder_all_software_versions: "{{ (groups['cinder_all'] | - map('extract', hostvars, ['ansible_local', 'openstack_ansible', 'cinder', 'venv_tag'])) | - list }}" - delegate_to: localhost - run_once: yes - - # This variable outputs a boolean value which is True when - # cinder_all_software_versions contains a list of defined - # values. If they are not defined, it means that not all - # hosts have their software deployed yet. - - name: Set software deployed fact - set_fact: - cinder_all_software_deployed: "{{ (cinder_all_software_versions | select('defined')) | list == cinder_all_software_versions }}" - delegate_to: localhost - run_once: yes - - # This variable outputs a boolean when all the values in - # cinder_all_software_versions are the same and the software - # has been deployed to all hosts in the group. 
- - name: Set software updated fact - set_fact: - cinder_all_software_updated: "{{ ((cinder_all_software_versions | unique) | length == 1) and (cinder_all_software_deployed | bool) }}" - delegate_to: localhost - run_once: yes - - - -- name: Restart cinder agents to ensure new RPC object version is used - hosts: cinder_backup,cinder_volume,cinder_scheduler - gather_facts: no - serial: "{{ cinder_backend_serial | default(['1', '100%']) }}" - user: root - environment: "{{ deployment_environment_variables | default({}) }}" - vars_files: - - "defaults/{{ install_method }}_install.yml" - tags: - - cinder - tasks: - - name: Execute cinder service reload - include_tasks: common-tasks/restart-service.yml - vars: - service_name: "{{ item.name }}" - service_action: "{{ item.action }}" - service_fact: "cinder" - with_items: - - { name: "cinder-scheduler", action: "restarted" } - - { name: "cinder-volume", action: "reloaded" } - - { name: "cinder-backup", action: "reloaded" } - when: - - "cinder_all_software_updated is defined" - - "cinder_all_software_updated | bool" - - "ansible_local['openstack_ansible']['cinder']['need_service_restart'] | bool" - - - -- name: Restart cinder API to ensure new RPC object version is used - hosts: cinder_api - gather_facts: no - serial: "{{ cinder_api_serial | default(['1','100%']) }}" - user: root - environment: "{{ deployment_environment_variables | default({}) }}" - vars_files: - - "defaults/{{ install_method }}_install.yml" - tags: - - cinder - tasks: - # In order to ensure that the service restart does not - # cause an unexpected outage, we drain the load balancer - # back end for this container. 
- - include_tasks: common-tasks/haproxy-endpoint-manage.yml - vars: - haproxy_state: disabled - when: - - "cinder_all_software_updated | bool" - - "ansible_local['openstack_ansible']['cinder']['need_service_restart'] | bool" - - "groups['cinder_api'] | length > 1" - - - name: Execute cinder service restart - include_tasks: common-tasks/restart-service.yml - vars: - service_name: "cinder-api" - service_action: "restarted" - service_fact: "cinder" - when: - - "cinder_all_software_updated | bool" - - "ansible_local['openstack_ansible']['cinder']['need_service_restart'] | bool" - - # Now that service restart is done, we can set - # the load balancer back end for this container - # to available again. - - include_tasks: common-tasks/haproxy-endpoint-manage.yml - vars: - haproxy_state: enabled - when: "groups['cinder_api'] | length > 1" - - - -- name: Perform online database migrations - hosts: cinder_api[0] - gather_facts: no - user: root - environment: "{{ deployment_environment_variables | default({}) }}" - vars_files: - - "defaults/{{ install_method }}_install.yml" - tags: - - cinder - tasks: - - name: Perform online data migrations - command: "{{ cinder_bin }}/cinder-manage db online_data_migrations" - become: yes - become_user: "{{ cinder_system_user_name | default('cinder') }}" - when: - - "cinder_all_software_updated | bool" - - "ansible_local['openstack_ansible']['cinder']['need_online_data_migrations'] | bool" - changed_when: false - register: data_migrations - - - name: Disable the online migrations requirement - ini_file: - dest: "/etc/ansible/facts.d/openstack_ansible.fact" - section: cinder - option: need_online_data_migrations - value: False - when: - - data_migrations is succeeded diff --git a/playbooks/os-nova-install.yml b/playbooks/os-nova-install.yml index 3d29322988..cfd4990aa6 100644 --- a/playbooks/os-nova-install.yml +++ b/playbooks/os-nova-install.yml @@ -25,139 +25,3 @@ vars: nova_hosts: 
"nova_compute:!nova_conductor:!nova_scheduler:!nova_api_os_compute:!nova_api_metadata:!nova_console" nova_serial: "{{ nova_compute_serial | default('100%') }}" - - - -# These facts are set against the deployment host to ensure that -# they are fast to access. This is done in preference to setting -# them against each target as the hostvars extraction will take -# a long time if executed against a large inventory. -- name: Refresh local facts after all software changes are made - hosts: nova_all - gather_facts: no - user: root - environment: "{{ deployment_environment_variables | default({}) }}" - vars_files: - - "defaults/{{ install_method }}_install.yml" - tags: - - nova - tasks: - - name: refresh local facts - setup: - filter: ansible_local - gather_subset: "!all" - - # This variable contains the values of the local fact set for the nova - # venv tag for all hosts in the 'nova_all' host group. - - name: Gather software version list - set_fact: - nova_all_software_versions: "{{ (groups['nova_all'] | map('extract', hostvars, ['ansible_local', 'openstack_ansible', 'nova', 'venv_tag'])) | list }}" - delegate_to: localhost - run_once: yes - - # This variable outputs a boolean value which is True when - # nova_all_software_versions contains a list of defined - # values. If they are not defined, it means that not all - # hosts have their software deployed yet. - - name: Set software deployed fact - set_fact: - nova_all_software_deployed: "{{ (nova_all_software_versions | select('defined')) | list == nova_all_software_versions }}" - delegate_to: localhost - run_once: yes - - # This variable outputs a boolean when all the values in - # nova_all_software_versions are the same and the software - # has been deployed to all hosts in the group. 
- - name: Set software updated fact - set_fact: - nova_all_software_updated: "{{ ((nova_all_software_versions | unique) | length == 1) and (nova_all_software_deployed | bool) }}" - delegate_to: localhost - run_once: yes - - -# Note that the console services do not understand how to reload, so they fail -# when you try to make them do so. We therefore restart them instead. -- name: Reload all nova services which support a reload to ensure new RPC object version is used - hosts: "nova_all:!nova_console" - gather_facts: no - serial: "{{ nova_serial | default('100%') }}" - user: root - environment: "{{ deployment_environment_variables | default({}) }}" - vars_files: - - "defaults/{{ install_method }}_install.yml" - tags: - - nova - tasks: - - name: Execute nova service reload - import_tasks: common-tasks/restart-service.yml - vars: - service_name: "nova" - # NOTE(mnaser): There is a bug in oslo.service which actually restarts - # the entire service instead of resetting it, but it - # leaves Nova in a broken state. This should be moved - # to "reloaded" once it's resolved. - # - # https://bugs.launchpad.net/openstack-ansible/+bug/1715374 - # - service_action: "restarted" - service_negate: "{{ ['nova-novncproxy.service', 'nova-spicehtml5proxy.service' ] + nova_service_negate | default([]) }}" - when: - - "nova_all_software_updated | bool" - - "ansible_local['openstack_ansible']['nova']['need_service_restart'] | bool" - - - -# Note that the console services do not understand how to reload, so they fail -# when you try to make them do so. We therefore restart them instead. 
-- name: Restart the remaining nova services to ensure new RPC object version is used - hosts: "nova_console" - gather_facts: no - serial: "{{ nova_api_serial | default(['1', '100%']) }}" - user: root - environment: "{{ deployment_environment_variables | default({}) }}" - vars_files: - - "defaults/{{ install_method }}_install.yml" - tags: - - nova - tasks: - - name: Execute nova service restart - import_tasks: common-tasks/restart-service.yml - vars: - service_name: "nova" - service_action: "restarted" - service_fact: "nova" - when: - - "nova_all_software_updated | bool" - - "ansible_local['openstack_ansible']['nova']['need_service_restart'] | bool" - - - -- name: Perform online database migrations - hosts: nova_conductor - gather_facts: no - user: root - environment: "{{ deployment_environment_variables | default({}) }}" - vars_files: - - "defaults/{{ install_method }}_install.yml" - tags: - - nova - tasks: - - name: Perform online data migrations - command: "{{ nova_bin }}/nova-manage db online_data_migrations" - become: yes - become_user: "{{ nova_system_user_name | default('nova') }}" - when: - - "nova_all_software_updated | bool" - - "ansible_local['openstack_ansible']['nova']['need_online_data_migrations'] | bool" - changed_when: false - run_once: yes - register: data_migrations - - - name: Disable the online migrations requirement - ini_file: - dest: "/etc/ansible/facts.d/openstack_ansible.fact" - section: nova - option: need_online_data_migrations - value: False - when: - - data_migrations is succeeded