# tripleo-upgrade/tasks/update/ceph_update_run.yml
---
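# Runs a minor (within-release) update of the cephadm-managed Ceph cluster:
# read the target Ceph container image from the undercloud's container prepare
# file, make sure the cluster is healthy, check whether any daemon actually
# needs the new image, then drive "ceph orch upgrade" to completion.
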
- name: Load variables from container environment file
  slurp:
    src: "{{ working_dir }}/{{ uc_containers_prepare_file }}"
  register: container_env
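
# The container prepare file is YAML shaped like this (values are
# illustrative, not the real defaults):
#   parameter_defaults:
#     ContainerImagePrepare:
#       - set:
#           ceph_namespace: quay.io/ceph
#           ceph_image: ceph
#           ceph_tag: v17
# split('/')[1] keeps only the namespace component, because the image is
# pulled through the undercloud registry on port 8787 rather than the
# upstream registry.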
- name: Set required ceph image facts from container environment file
  set_fact:
    ceph_image_tag: "{{ (container_env.content|b64decode|from_yaml).parameter_defaults.ContainerImagePrepare[0].set.ceph_tag }}"
    ceph_image: "{{ (container_env.content|b64decode|from_yaml).parameter_defaults.ContainerImagePrepare[0].set.ceph_image }}"
    ceph_namespace: "{{ (container_env.content|b64decode|from_yaml).parameter_defaults.ContainerImagePrepare[0].set.ceph_namespace.split('/')[1] }}"
    container_registry: "{{ undercloud_short_host_name }}.{{ docker_registry_network }}.{{ undercloud_domain }}:8787"
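
# cephadm shell runs the ceph CLI in a container on the delegated controller;
# "ceph -s -f json" reports overall health under .health.status as HEALTH_OK,
# HEALTH_WARN or HEALTH_ERR.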
- name: Get Ceph cluster health status
  shell: |
    set -o pipefail
    cephadm shell -- ceph -s -f json | jq .health.status -r
  register: ceph_status
  become: true
  become_user: root
  delegate_to: "{{ inventory_hostmap[controller_role_name]|first }}"
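
# "ceph orch upgrade check" compares every running daemon against the target
# image and prints JSON; the tasks below treat a .needs_update of "{}" as
# "everything is already on the target image".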
- name: Check if ceph update is required
  shell: |
    set -o pipefail
    cephadm shell -- ceph orch upgrade check {{ image }} | jq .needs_update -r
  vars:
    image: "{{ container_registry }}/{{ ceph_namespace }}/{{ ceph_image }}:{{ ceph_image_tag }}"
  register: ceph_needs_update
  become: true
  become_user: root
  delegate_to: "{{ inventory_hostmap[controller_role_name]|first }}"
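
# Only abort on bad health when an update is actually pending; a cluster that
# is already on the target image is left alone either way.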
- name: Fail before ceph update when ceph cluster is unhealthy
  fail:
    msg: Ceph update failed because ceph cluster is unhealthy
  when:
    - ceph_status.stdout != "HEALTH_OK"
    - ceph_needs_update.stdout != "{}"
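
# When no Ceph update is going to run, append ceph to the container exclusion
# list of the before-reboot check script; the bash "${2:-...}" default still
# lets the script's second argument override the whole list.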
- name: Exclude ceph images from container check when ceph update not required
  lineinfile:
    path: "{{ log_playbook_script }}-before_reboot.sh"
    regexp: "^EXCLUDED_CONTAINERS_FROM_CHECK="
    line: "EXCLUDED_CONTAINERS_FROM_CHECK=${2:-{{ excluded_containers_from_check }},ceph}"
    state: present
    backup: true
  when: ceph_needs_update.stdout == "{}"
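
# The update itself runs inside block/rescue/always: rescue dumps the cephadm
# log when anything fails, and always stops the L3 agent connectivity check
# started below.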
- block:
    - name: import tasks from l3_agent_connectivity_check_start_script
      import_tasks: ../common/l3_agent_connectivity_check_start_script.yml
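
    # "ceph orch upgrade start" only kicks off the upgrade; the orchestrator
    # then replaces daemons with the new image in the background, so progress
    # has to be polled afterwards.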
    - name: Start Ceph update using cephadm
      command:
        cmd: >
          cephadm shell --
          ceph orch upgrade start --image {{ image }}
      vars:
        image: "{{ container_registry }}/{{ ceph_namespace }}/{{ ceph_image }}:{{ ceph_image_tag }}"
      become: true
      become_user: root
      delegate_to: "{{ inventory_hostmap[controller_role_name]|first }}"
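
    # Poll until .in_progress flips to "false"; with a 60 second delay this
    # allows roughly ceph_update_timeout minutes for the whole upgrade.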
    - name: Wait while ceph update is in progress
      shell: |
        set -o pipefail
        cephadm shell -- ceph orch upgrade status | jq .in_progress -r
      changed_when: false
      register: ceph_upgrade_progress
      retries: "{{ ceph_update_timeout }}"
      delay: 60
      until:
        - ceph_upgrade_progress.stdout == "false"
      become: true
      become_user: root
      delegate_to: "{{ inventory_hostmap[controller_role_name]|first }}"
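
    # The upgrade is only trusted as successful when cephadm has logged its
    # completion marker; a missing match makes grep exit non-zero, failing
    # this task and triggering the rescue section.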
    - name: Check update status
      shell: |
        set -o pipefail
        cephadm shell -- ceph log last cephadm | grep 'Upgrade: Complete!'
      become: true
      become_user: root
      delegate_to: "{{ inventory_hostmap[controller_role_name]|first }}"
      register: ceph_update_status

    - name: Fail Ceph update
      fail:
        msg: Ceph minor update failed
      when: ceph_update_status.rc != 0
  when: ceph_needs_update.stdout != "{}"
  rescue:
    - name: print Ceph update failures
      command:
        cmd: >
          cephadm shell --
          ceph log last cephadm
      become: true
      become_user: root
      delegate_to: "{{ inventory_hostmap[controller_role_name]|first }}"

    - name: Fail Ceph update
      fail:
        msg: Ceph minor update failed
  always:
    - name: import tasks from l3_agent_connectivity_check_stop_script
      import_tasks: ../common/l3_agent_connectivity_check_stop_script.yml
      vars:
        current_stage_error: "{{ update_loss_threshold }}"