# tripleo-upgrade/tasks/update/ceph_update_run.yml
---
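# Runs a minor (within-release) update of the cephadm-managed Ceph cluster:
# read the target Ceph container image from the undercloud's container prepare
# file, make sure the cluster is healthy, check whether any daemon actually
# needs the new image, then drive "ceph orch upgrade" to completion.
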
- name: Load variables from container environment file
  slurp:
    src: "{{ working_dir }}/{{ uc_containers_prepare_file }}"
  register: container_env
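
# The container prepare file is YAML shaped like this (values are
# illustrative, not the real defaults):
#   parameter_defaults:
#     ContainerImagePrepare:
#       - set:
#           ceph_namespace: quay.io/ceph
#           ceph_image: ceph
#           ceph_tag: v17
# split('/')[1] keeps only the namespace component, because the image is
# pulled through the undercloud registry on port 8787 rather than the
# upstream registry.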
- name: Set required ceph image facts from container environment file
  set_fact:
    ceph_image_tag: "{{ (container_env.content|b64decode|from_yaml).parameter_defaults.ContainerImagePrepare[0].set.ceph_tag }}"
    ceph_image: "{{ (container_env.content|b64decode|from_yaml).parameter_defaults.ContainerImagePrepare[0].set.ceph_image }}"
    ceph_namespace: "{{ (container_env.content|b64decode|from_yaml).parameter_defaults.ContainerImagePrepare[0].set.ceph_namespace.split('/')[1] }}"
    container_registry: "{{ undercloud_short_host_name }}.{{ docker_registry_network }}.{{ undercloud_domain }}:8787"
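
# cephadm shell runs the ceph CLI in a container on the delegated controller;
# "ceph -s -f json" reports overall health under .health.status as HEALTH_OK,
# HEALTH_WARN or HEALTH_ERR.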
- name: Get Ceph cluster health status
  shell: |
    set -o pipefail
    cephadm shell -- ceph -s -f json | jq .health.status -r
  register: ceph_status
  become: true
  become_user: root
  delegate_to: "{{ inventory_hostmap[controller_role_name]|first }}"
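
# "ceph orch upgrade check" compares every running daemon against the target
# image and prints JSON; the tasks below treat a .needs_update of "{}" as
# "everything is already on the target image".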
- name: Check if ceph update is required
  shell: |
    set -o pipefail
    cephadm shell -- ceph orch upgrade check {{ image }} | jq .needs_update -r
  vars:
    image: "{{ container_registry }}/{{ ceph_namespace }}/{{ ceph_image }}:{{ ceph_image_tag }}"
  register: ceph_needs_update
  become: true
  become_user: root
  delegate_to: "{{ inventory_hostmap[controller_role_name]|first }}"
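
# Only abort on bad health when an update is actually pending; a cluster that
# is already on the target image is left alone either way.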
- name: Fail before ceph update when ceph cluster is unhealthy
  fail:
    msg: Ceph update failed because ceph cluster is unhealthy
  when:
    - ceph_status.stdout != "HEALTH_OK"
    - ceph_needs_update.stdout != "{}"
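
# When no Ceph update is going to run, append ceph to the container exclusion
# list of the before-reboot check script; the bash "${2:-...}" default still
# lets the script's second argument override the whole list.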
- name: Exclude ceph images from container check when ceph update not required
  lineinfile:
    path: "{{ log_playbook_script }}-before_reboot.sh"
    regexp: "^EXCLUDED_CONTAINERS_FROM_CHECK="
    line: "EXCLUDED_CONTAINERS_FROM_CHECK=${2:-{{ excluded_containers_from_check }},ceph}"
    state: present
    backup: true
  when: ceph_needs_update.stdout == "{}"
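
# The update itself runs inside block/rescue/always: rescue dumps the cephadm
# log when anything fails, and always stops the L3 agent connectivity check
# started below.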
- block:
    - name: import tasks from l3_agent_connectivity_check_start_script
      import_tasks: ../common/l3_agent_connectivity_check_start_script.yml
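
    # "ceph orch upgrade start" only kicks off the upgrade; the orchestrator
    # then replaces daemons with the new image in the background, so progress
    # has to be polled afterwards.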
    - name: Start Ceph update using cephadm
      command:
        cmd: >
          cephadm shell --
          ceph orch upgrade start --image {{ image }}
      vars:
        image: "{{ container_registry }}/{{ ceph_namespace }}/{{ ceph_image }}:{{ ceph_image_tag }}"
      become: true
      become_user: root
      delegate_to: "{{ inventory_hostmap[controller_role_name]|first }}"
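
    # Poll until .in_progress flips to "false"; with a 60 second delay this
    # allows roughly ceph_update_timeout minutes for the whole upgrade.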
    - name: Wait while ceph update is in progress
      shell: |
        set -o pipefail
        cephadm shell -- ceph orch upgrade status | jq .in_progress -r
      changed_when: false
      register: ceph_upgrade_progress
      retries: "{{ ceph_update_timeout }}"
      delay: 60
      until:
        - ceph_upgrade_progress.stdout == "false"
      become: true
      become_user: root
      delegate_to: "{{ inventory_hostmap[controller_role_name]|first }}"
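
    # The upgrade is only trusted as successful when cephadm has logged its
    # completion marker; a missing match makes grep exit non-zero, failing
    # this task and triggering the rescue section.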
    - name: Check update status
      shell: |
        set -o pipefail
        cephadm shell -- ceph log last cephadm | grep 'Upgrade: Complete!'
      become: true
      become_user: root
      delegate_to: "{{ inventory_hostmap[controller_role_name]|first }}"
      register: ceph_update_status

    - name: Fail Ceph update
      fail:
        msg: Ceph minor update failed
      when: ceph_update_status.rc != 0
  when: ceph_needs_update.stdout != "{}"
  rescue:
    - name: print Ceph update failures
      command:
        cmd: >
          cephadm shell --
          ceph log last cephadm
      become: true
      become_user: root
      delegate_to: "{{ inventory_hostmap[controller_role_name]|first }}"

    - name: Fail Ceph update
      fail:
        msg: Ceph minor update failed
  always:
    - name: import tasks from l3_agent_connectivity_check_stop_script
      import_tasks: ../common/l3_agent_connectivity_check_stop_script.yml
      vars:
        current_stage_error: "{{ update_loss_threshold }}"