diff --git a/README.rst b/README.rst index 483236a9..0f29bd26 100644 --- a/README.rst +++ b/README.rst @@ -157,6 +157,9 @@ Set to true to enable validations:: updates_validations: true +Enable extra logs during update. Defaults to true. It collects in /var/log/extra/ the output of the commands from the collect_logs.yaml.j2 template for every stage of the update run:: + + log_stages: true Dependencies ------------ diff --git a/defaults/main.yml b/defaults/main.yml index 1753c02a..953b0f8e 100644 --- a/defaults/main.yml +++ b/defaults/main.yml @@ -176,6 +176,11 @@ l3_agent_connectivity_check_wait_script: "{{ working_dir }}/l3_agent_wait_ping.s l3_agent_connectivity_check_stop_script: "{{ working_dir }}/l3_agent_stop_ping.sh" l3_agent_failover_check: false +# per-stage log collection +log_playbook: "{{ working_dir }}/collect_log.yaml" +log_playbook_script: "{{ working_dir }}/collect_log" +log_stages: true + # enable web load test fip_http_check: false diff --git a/tasks/common/create_log_collection_scripts.yml b/tasks/common/create_log_collection_scripts.yml new file mode 100644 index 00000000..48806130 --- /dev/null +++ b/tasks/common/create_log_collection_scripts.yml @@ -0,0 +1,16 @@ +--- +# Render the log collection playbook and the per-stage script that runs it. +- block: + - name: create log playbook + template: + src: "collect_logs.yaml.j2" + dest: "{{ log_playbook }}" + mode: "0775" + + - name: create script to run log playbook + template: + src: "collect_logs.sh.j2" + dest: "{{ log_playbook_script }}-{{ log_current_stage }}.sh" + mode: "0775" + + when: log_stages|bool diff --git a/tasks/common/trigger_log.yml b/tasks/common/trigger_log.yml new file mode 100644 index 00000000..a07ce645 --- /dev/null +++ b/tasks/common/trigger_log.yml @@ -0,0 +1,6 @@ +--- +# Run the script for log_current_stage, appending its output to a .log file next to it. +- name: collect logs on the overcloud for the current stage + shell: | + {{ log_playbook_script }}-{{ log_current_stage }}.sh &>> {{ log_playbook_script }}-{{ log_current_stage }}.log + when: log_stages|bool diff --git a/tasks/update/create-overcloud-update-scripts.yaml b/tasks/update/create-overcloud-update-scripts.yaml 
index c7bdd691..f96ca81a 100644 --- a/tasks/update/create-overcloud-update-scripts.yaml +++ b/tasks/update/create-overcloud-update-scripts.yaml @@ -71,6 +71,34 @@ mode: 0755 force: true +- name: Create update log collection scripts + include_tasks: ../common/create_log_collection_scripts.yml + vars: + log_current_stage: '{{ item }}' + when: + - log_stages|bool + loop: + - before_ovn_controller_update + - before_ceph_update + - before_reboot + +- name: Create update run log collection for oc update run - batch + include_tasks: ../common/create_log_collection_scripts.yml + vars: + log_current_stage: 'before_oc_update_run' + when: + - overcloud_batch_update|bool + - log_stages|bool + +- name: Create update run log collection for oc update run - serial + include_tasks: ../common/create_log_collection_scripts.yml + vars: + log_current_stage: "before_oc_update_run_{{ item }}" + when: + - not overcloud_batch_update|bool + - log_stages|bool + loop: "{{ oc_roles|default(['all']) }}" + - name: create overcloud update script template: src: "overcloud_update_run.sh.j2" diff --git a/tasks/update/create-undercloud-update-scripts.yaml b/tasks/update/create-undercloud-update-scripts.yaml index aea0f93d..6d3c901b 100644 --- a/tasks/update/create-undercloud-update-scripts.yaml +++ b/tasks/update/create-undercloud-update-scripts.yaml @@ -20,3 +20,10 @@ - 'pre_undercloud_update_workarounds' - 'post_undercloud_update_workarounds' when: updates_workarounds|bool + +- name: Create undercloud update log collection script + include_tasks: ../common/create_log_collection_scripts.yml + vars: + log_current_stage: 'before_undercloud_update' + when: + - log_stages|bool diff --git a/tasks/update/main.yml b/tasks/update/main.yml index 8c00a820..383fbb70 100644 --- a/tasks/update/main.yml +++ b/tasks/update/main.yml @@ -37,6 +37,13 @@ - updates_validations - pre_update_validations + - name: collect log before undercloud update + include_tasks: ../common/trigger_log.yml + vars: + log_current_stage: 
'before_undercloud_update' + when: + - log_stages|bool + - name: update undercloud shell: | set -o pipefail @@ -132,6 +139,13 @@ tags: - overcloud_update_prepare_containers + - name: collect log before OVN controller update + include_tasks: ../common/trigger_log.yml + vars: + log_current_stage: 'before_ovn_controller_update' + when: + - log_stages|bool + - name: Update OVN controllers. shell: | set -o pipefail @@ -192,6 +206,13 @@ tags: - overcloud_update_run + - name: collect log before ceph update + include_tasks: ../common/trigger_log.yml + vars: + log_current_stage: 'before_ceph_update' + when: + - log_stages|bool + - name: update Ceph import_tasks: ceph_update_run.yml when: ceph_osd_enabled|bool @@ -210,6 +231,13 @@ - name: run post-update fencing check import_tasks: enable_fencing.yaml + - name: collect log after update, but before reboot. + include_tasks: ../common/trigger_log.yml + vars: + log_current_stage: 'before_reboot' + when: + - log_stages|bool + - name: run post-update validation import_tasks: ../common/validation_group_run.yaml vars: diff --git a/tasks/update/overcloud_update_run.yml b/tasks/update/overcloud_update_run.yml index f36721c6..b0307ad0 100644 --- a/tasks/update/overcloud_update_run.yml +++ b/tasks/update/overcloud_update_run.yml @@ -1,4 +1,12 @@ --- +- name: collect log for the current stage - batch + include_tasks: ../common/trigger_log.yml + vars: + log_current_stage: 'before_oc_update_run' + when: + - overcloud_batch_update|bool + - log_stages|bool + - name: Are we running in parallel or serially ? 
debug: msg: "{{ (overcloud_batch_update|bool) | ternary('Running in parallel', 'Running serially') }}" diff --git a/tasks/update/overcloud_update_run_role.yml b/tasks/update/overcloud_update_run_role.yml index 5cb9120a..ed5650f3 100644 --- a/tasks/update/overcloud_update_run_role.yml +++ b/tasks/update/overcloud_update_run_role.yml @@ -2,6 +2,14 @@ - name: import tasks from l3_agent_connectivity_check_start_script import_tasks: ../common/l3_agent_connectivity_check_start_script.yml +- name: collect log for the current stage - serial + include_tasks: ../common/trigger_log.yml + vars: + log_current_stage: "before_oc_update_run_{{ oc_current_role[0] }}" + when: + - not overcloud_batch_update|bool + - log_stages|bool + - name: run overcloud minor update in each of the roles/hostgroups async: 25200 poll: 0 diff --git a/templates/collect_logs.sh.j2 b/templates/collect_logs.sh.j2 new file mode 100644 index 00000000..0be96327 --- /dev/null +++ b/templates/collect_logs.sh.j2 @@ -0,0 +1,24 @@ +#!/usr/bin/bash +# +# Script to collect some logs during update stages. + +CURRENT_STAGE=${1:-{{ log_current_stage }}} +SSH_USER={{ overcloud_ssh_user | default('tripleo-admin', true) }} + +# This should always be true for tripleo>=wallaby. +if [ ! 
-f {{ upgrade_validation_inventory }} ]; then + # Then we create one for tripleo> /var/log/extra/date-{% raw %}{{ current_stage }}{% endraw %}.txt + + - name: save packages list at this stage + shell: | + dnf list installed &>> /var/log/extra/packages-{% raw %}{{ current_stage }}{% endraw %}.txt + + - name: get podman container state at this stage + shell: | + podman ps --all &>> /var/log/extra/container-ps-{% raw %}{{ current_stage }}{% endraw %}.txt + + - name: get podman images state at this stage + shell: | + podman images &>> /var/log/extra/container-images-{% raw %}{{ current_stage }}{% endraw %}.txt + + - name: get process list at this stage + shell: | + ps fauxwww &>> /var/log/extra/ps-{% raw %}{{ current_stage }}{% endraw %}.txt + + - name: get systemd information at this stage + shell: | + systemctl &>> /var/log/extra/systemctl-{% raw %}{{ current_stage }}{% endraw %}.txt + + - name: get tripleo services information at this stage + shell: | + systemctl status 'tripleo*' &>> /var/log/extra/systemctl-tripleo-{% raw %}{{ current_stage }}{% endraw %}.txt + + - name: get cgroup information at this stage + shell: | + systemd-cgls &>> /var/log/extra/cgroups-{% raw %}{{ current_stage }}{% endraw %}.txt + + - name: get release state at this stage + shell: | + # Only dump the release files that exist on this host. + for i in rhosp redhat; do + if [ -e /etc/${i}-release ]; then + cat /etc/${i}-release &>> /var/log/extra/release-{% raw %}{{ current_stage }}{% endraw %}.txt + fi + done + +- hosts: ovn_controller + gather_facts: false + become: true + tasks: + - name: get OVN external id parameter at this stage + shell: | + ovs-vsctl get open . 
external_ids &>> /var/log/extra/ovn_external_id-{% raw %}{{ current_stage }}{% endraw %}.txt + + - name: get OVN flows at this stage + shell: | + ovs-ofctl dump-flows br-int &>> /var/log/extra/ovn_flows_id-{% raw %}{{ current_stage }}{% endraw %}.txt + +- hosts: pacemaker + gather_facts: false + become: true + tasks: + - name: ensure extra directory is present + file: + name: /var/log/extra + state: directory + owner: root + group: root + mode: "0755" + - name: get cluster state at this stage + shell: | + pcs status &>> /var/log/extra/pcslog-{% raw %}{{ current_stage }}{% endraw %}.txt || true + pcs constraint &>> /var/log/extra/pcslog-{% raw %}{{ current_stage }}{% endraw %}.txt || true + +- hosts: undercloud + gather_facts: false + become: false + tasks: + - name: ensure extra directory is present + file: + name: /var/log/extra + state: directory + owner: root + group: root + mode: "0755" + - name: Information about running vm. + shell: | + for i in $(openstack --os-cloud {{ overcloud_stack_name }} server list -f value -c Name); do + openstack --os-cloud {{ overcloud_stack_name }} server show "$i" > "/var/log/extra/oc-server-$i-{% raw %}{{ current_stage }}{% endraw %}.txt"; + done