Merge "Add log collection during update."

This commit is contained in:
Zuul 2023-02-15 13:04:42 +00:00 committed by Gerrit Code Review
commit fdc122d544
11 changed files with 227 additions and 0 deletions

View File

@ -157,6 +157,9 @@ Set to true to enable validations::
updates_validations: true
Enable extra logs during update. Defaults to true. It collects in /var/log/extra/ the output of the commands from collect_logs.yaml for every stage of the update run::
log_stages: true
Dependencies
------------

View File

@ -176,6 +176,11 @@ l3_agent_connectivity_check_wait_script: "{{ working_dir }}/l3_agent_wait_ping.s
l3_agent_connectivity_check_stop_script: "{{ working_dir }}/l3_agent_stop_ping.sh"
l3_agent_failover_check: false
# logs
# Destination path of the rendered log collection playbook.
log_playbook: "{{ working_dir }}/collect_log.yaml"
# Path prefix of the per-stage wrapper scripts; "-<stage>.sh" is appended
# when the scripts are created.
log_playbook_script: "{{ working_dir }}/collect_log"
# Set to false to skip creating and running the log collection scripts.
log_stages: true
# enable web load test
fip_http_check: false

View File

@ -0,0 +1,15 @@
---
# Render the log collection playbook and the wrapper script for one stage.
# Expects log_current_stage to be set by the including task; it becomes part
# of the wrapper script file name.
- name: create log collection playbook and stage script
  when: log_stages|bool
  block:
    - name: create log playbook
      template:
        src: "collect_logs.yaml.j2"
        dest: "{{ log_playbook }}"
        # Quoted so the mode is passed as an octal string rather than
        # being interpreted as a YAML number.
        mode: "0775"

    - name: create script to run log playbook
      template:
        src: "collect_logs.sh.j2"
        dest: "{{ log_playbook_script }}-{{ log_current_stage }}.sh"
        mode: "0775"

View File

@ -0,0 +1,5 @@
---
# Run the wrapper script generated for log_current_stage and append its
# stdout and stderr to a log file sitting next to the script.
- name: collect logs on the overcloud for the current stage
  # The shell module runs /bin/sh, so use the POSIX redirection form
  # instead of the bash-only "&>>".
  shell: |
    {{ log_playbook_script }}-{{ log_current_stage }}.sh >> {{ log_playbook_script }}-{{ log_current_stage }}.log 2>&1
  when: log_stages|bool

View File

@ -71,6 +71,34 @@
mode: 0755
force: true
# Generate one log collection script for each of the fixed update stages.
- name: Create update log collection scripts
  include_tasks: ../common/create_log_collection_scripts.yml
  loop:
    - before_ovn_controller_update
    - before_ceph_update
    - before_reboot
  vars:
    log_current_stage: "{{ item }}"
  when: log_stages|bool
# Batch mode: create the script for the single "before_oc_update_run" stage.
- name: Create update run log collection for oc update run - batch
  include_tasks: ../common/create_log_collection_scripts.yml
  when:
    - overcloud_batch_update|bool
    - log_stages|bool
  vars:
    log_current_stage: "before_oc_update_run"
# Serial mode: create one script per entry of oc_roles (or a single "all"
# entry when oc_roles is not defined).
- name: Create update run log collection for oc update run - serial
  include_tasks: ../common/create_log_collection_scripts.yml
  loop: "{{ oc_roles|default(['all']) }}"
  when:
    - not overcloud_batch_update|bool
    - log_stages|bool
  vars:
    log_current_stage: "before_oc_update_run_{{ item }}"
- name: create overcloud update script
template:
src: "overcloud_update_run.sh.j2"

View File

@ -20,3 +20,10 @@
- 'pre_undercloud_update_workarounds'
- 'post_undercloud_update_workarounds'
when: updates_workarounds|bool
# This task only renders the collection script for the
# "before_undercloud_update" stage; it does not run it. The previous name
# ("collect log for the current stage - batch") described a different task.
- name: create log collection script for the undercloud update stage
  include_tasks: ../common/create_log_collection_scripts.yml
  vars:
    log_current_stage: 'before_undercloud_update'
  when:
    - log_stages|bool

View File

@ -37,6 +37,13 @@
- updates_validations
- pre_update_validations
# Run the log collection script for the "before_undercloud_update" stage.
- name: collect log before undercloud update
  include_tasks: ../common/trigger_log.yml
  when: log_stages|bool
  vars:
    log_current_stage: "before_undercloud_update"
- name: update undercloud
shell: |
set -o pipefail
@ -132,6 +139,13 @@
tags:
- overcloud_update_prepare_containers
# Run the log collection script for the "before_ovn_controller_update" stage.
- name: collect log before OVN controller update
  include_tasks: ../common/trigger_log.yml
  when: log_stages|bool
  vars:
    log_current_stage: "before_ovn_controller_update"
- name: Update OVN controllers.
shell: |
set -o pipefail
@ -192,6 +206,13 @@
tags:
- overcloud_update_run
# Run the log collection script for the "before_ceph_update" stage.
- name: collect log before ceph update
  include_tasks: ../common/trigger_log.yml
  when: log_stages|bool
  vars:
    log_current_stage: "before_ceph_update"
- name: update Ceph
import_tasks: ceph_update_run.yml
when: ceph_osd_enabled|bool
@ -210,6 +231,13 @@
- name: run post-update fencing check
import_tasks: enable_fencing.yaml
# Run the log collection script for the "before_reboot" stage.
- name: collect log after update, but before reboot.
  include_tasks: ../common/trigger_log.yml
  when: log_stages|bool
  vars:
    log_current_stage: "before_reboot"
- name: run post-update validation
import_tasks: ../common/validation_group_run.yaml
vars:

View File

@ -1,4 +1,12 @@
---
# Batch mode only: run the log collection script for the
# "before_oc_update_run" stage.
- name: collect log for the current stage - batch
  include_tasks: ../common/trigger_log.yml
  when:
    - overcloud_batch_update|bool
    - log_stages|bool
  vars:
    log_current_stage: "before_oc_update_run"
- name: Are we running in parallel or serially ?
debug:
msg: "{{ (overcloud_batch_update|bool) | ternary('Running in parallel', 'Running serially') }}"

View File

@ -2,6 +2,14 @@
- name: import tasks from l3_agent_connectivity_check_start_script
import_tasks: ../common/l3_agent_connectivity_check_start_script.yml
# Serial mode only: run the log collection script whose stage name is
# suffixed with the first element of oc_current_role.
- name: collect log for the current stage - serial
  include_tasks: ../common/trigger_log.yml
  when:
    - not overcloud_batch_update|bool
    - log_stages|bool
  vars:
    log_current_stage: "before_oc_update_run_{{ oc_current_role[0] }}"
- name: run overcloud minor update in each of the roles/hostgroups
async: 25200
poll: 0

View File

@ -0,0 +1,24 @@
#!/usr/bin/bash
#
# Script to collect some logs during update stages.
#
# Usage: <script> [STAGE]
#   STAGE  label passed to the log playbook as "current_stage"; defaults to
#          the stage this script was rendered for.
CURRENT_STAGE="${1:-{{ log_current_stage }}}"
SSH_USER="{{ (overcloud_ssh_user) | ternary(overcloud_ssh_user, 'tripleo-admin') }}"

# This should always be true for tripleo>=wallaby.
if [ -f {{ upgrade_validation_inventory }} ]; then
    INVENTORY={{ upgrade_validation_inventory }}
else
    # Then we create one for tripleo<wallaby, unless a previous run
    # already generated it.
    INVENTORY="${HOME}/inventory.yaml"
    if [ ! -f "${INVENTORY}" ]; then
        . "${HOME}/stackrc"
        tripleo-ansible-inventory \
            --plan "{{ overcloud_stack_name }}" \
            --ansible_ssh_user "${SSH_USER}" \
            --static-yaml-inventory \
            "${INVENTORY}"
    fi
fi

ansible-playbook -i "${INVENTORY}" -e current_stage="${CURRENT_STAGE}" {{ log_playbook }}

View File

@ -0,0 +1,96 @@
---
# Per-stage snapshot of system state on the allovercloud and Undercloud hosts.
# This file is a template: the raw blocks keep "current_stage" as a variable
# resolved when the playbook runs (passed with "-e current_stage=...").
# Every command appends stdout and stderr to a per-stage file under
# /var/log/extra, using POSIX redirection because the shell module runs
# /bin/sh.
- hosts: allovercloud,Undercloud
  gather_facts: false
  become: true
  tasks:
    - name: ensure extra directory is present
      file:
        name: /var/log/extra
        state: directory
        owner: root
        group: root
        mode: "0755"

    - name: capture date at this stage
      shell: |
        date '+%Y-%m-%d %H:%M:%S%z' >> /var/log/extra/date-{% raw %}{{ current_stage }}{% endraw %}.txt 2>&1

    - name: save packages list at this stage
      shell: |
        dnf list installed >> /var/log/extra/packages-{% raw %}{{ current_stage }}{% endraw %}.txt 2>&1

    - name: get podman container state at this stage
      shell: |
        podman ps --all >> /var/log/extra/container-ps-{% raw %}{{ current_stage }}{% endraw %}.txt 2>&1

    - name: get podman images state at this stage
      shell: |
        podman images >> /var/log/extra/container-images-{% raw %}{{ current_stage }}{% endraw %}.txt 2>&1

    - name: get process list at this stage
      shell: |
        ps fauxwww >> /var/log/extra/ps-{% raw %}{{ current_stage }}{% endraw %}.txt 2>&1

    - name: get systemd information at this stage
      shell: |
        systemctl >> /var/log/extra/systemctl-{% raw %}{{ current_stage }}{% endraw %}.txt 2>&1

    - name: get tripleo services information at this stage
      shell: |
        systemctl status 'tripleo*' >> /var/log/extra/systemctl-tripleo-{% raw %}{{ current_stage }}{% endraw %}.txt 2>&1

    - name: get cgroup information at this stage
      shell: |
        systemd-cgls >> /var/log/extra/cgroups-{% raw %}{{ current_stage }}{% endraw %}.txt 2>&1

    - name: get release state at this stage
      shell: |
        for i in rhosp redhat; do
          # Only dump release files that exist; the previous test was
          # inverted and tried to cat files only when they were missing.
          if [ -e /etc/${i}-release ]; then
            cat /etc/${i}-release >> /var/log/extra/release-{% raw %}{{ current_stage }}{% endraw %}.txt 2>&1
          fi
        done
# OVN state on the ovn_controller hosts, appended to per-stage files under
# /var/log/extra. POSIX redirection is used because the shell module runs
# /bin/sh.
- hosts: ovn_controller
  gather_facts: false
  become: true
  tasks:
    - name: get OVN external id parameter at this stage
      shell: |
        ovs-vsctl get open . external_ids >> /var/log/extra/ovn_external_id-{% raw %}{{ current_stage }}{% endraw %}.txt 2>&1

    - name: get OVN flows at this stage
      shell: |
        ovs-ofctl dump-flows br-int >> /var/log/extra/ovn_flows_id-{% raw %}{{ current_stage }}{% endraw %}.txt 2>&1
# Cluster state on the pacemaker hosts, appended to a per-stage file under
# /var/log/extra.
- hosts: pacemaker
  gather_facts: false
  become: true
  tasks:
    - name: ensure extra directory is present
      file:
        name: /var/log/extra
        state: directory
        owner: root
        group: root
        mode: "0755"

    - name: get cluster state at this stage
      # "|| true" keeps the task from failing when a pcs command returns
      # a non-zero status. POSIX redirection is used because the shell
      # module runs /bin/sh.
      shell: |
        pcs status >> /var/log/extra/pcslog-{% raw %}{{ current_stage }}{% endraw %}.txt 2>&1 || true
        pcs constraint >> /var/log/extra/pcslog-{% raw %}{{ current_stage }}{% endraw %}.txt 2>&1 || true
# For every server listed by the cloud named after the overcloud stack, save
# the "openstack server show" output to a per-server, per-stage file.
# NOTE(review): this play runs with "become: false" while /var/log/extra is
# created root-owned with mode 0755; confirm the connecting user can create
# the directory and write files into it.
- hosts: undercloud
  gather_facts: false
  become: false
  tasks:
    - name: ensure extra directory is present
      file:
        name: /var/log/extra
        state: directory
        owner: root
        group: root
        mode: "0755"

    - name: Information about running vm.
      shell: |
        for i in $(openstack --os-cloud {{ overcloud_stack_name }} server list -f value -c Name); do
          openstack --os-cloud {{ overcloud_stack_name }} server show $i > /var/log/extra/oc-server-$i-{% raw %}{{ current_stage }}{% endraw %}.txt;
        done