From d22611b19d09e20994694eaadab13c2aa8ad4477 Mon Sep 17 00:00:00 2001 From: Thomas Herve Date: Mon, 5 Nov 2018 22:12:15 +0100 Subject: [PATCH] Fix config-download timeout Relying on the Mistral task timeout to fail the Ansible command is awkward, because it doesn't kill the running command, and it doesn't give an easy indication of the error without looking at the status info. Instead, use the "timeout" command on the command line to invoke Ansible, and check the exit status to find out whether the timeout was reached. Change-Id: Iec15a06b85722ff6dc9209f0a5d4db333b71496f Closes-Bug: #1801902 --- tripleo_common/actions/ansible.py | 5 +++++ workbooks/deployment.yaml | 14 ++++++++++---- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/tripleo_common/actions/ansible.py b/tripleo_common/actions/ansible.py index cdc31365a..47aa93224 100644 --- a/tripleo_common/actions/ansible.py +++ b/tripleo_common/actions/ansible.py @@ -318,6 +318,8 @@ class AnsiblePlaybookAction(base.TripleOAction): 'blacklisted_hostnames', []) self.override_ansible_cfg = self._kwargs_for_run.pop( 'override_ansible_cfg', None) + self.command_timeout = self._kwargs_for_run.pop( + 'command_timeout', None) @property def work_dir(self): @@ -525,6 +527,9 @@ class AnsiblePlaybookAction(base.TripleOAction): 'OS_AUTH_TOKEN': security_ctx.auth_token, 'OS_PROJECT_NAME': security_ctx.project_name}) + if self.command_timeout: + command = ['timeout', self.command_timeout] + command + command = [str(c) for c in command] if self.reproduce_command: diff --git a/workbooks/deployment.yaml b/workbooks/deployment.yaml index 10827f4a7..3140bcfb0 100644 --- a/workbooks/deployment.yaml +++ b/workbooks/deployment.yaml @@ -372,7 +372,6 @@ workflows: Configure the overcloud with config-download. 
input: - - timeout: 240 - queue_name: tripleo - plan_name: overcloud - ssh_network: ctlplane @@ -493,7 +492,6 @@ workflows: run_ansible: action: tripleo.ansible-playbook - timeout: <% $.config_download_timeout %> input: inventory: <% $.inventory %> playbook: <% $.get('work_dir') %>/<% $.get('plan_name') %>/deploy_steps_playbook.yaml @@ -504,19 +502,20 @@ workflows: use_openstack_credentials: true verbosity: <% $.get('verbosity') %> become: true - timeout: <% $.timeout %> work_dir: <% $.get('work_dir') %>/<% $.get('plan_name') %> queue_name: <% $.queue_name %> reproduce_command: true trash_output: true blacklisted_hostnames: <% $.blacklisted_hostnames %> override_ansible_cfg: <% $.override_ansible_cfg %> + command_timeout: <% $.config_download_timeout %> publish: log_path: <% task().result.get('log_path') %> deployment_status: DEPLOY_SUCCESS on-success: - ansible_passed: <% task().result.returncode = 0 %> - - ansible_failed: <% task().result.returncode != 0 %> + - ansible_timeout: <% task().result.returncode = 124 %> + - ansible_failed: <% not task().result.returncode in [0, 124] %> on-error: send_message publish-on-error: status: FAILED @@ -530,6 +529,13 @@ workflows: message: Ansible passed. deployment_status: DEPLOY_SUCCESS + ansible_timeout: + on-success: send_message + publish: + status: FAILED + message: Ansible timed out at <% $.config_download_timeout %> seconds. + deployment_status: DEPLOY_FAILED + ansible_failed: on-success: send_message publish: