From 77613963036cc0e9fc98a69f234b1bbb86690076 Mon Sep 17 00:00:00 2001 From: Felix Edel Date: Mon, 6 Nov 2023 08:30:54 +0100 Subject: [PATCH] mirror-workspace-git-repos: Retry on failure in git update task We occasionally see this task fail for the first element in the zuul.projects list with a MODULE FAILURE and a return code of -13 (SIGPIPE) [1]. So far we couldn't identify the root cause, so try to mitigate this issue by retrying on failure. This solution is similar to the one used for the "Synchronize repos" task [2]. Since it's only the first element in the loop that is failing while subsequent elements are successful, we currently have three assumptions: 1. As the task before is using a `delegate_to: localhost` [3], there might be a problem with Ansible when switching the connection from localhost to the remote host (node). 2. Since the task before is using the same SSH connection [4] that is used by Ansible to push the git repository, there might be some "leftovers" on the connection that make the next task fail. 3. There is also a bug report in Ansible [5] which might be causing that error. 
[1]: { "ansible_loop_var": "zj_project", "changed": false, "failed": true, "module_stderr": "", "module_stdout": "", "msg": "MODULE FAILURE\nSee stdout/stderr for the exact error", "rc": -13, "zj_project": {...} } [2]: https://opendev.org/zuul/zuul-jobs/src/commit/3b3495e2557e55b70d47ab799bf71e58f095a918/roles/mirror-workspace-git-repos/tasks/main.yaml#L32 [3]: https://opendev.org/zuul/zuul-jobs/src/commit/3b3495e2557e55b70d47ab799bf71e58f095a918/roles/mirror-workspace-git-repos/tasks/main.yaml#L25 [4]: https://opendev.org/zuul/zuul-jobs/src/commit/3b3495e2557e55b70d47ab799bf71e58f095a918/roles/mirror-workspace-git-repos/tasks/main.yaml#L16 [5]: https://github.com/ansible/ansible/issues/81777 Change-Id: I0c4cb87bb076b9b40c9c446dbe5db437daff5897 --- roles/mirror-workspace-git-repos/tasks/main.yaml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/roles/mirror-workspace-git-repos/tasks/main.yaml b/roles/mirror-workspace-git-repos/tasks/main.yaml index 54398b22c..cc40015d6 100644 --- a/roles/mirror-workspace-git-repos/tasks/main.yaml +++ b/roles/mirror-workspace-git-repos/tasks/main.yaml @@ -56,6 +56,14 @@ with_dict: "{{ zuul.projects }}" loop_control: loop_var: zj_project + # We occasionally see this task fail for the first element in the + # zuul.projects list with a MODULE FAILURE and a return code of -13 + # (SIGPIPE). This may be caused by + # https://github.com/ansible/ansible/issues/81777 + # Try to mitigate this issue by retrying on failure. + register: git_update + until: git_update is success + retries: 3 # ANSIBLE0006: Skip linting since it triggers on the "git" command, # but we prefer the shell above tags: