Fix issue with early failure and blocks

Ansible tasks that fail within a block still call the failure
callback, which means that they trigger Zuul early failure
detection even if they are later rescued.  To avoid this,
ignore tasks inside blocks that have a rescue section (directly
or via an enclosing block) for purposes of early failure detection.

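Also make early failure detection "sticky": once an early failure
has been detected for a build, the build result is reported as
FAILURE even if the playbook run itself ends successfully.  This
helps uncover errors like this one (the "remote" tests uncover this
particular failure if the result is made sticky), and also
ensures consistent behavior in dependent pipelines.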

Change-Id: I505667678e7384386819b5389036e4fb4f108afd
This commit is contained in:
James E. Blair 2023-08-21 16:08:01 -07:00
parent df709af7f5
commit f04d80912e
3 changed files with 33 additions and 2 deletions

View File

@ -0,0 +1,10 @@
---
fixes:
- |
Ansible task failures in block/rescue stanzas could cause Zuul to
erroneously trigger early-failure behavior for the build, which
could result in inconsistent behavior in a dependent pipeline.
Task failures in Ansible blocks that have a rescue section are no
longer considered for early failure detection, and if a build
encounters an early failure, the build result will be reported as
a failure even if the playbook run subsequently succeeds.

View File

@ -42,6 +42,7 @@ import socket
import threading
import time
from ansible.playbook.block import Block
from ansible.plugins.callback import default
from ansible.module_utils._text import to_text
from ansible.module_utils.parsing.convert_bool import boolean
@ -93,6 +94,15 @@ def zuul_filter_result(result):
return stdout_lines + stderr_lines
def is_rescuable(task):
    """Report whether a failure of ``task`` could still be rescued.

    Walks up the ``_parent`` chain starting at ``task`` and returns
    True as soon as an ancestor is a ``Block`` whose ``rescue``
    attribute is truthy.  Returns False when the chain ends (a
    ``_parent`` of None) without finding such a block.
    """
    # NOTE(review): presumably a task located in the rescue/always
    # section of a block also has that block as its parent, in which
    # case it is reported as rescuable here too -- confirm intended.
    ancestor = task._parent
    while ancestor is not None:
        if isinstance(ancestor, Block) and ancestor.rescue:
            return True
        # Keep climbing: an outer block may still provide a rescue.
        ancestor = ancestor._parent
    return False
class CallbackModule(default.CallbackModule):
'''
@ -462,7 +472,8 @@ class CallbackModule(default.CallbackModule):
def v2_runner_on_failed(self, result, ignore_errors=False):
    """Handle a failed task result and emit the early-failure marker.

    Delegates to ``self._v2_runner_on_failed`` and returns its value
    unchanged.  In addition, the literal line ``failure`` is written
    through ``self._result_logger`` unless errors are being ignored
    or ``is_rescuable`` reports that the task may still be rescued.

    :param result: the task result object; its ``_task`` attribute is
        inspected to decide whether the failure is rescuable.
    :param ignore_errors: when true, the failure is not signalled.
    :returns: whatever ``self._v2_runner_on_failed`` returned.
    """
    ret = self._v2_runner_on_failed(result, ignore_errors)
    # The "failure" line is what triggers early failure detection, so
    # it must not be emitted for a failure that is ignored or that an
    # enclosing block with a rescue section may still recover from.
    if not ignore_errors and not is_rescuable(result._task):
        self._result_logger.info("failure")
    return ret

View File

@ -978,6 +978,7 @@ class AnsibleJob(object):
with executor_server.zk_context as ctx:
self.job = FrozenJob.fromZK(ctx, arguments["job_ref"])
self.arguments["zuul"].update(zuul_params_from_job(self.job))
self.early_failure = False
self.zuul_event_id = self.arguments["zuul_event_id"]
# Record ansible version being used for the cleanup phase
@ -1827,7 +1828,15 @@ class AnsibleJob(object):
elif job_status == self.RESULT_NORMAL:
success = (job_code == 0)
if success:
result = 'SUCCESS'
if self.early_failure:
# Override the result, but proceed as
# normal.
self.log.info(
"Overriding SUCCESS result as FAILURE "
"due to early failure detection")
result = 'FAILURE'
else:
result = 'SUCCESS'
else:
result = 'FAILURE'
break
@ -2871,6 +2880,7 @@ class AnsibleJob(object):
allow_pre_fail = False
if allow_pre_fail and result_line == b'failure':
self.log.info("Early failure in job")
self.early_failure = True
self.executor_server.updateBuildStatus(
self.build_request, {'pre_fail': True})
else: