From c982bfed4d1ab2511464759e948a85c1b002d424 Mon Sep 17 00:00:00 2001
From: Tobias Henkel
Date: Fri, 12 Oct 2018 09:48:47 +0200
Subject: [PATCH] Retry jobs failed with MERGER_FAILURE

We sometimes see jobs fail with MERGER_FAILURE. This result is
misleading to the user because it doesn't actually indicate a merge
conflict but rather some infrastructure-related error. We already have
various retry mechanisms in place that cover most of the possible
failure causes within the executor, but catching every place that
needs a retry is difficult, so we add a safety net and reschedule jobs
that failed with MERGER_FAILURE.

Change-Id: I8844b11850c0a2cd3faddb7d8e944750c9da78ea
---
 zuul/executor/client.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/zuul/executor/client.py b/zuul/executor/client.py
index 013dc2ea2e..a1d251d93d 100644
--- a/zuul/executor/client.py
+++ b/zuul/executor/client.py
@@ -395,6 +395,21 @@ class ExecutorClient(object):
         if result in ('DISCONNECT', 'ABORTED'):
             # Always retry if the executor just went away
             build.retry = True
+        if result == 'MERGER_FAILURE':
+            # The build result MERGER_FAILURE is a bit misleading here
+            # because when we get here we know that there are no merge
+            # conflicts. Instead this is most likely caused by some
+            # infrastructure failure: connection issues, drive
+            # corruption, a full disk, a corrupted git cache, etc.
+            # This may or may not be a recoverable failure, so we retry
+            # here while respecting the max retries. But to be able to
+            # distinguish this from RETRY_LIMIT, which normally indicates
+            # pre-playbook failures, we keep the build result after the
+            # max attempts.
+            if (build.build_set.getTries(build.job.name) <
+                    build.job.attempts):
+                build.retry = True
+
         result_data = data.get('data', {})
         warnings = data.get('warnings', [])
         self.log.info("Build %s complete, result %s, warnings %s" %
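
For readers unfamiliar with Zuul's retry accounting, the sketch below condenses
the decision this patch adds into a standalone function. It is a minimal
illustration rather than Zuul code: should_retry, tries_so_far, and
max_attempts are hypothetical stand-ins for the patch's build.retry handling,
build.build_set.getTries(build.job.name), and build.job.attempts.

# Minimal, self-contained sketch of the retry decision added above (not
# Zuul code). tries_so_far and max_attempts stand in for
# build.build_set.getTries(build.job.name) and build.job.attempts.
def should_retry(result, tries_so_far, max_attempts):
    if result in ('DISCONNECT', 'ABORTED'):
        # The executor went away; always retry.
        return True
    if result == 'MERGER_FAILURE':
        # Likely an infrastructure problem rather than a merge conflict;
        # retry only while attempts remain so that the final result stays
        # MERGER_FAILURE instead of collapsing into RETRY_LIMIT.
        return tries_so_far < max_attempts
    return False

# Example: with job.attempts = 3, the first two MERGER_FAILURE results
# are retried; the third is reported as MERGER_FAILURE, not RETRY_LIMIT.
assert should_retry('MERGER_FAILURE', 1, 3)
assert should_retry('MERGER_FAILURE', 2, 3)
assert not should_retry('MERGER_FAILURE', 3, 3)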