From c982bfed4d1ab2511464759e948a85c1b002d424 Mon Sep 17 00:00:00 2001
From: Tobias Henkel
Date: Fri, 12 Oct 2018 09:48:47 +0200
Subject: [PATCH] Retry jobs failed with MERGER_FAILURE

We sometimes see jobs fail with MERGER_FAILURE. This result is
misleading to the user because it doesn't actually indicate a merge
conflict but rather some infrastructure-related error. We already have
various retry mechanisms in place that cover most of the possible
failure causes within the executor, but catching every place that
needs a retry is difficult, so we add a safety net and reschedule jobs
that failed with MERGER_FAILURE.

Change-Id: I8844b11850c0a2cd3faddb7d8e944750c9da78ea
---
 zuul/executor/client.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/zuul/executor/client.py b/zuul/executor/client.py
index 013dc2ea2e..a1d251d93d 100644
--- a/zuul/executor/client.py
+++ b/zuul/executor/client.py
@@ -395,6 +395,21 @@ class ExecutorClient(object):
         if result in ('DISCONNECT', 'ABORTED'):
             # Always retry if the executor just went away
             build.retry = True
+        if result == 'MERGER_FAILURE':
+            # The build result MERGER_FAILURE is a bit misleading here
+            # because when we get here we know that there are no merge
+            # conflicts. Instead this is most likely caused by some
+            # infrastructure failure: connection issues, drive
+            # corruption, a full disk, a corrupted git cache, etc.
+            # This may or may not be a recoverable failure, so we retry
+            # here while respecting the max retries. But to be able to
+            # distinguish this from RETRY_LIMIT, which normally indicates
+            # pre-playbook failures, we keep the build result after the
+            # max attempts.
+            if (build.build_set.getTries(build.job.name) <
+                    build.job.attempts):
+                build.retry = True
+
         result_data = data.get('data', {})
         warnings = data.get('warnings', [])
         self.log.info("Build %s complete, result %s, warnings %s" %
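
For readers unfamiliar with Zuul's retry accounting, the sketch below condenses
the decision this patch adds into a standalone function. It is a minimal
illustration rather than Zuul code: should_retry, tries_so_far, and
max_attempts are hypothetical stand-ins for the patch's build.retry handling,
build.build_set.getTries(build.job.name), and build.job.attempts.

# Minimal, self-contained sketch of the retry decision added above (not
# Zuul code). tries_so_far and max_attempts stand in for
# build.build_set.getTries(build.job.name) and build.job.attempts.
def should_retry(result, tries_so_far, max_attempts):
    if result in ('DISCONNECT', 'ABORTED'):
        # The executor went away; always retry.
        return True
    if result == 'MERGER_FAILURE':
        # Likely an infrastructure problem rather than a merge conflict;
        # retry only while attempts remain so that the final result stays
        # MERGER_FAILURE instead of collapsing into RETRY_LIMIT.
        return tries_so_far < max_attempts
    return False

# Example: with job.attempts = 3, the first two MERGER_FAILURE results
# are retried; the third is reported as MERGER_FAILURE, not RETRY_LIMIT.
assert should_retry('MERGER_FAILURE', 1, 3)
assert should_retry('MERGER_FAILURE', 2, 3)
assert not should_retry('MERGER_FAILURE', 3, 3)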