Fix missing mutex release when aborting builds

Currently when zuul aborts a build due to e.g. abandoning a change the
mutex doesn't get released. This blocks all further jobs requiring the
mutex until zuul is restarted. This adds test cases for detecting this
and the missing mutex releases.

Change-Id: I37e69310fed045c5a41bd4eccb151c8826f342ea
Story: 2000657
Task: 3115
This commit is contained in:
Tobias Henkel 2016-10-11 07:52:33 +02:00 committed by Joshua Hesketh
parent 226cdd4706
commit 46237f1559
3 changed files with 84 additions and 0 deletions

View File

@ -0,0 +1,23 @@
pipelines:
- name: check
manager: IndependentPipelineManager
trigger:
gerrit:
- event: patchset-created
success:
gerrit:
verified: 1
failure:
gerrit:
verified: -1
jobs:
- name: mutex-one
mutex: test-mutex
- name: mutex-two
mutex: test-mutex
projects:
- name: org/project
check:
- project-test1

View File

@ -2394,6 +2394,63 @@ jobs:
self.assertEqual(B.reported, 1)
self.assertFalse('test-mutex' in self.sched.mutex.mutexes)
def test_mutex_abandon(self):
"Test abandon with job mutexes"
self.config.set('zuul', 'layout_config',
'tests/fixtures/layout-mutex.yaml')
self.sched.reconfigure(self.config)
self.worker.hold_jobs_in_build = True
A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A')
self.assertFalse('test-mutex' in self.sched.mutex.mutexes)
self.fake_gerrit.addEvent(A.getPatchsetCreatedEvent(1))
self.waitUntilSettled()
self.assertTrue('test-mutex' in self.sched.mutex.mutexes)
self.fake_gerrit.addEvent(A.getChangeAbandonedEvent())
self.waitUntilSettled()
# The check pipeline should be empty
items = self.sched.layout.pipelines['check'].getAllItems()
self.assertEqual(len(items), 0)
# The mutex should be released
self.assertFalse('test-mutex' in self.sched.mutex.mutexes)
def test_mutex_reconfigure(self):
"Test reconfigure with job mutexes"
self.config.set('zuul', 'layout_config',
'tests/fixtures/layout-mutex.yaml')
self.sched.reconfigure(self.config)
self.worker.hold_jobs_in_build = True
A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A')
self.assertFalse('test-mutex' in self.sched.mutex.mutexes)
self.fake_gerrit.addEvent(A.getPatchsetCreatedEvent(1))
self.waitUntilSettled()
self.assertTrue('test-mutex' in self.sched.mutex.mutexes)
self.config.set('zuul', 'layout_config',
'tests/fixtures/layout-mutex-reconfiguration.yaml')
self.sched.reconfigure(self.config)
self.waitUntilSettled()
self.worker.release('project-test1')
self.waitUntilSettled()
# The check pipeline should be empty
items = self.sched.layout.pipelines['check'].getAllItems()
self.assertEqual(len(items), 0)
# The mutex should be released
self.assertFalse('test-mutex' in self.sched.mutex.mutexes)
def test_node_label(self):
"Test that a job runs on a specific node label"
self.worker.registerFunction('build:node-project-test1:debian')

View File

@ -866,6 +866,8 @@ class Scheduler(threading.Thread):
self.log.exception(
"Exception while canceling build %s "
"for change %s" % (build, item.change))
finally:
self.mutex.release(build.build_set.item, build.job)
self.layout = layout
self.maintainConnectionCache()
for trigger in self.triggers.values():
@ -1540,6 +1542,8 @@ class BasePipelineManager(object):
except:
self.log.exception("Exception while canceling build %s "
"for change %s" % (build, item.change))
finally:
self.sched.mutex.release(build.build_set.item, build.job)
build.result = 'CANCELED'
canceled = True
self.updateBuildDescriptions(old_build_set)