Merge "Add post-timeout setting"

This commit is contained in:
Zuul 2018-02-16 21:23:12 +00:00 committed by Gerrit Code Review
commit ce769e62a4
9 changed files with 51 additions and 5 deletions

View File

@ -710,6 +710,21 @@ Here is an example of two job definitions:
timeout is supplied, the job may run indefinitely. Supplying a
timeout is highly recommended.
This timeout only applies to the pre-run and run playbooks in a
job.
.. attr:: post-timeout
The time in seconds that each post playbook should be allowed to run
before it is automatically aborted and failure is reported. If no
post-timeout is supplied, the job may run indefinitely. Supplying a
post-timeout is highly recommended.
The post-timeout is handled separately from the above timeout because
the post playbooks are typically where you will copy job logs.
In the event of the pre-run or run playbooks timing out, we want to
do our best to copy the job logs in the post-run playbooks.
.. attr:: attempts
:default: 3

View File

@ -289,6 +289,10 @@ of item.
The job timeout, in seconds.
.. var:: post_timeout
The post-run playbook timeout, in seconds.
.. var:: jobtags
A list of tags associated with the job. Not to be confused with

View File

@ -97,6 +97,12 @@
run: playbooks/timeout.yaml
timeout: 1
- job:
parent: python27
name: post-timeout
post-run: playbooks/timeout.yaml
post-timeout: 1
- job:
parent: python27
name: check-vars

View File

@ -17,5 +17,6 @@
- check-vars
- check-secret-names
- timeout
- post-timeout
- hello-world
- failpost

View File

@ -2048,6 +2048,12 @@ class TestAnsible(AnsibleZuulTestCase):
build_timeout = self.getJobFromHistory('timeout')
with self.jobLog(build_timeout):
self.assertEqual(build_timeout.result, 'TIMED_OUT')
post_flag_path = os.path.join(self.test_root, build_timeout.uuid +
'.post.flag')
self.assertTrue(os.path.exists(post_flag_path))
build_post_timeout = self.getJobFromHistory('post-timeout')
with self.jobLog(build_post_timeout):
self.assertEqual(build_post_timeout.result, 'POST_FAILURE')
build_faillocal = self.getJobFromHistory('faillocal')
with self.jobLog(build_faillocal):
self.assertEqual(build_faillocal.result, 'FAILURE')

View File

@ -498,6 +498,7 @@ class JobParser(object):
# validation happens in NodeSetParser
'nodeset': vs.Any(dict, str),
'timeout': int,
'post-timeout': int,
'attempts': int,
'pre-run': to_list(str),
'post-run': to_list(str),
@ -525,6 +526,7 @@ class JobParser(object):
'abstract',
'protected',
'timeout',
'post-timeout',
'workspace',
'voting',
'hold-following-changes',
@ -634,6 +636,10 @@ class JobParser(object):
int(conf['timeout']) > tenant.max_job_timeout:
raise MaxTimeoutError(job, tenant)
if conf.get('post-timeout') and tenant.max_job_timeout != -1 and \
int(conf['post-timeout']) > tenant.max_job_timeout:
raise MaxTimeoutError(job, tenant)
if 'post-review' in conf:
if conf['post-review']:
job.post_review = True

View File

@ -186,6 +186,7 @@ class ExecutorClient(object):
params = dict()
params['job'] = job.name
params['timeout'] = job.timeout
params['post_timeout'] = job.post_timeout
params['items'] = merger_items
params['projects'] = []
if hasattr(item.change, 'branch'):

View File

@ -878,8 +878,10 @@ class AnsibleJob(object):
success = False
self.started = True
time_started = time.time()
# timeout value is total job timeout or, put another way,
# the cumulative time that pre, run, and post can consume.
# timeout value is "total" job timeout which accounts for
# pre-run and run playbooks. post-run is different because
# it is used to copy out job logs and we want to do our best
# to copy logs even when the job has timed out.
job_timeout = args['timeout']
for index, playbook in enumerate(self.jobdir.pre_playbooks):
# TODOv3(pabelanger): Implement pre-run timeout setting.
@ -914,11 +916,15 @@ class AnsibleJob(object):
# run it again.
return None
post_timeout = args['post_timeout']
for index, playbook in enumerate(self.jobdir.post_playbooks):
# TODOv3(pabelanger): Implement post-run timeout setting.
ansible_timeout = self.getAnsibleTimeout(time_started, job_timeout)
# Post timeout operates a little differently to the main job
# timeout. We give each post playbook the full post timeout to
# do its job because post is where you'll often record job logs
# which are vital to understanding why timeouts have happened in
# the first place.
post_status, post_code = self.runAnsiblePlaybook(
playbook, ansible_timeout, success, phase='post', index=index)
playbook, post_timeout, success, phase='post', index=index)
if post_status == self.RESULT_ABORTED:
return 'ABORTED'
if post_status != self.RESULT_NORMAL or post_code != 0:

View File

@ -839,6 +839,7 @@ class Job(object):
self.execution_attributes = dict(
parent=None,
timeout=None,
post_timeout=None,
variables={},
nodeset=NodeSet(),
workspace=None,