Fix deduplication with paused jobs

When a deduplicated job paused, it would not wait for all children
across all queue items to complete before resuming; instead it
would wait only for the children in its own queue item.

Check all queue items a build is in before resuming it.

Change-Id: Ic2dec3a6dc58230b0873d7e8ba474bc39ed28385
This commit is contained in:
James E. Blair 2023-09-08 11:13:03 -07:00
parent 493a136dba
commit 68f80f9749
3 changed files with 206 additions and 4 deletions

View File

@ -0,0 +1,92 @@
# Test layout: two projects share one queue and both run a deduplicated
# parent job (common-job) with a project-specific job depending on it.

# Shared queue used by both projects below; circular dependencies
# between changes are allowed.
- queue:
name: integrated
allow-circular-dependencies: true
# Independent pipeline triggered by newly created patchsets; reports
# Verified +1 on success and -1 on failure.
- pipeline:
name: check
manager: independent
trigger:
gerrit:
- event: patchset-created
success:
gerrit:
Verified: 1
failure:
gerrit:
Verified: -1
# Dependent pipeline triggered by (and requiring) an "Approved: 1"
# vote; reports Verified +2 and submits on success, -2 on failure.
- pipeline:
name: gate
manager: dependent
success-message: Build succeeded (gate).
require:
gerrit:
approval:
- Approved: 1
trigger:
gerrit:
- event: comment-added
approval:
- Approved: 1
success:
gerrit:
Verified: 2
submit: true
failure:
gerrit:
Verified: -2
start:
gerrit:
Verified: 0
precedence: high
# Root job (no parent): runs playbooks/run.yaml on a single node with
# label "debian" named "controller".
- job:
name: base
parent: null
run: playbooks/run.yaml
nodeset:
nodes:
- label: debian
name: controller
# Parent job with deduplication explicitly enabled; both projects list it.
- job:
name: common-job
deduplicate: true
# Project-specific jobs; each project makes its job depend on common-job.
- job:
name: project1-job
- job:
name: project2-job
# Job with deduplication explicitly disabled.
# NOTE(review): child-job is not referenced by either project in this
# layout -- confirm whether it is still needed.
- job:
name: child-job
deduplicate: false
# org/project1: same job set in check and gate, with project1-job
# depending on common-job.
- project:
name: org/project1
queue: integrated
check:
jobs:
- common-job
- project1-job:
dependencies: common-job
gate:
jobs:
- common-job
- project1-job:
dependencies: common-job
# org/project2: mirrors org/project1, with project2-job depending on
# common-job.
- project:
name: org/project2
queue: integrated
check:
jobs:
- common-job
- project2-job:
dependencies: common-job
gate:
jobs:
- common-job
- project2-job:
dependencies: common-job

View File

@ -1878,6 +1878,60 @@ class TestGerritCircularDependencies(ZuulTestCase):
ref='refs/changes/01/1/1'),
], ordered=False)
@simple_layout('layouts/job-dedup-paused-parent.yaml')
def test_job_deduplication_paused_parent(self):
    """Pause a parent job and ensure it waits for all of its children.

    Two mutually dependent changes (A <-> B) are approved while builds
    are held, and ``common-job`` is told to pause for both of them.
    After ``common-job`` and ``project1-job`` are released, only
    ``project1-job`` may be in the history and two builds must still
    be outstanding.  Everything else finishes only once all builds
    are released.
    """
    self.executor_server.hold_jobs_in_build = True

    change_a = self.fake_gerrit.addFakeChange(
        'org/project1', 'master', 'A')
    change_b = self.fake_gerrit.addFakeChange(
        'org/project2', 'master', 'B')

    # Create the dependency cycle: A <-> B
    message_template = "{}\n\nDepends-On: {}\n"
    change_a.data["commitMessage"] = message_template.format(
        change_a.subject, change_b.data["url"])
    change_b.data["commitMessage"] = message_template.format(
        change_b.subject, change_a.data["url"])

    # Have common-job request a pause for each change.
    for change in (change_a, change_b):
        self.executor_server.returnData(
            'common-job', change, {'zuul': {'pause': True}})

    for change in (change_a, change_b):
        change.addApproval('Code-Review', 2)
    for change in (change_a, change_b):
        approved_event = change.addApproval('Approved', 1)
        self.fake_gerrit.addEvent(approved_event)
    self.waitUntilSettled()

    # Release the parent first, then only one of its children.
    for job_name in ('common-job', 'project1-job'):
        self.executor_server.release(job_name)
        self.waitUntilSettled()

    # Only project1-job has finished so far; two builds are still
    # outstanding, so the paused parent has not been resumed yet.
    partial_history = [
        dict(name="project1-job", result="SUCCESS", changes="2,1 1,1"),
    ]
    self.assertHistory(partial_history, ordered=False)
    self.assertEqual(len(self.builds), 2)

    # Stop holding builds and let everything run to completion.
    self.executor_server.hold_jobs_in_build = False
    self.executor_server.release()
    self.waitUntilSettled()

    final_history = [
        dict(name="common-job", result="SUCCESS", changes="2,1 1,1"),
        dict(name="project1-job", result="SUCCESS", changes="2,1 1,1"),
        dict(name="project2-job", result="SUCCESS", changes="2,1 1,1"),
    ]
    self.assertHistory(final_history, ordered=False)
@simple_layout('layouts/job-dedup-auto-shared.yaml')
def test_job_deduplication_failed_node_request(self):
# Pause nodepool so we can fail the node request later
@ -2517,6 +2571,59 @@ class TestGerritCircularDependencies(ZuulTestCase):
], ordered=False)
self._assert_job_deduplication_check()
@simple_layout('layouts/job-dedup-paused-parent.yaml')
def test_job_deduplication_check_paused_parent(self):
    """Pause a parent job and ensure it waits for all of its children.

    Same scenario as the approval-driven variant, but the two mutually
    dependent changes (A <-> B) are enqueued through patchset-created
    events.  ``common-job`` is told to pause for both changes.  After
    ``common-job`` and ``project1-job`` are released, only
    ``project1-job`` may be in the history and two builds must still
    be outstanding.  Everything else finishes only once all builds
    are released.
    """
    self.executor_server.hold_jobs_in_build = True

    change_a = self.fake_gerrit.addFakeChange(
        'org/project1', 'master', 'A')
    change_b = self.fake_gerrit.addFakeChange(
        'org/project2', 'master', 'B')

    # Create the dependency cycle: A <-> B
    message_template = "{}\n\nDepends-On: {}\n"
    change_a.data["commitMessage"] = message_template.format(
        change_a.subject, change_b.data["url"])
    change_b.data["commitMessage"] = message_template.format(
        change_b.subject, change_a.data["url"])

    # Have common-job request a pause for each change.
    for change in (change_a, change_b):
        self.executor_server.returnData(
            'common-job', change, {'zuul': {'pause': True}})

    # Enqueue one change at a time, settling after each event.
    for change in (change_a, change_b):
        self.fake_gerrit.addEvent(change.getPatchsetCreatedEvent(1))
        self.waitUntilSettled()

    # Release the parent first, then only one of its children.
    for job_name in ('common-job', 'project1-job'):
        self.executor_server.release(job_name)
        self.waitUntilSettled()

    # Only project1-job has finished so far; two builds are still
    # outstanding, so the paused parent has not been resumed yet.
    partial_history = [
        dict(name="project1-job", result="SUCCESS", changes="2,1 1,1"),
    ]
    self.assertHistory(partial_history, ordered=False)
    self.assertEqual(len(self.builds), 2)

    # Stop holding builds and let everything run to completion.
    self.executor_server.hold_jobs_in_build = False
    self.executor_server.release()
    self.waitUntilSettled()

    final_history = [
        dict(name="common-job", result="SUCCESS", changes="2,1 1,1",
             ref='refs/changes/01/1/1'),
        dict(name="project1-job", result="SUCCESS", changes="2,1 1,1"),
        dict(name="project2-job", result="SUCCESS", changes="1,1 2,1"),
    ]
    self.assertHistory(final_history, ordered=False)
    self._assert_job_deduplication_check()
@simple_layout('layouts/job-dedup-auto-shared.yaml')
def test_job_deduplication_check_failed_node_request(self):
# Pause nodepool so we can fail the node request later

View File

@ -1858,14 +1858,17 @@ class PipelineManager(metaclass=ABCMeta):
"""
Resumes all paused builds of a buildset that may be resumed.
"""
job_graph = build_set.job_graph
for build in build_set.builds.values():
if not build.paused:
continue
# check if all child jobs are finished
child_builds = [build_set.builds.get(x.name) for x in
job_graph.getDependentJobsRecursively(
build.job.name)]
child_builds = []
for item in self._getItemsWithBuild(build):
job_graph = item.current_build_set.job_graph
child_builds += [
item.current_build_set.builds.get(x.name)
for x in job_graph.getDependentJobsRecursively(
build.job.name)]
all_completed = True
for child_build in child_builds:
if not child_build or not child_build.result: