Further fixes to check deduplication
This fixes a similar case as I86c159c82b858e67433bdaa1e479471b21ea8b86 but at a different point in the pipeline lifecycle. In both cases, a queue item in check has completed and been dequeued, while a second item that deduplicated one of its jobs remains.

We store both the node request and the nodeset information on the buildset object. When builds finish, we clear the node request from the buildset object. Depending on timing, we may have information about the nodeset while we may or may not still have the request id. We use the presence of nodeset information to decide whether to run a job, but we pass the node request to the executor so that it can lock the nodeset.

This patch handles the case where we have deduplicated nodeset information but have already cleared the node request id. In that case, we would attempt to run the job a second time while passing None as the node request id. To correct this, if we learn that we are unable to restore information about a deduplicated build, we clear all of the nodeset and node request information about that job from the secondary buildset, so that when it attempts to re-run the build, it starts from a clean slate.

This case may be a superset of the other case, which means we may never hit the failsafe code added in I86c159c82b858e67433bdaa1e479471b21ea8b86. That should be fine, and we will remove the code related to both cases once the circular dependency refactor is complete.

Change-Id: I95511763236d5a99c14477989d573bd5c1726b76
This commit is contained in:
parent: edbe5405e8
commit: e6856c8f98
@ -2169,6 +2169,63 @@ class TestGerritCircularDependencies(ZuulTestCase):
|
|||
dict(name="project2-job2", result="SUCCESS", changes="2,1 1,1"),
|
||||
], ordered=False)
|
||||
|
||||
@simple_layout('layouts/job-dedup-multi-sched-complete.yaml')
|
||||
def test_job_deduplication_multi_scheduler_complete2(self):
|
||||
# Test that a second scheduler can correctly refresh
|
||||
# deduplicated builds after the first scheduler has completed
|
||||
# the builds for one item.
|
||||
|
||||
# This is similar to the previous test but starts the second
|
||||
# scheduler slightly later.
|
||||
self.executor_server.hold_jobs_in_build = True
|
||||
A = self.fake_gerrit.addFakeChange('org/project1', 'master', 'A')
|
||||
B = self.fake_gerrit.addFakeChange('org/project2', 'master', 'B')
|
||||
|
||||
# A <-> B
|
||||
A.data["commitMessage"] = "{}\n\nDepends-On: {}\n".format(
|
||||
A.subject, B.data["url"]
|
||||
)
|
||||
B.data["commitMessage"] = "{}\n\nDepends-On: {}\n".format(
|
||||
B.subject, A.data["url"]
|
||||
)
|
||||
A.addApproval('Code-Review', 2)
|
||||
B.addApproval('Code-Review', 2)
|
||||
|
||||
self.fake_gerrit.addEvent(A.addApproval('Approved', 1))
|
||||
self.waitUntilSettled()
|
||||
|
||||
self.executor_server.release('common-job')
|
||||
self.waitUntilSettled()
|
||||
|
||||
self.fake_gerrit.addEvent(B.addApproval('Approved', 1))
|
||||
self.waitUntilSettled()
|
||||
|
||||
self.executor_server.release('project1-job')
|
||||
self.waitUntilSettled()
|
||||
|
||||
app = self.createScheduler()
|
||||
app.start()
|
||||
self.assertEqual(len(self.scheds), 2)
|
||||
|
||||
# Hold the lock on the first scheduler so that only the second
|
||||
# will act.
|
||||
with self.scheds.first.sched.run_handler_lock:
|
||||
self.executor_server.hold_jobs_in_build = False
|
||||
|
||||
self.executor_server.release('project2-job2')
|
||||
self.waitUntilSettled(matcher=[app])
|
||||
|
||||
self.executor_server.release()
|
||||
self.waitUntilSettled(matcher=[app])
|
||||
|
||||
# We expect common-job to be "un-deduplicated".
|
||||
self.assertHistory([
|
||||
dict(name="project1-job", result="SUCCESS", changes="1,1 2,1"),
|
||||
dict(name="common-job", result="SUCCESS", changes="1,1 2,1"),
|
||||
dict(name="project2-job", result="SUCCESS", changes="1,1 2,1"),
|
||||
dict(name="project2-job2", result="SUCCESS", changes="1,1 2,1"),
|
||||
], ordered=False)
|
||||
|
||||
@simple_layout('layouts/job-dedup-noop.yaml')
|
||||
def test_job_deduplication_noop(self):
|
||||
# Test that we don't deduplicate noop (there's no good reason
|
||||
|
@ -2938,6 +2995,62 @@ class TestGerritCircularDependencies(ZuulTestCase):
|
|||
dict(name="project2-job2", result="SUCCESS", changes="1,1 2,1"),
|
||||
], ordered=False)
|
||||
|
||||
@simple_layout('layouts/job-dedup-multi-sched-complete.yaml')
|
||||
def test_job_deduplication_check_multi_scheduler_complete2(self):
|
||||
# Test that a second scheduler can correctly refresh
|
||||
# deduplicated builds after the first scheduler has completed
|
||||
# the builds for one item.
|
||||
|
||||
# This is similar to the previous test but starts the second
|
||||
# scheduler slightly later.
|
||||
self.executor_server.hold_jobs_in_build = True
|
||||
A = self.fake_gerrit.addFakeChange('org/project1', 'master', 'A')
|
||||
B = self.fake_gerrit.addFakeChange('org/project2', 'master', 'B')
|
||||
|
||||
# A <-> B
|
||||
A.data["commitMessage"] = "{}\n\nDepends-On: {}\n".format(
|
||||
A.subject, B.data["url"]
|
||||
)
|
||||
B.data["commitMessage"] = "{}\n\nDepends-On: {}\n".format(
|
||||
B.subject, A.data["url"]
|
||||
)
|
||||
|
||||
self.fake_gerrit.addEvent(A.getPatchsetCreatedEvent(1))
|
||||
self.waitUntilSettled()
|
||||
|
||||
self.executor_server.release('common-job')
|
||||
self.waitUntilSettled()
|
||||
|
||||
self.fake_gerrit.addEvent(B.getPatchsetCreatedEvent(1))
|
||||
self.waitUntilSettled()
|
||||
|
||||
self.executor_server.release('project1-job')
|
||||
self.waitUntilSettled()
|
||||
|
||||
app = self.createScheduler()
|
||||
app.start()
|
||||
self.assertEqual(len(self.scheds), 2)
|
||||
|
||||
# Hold the lock on the first scheduler so that only the second
|
||||
# will act.
|
||||
with self.scheds.first.sched.run_handler_lock:
|
||||
self.executor_server.hold_jobs_in_build = False
|
||||
|
||||
self.executor_server.release('project2-job2')
|
||||
self.waitUntilSettled(matcher=[app])
|
||||
|
||||
self.executor_server.release()
|
||||
self.waitUntilSettled(matcher=[app])
|
||||
|
||||
# We expect common-job to be "un-deduplicated".
|
||||
self.assertHistory([
|
||||
dict(name="project1-job", result="SUCCESS", changes="2,1 1,1"),
|
||||
dict(name="common-job", result="SUCCESS", changes="2,1 1,1"),
|
||||
dict(name="project2-job", result="SUCCESS", changes="1,1 2,1"),
|
||||
dict(name="common-job", result="SUCCESS", changes="1,1 2,1"),
|
||||
dict(name="project2-job2", result="SUCCESS", changes="1,1 2,1"),
|
||||
], ordered=False)
|
||||
|
||||
@simple_layout('layouts/job-dedup-noop.yaml')
|
||||
def test_job_deduplication_check_noop(self):
|
||||
# Test that we don't deduplicate noop (there's no good reason
|
||||
|
|
|
@ -922,6 +922,7 @@ class PipelineState(zkobject.ZKObject):
|
|||
for build_job, build in buildset.builds.items():
|
||||
if isinstance(build, BuildReference):
|
||||
to_replace_dicts.append((item,
|
||||
buildset,
|
||||
buildset.builds,
|
||||
build_job,
|
||||
build._path))
|
||||
|
@ -931,12 +932,14 @@ class PipelineState(zkobject.ZKObject):
|
|||
for build in build_list:
|
||||
if isinstance(build, BuildReference):
|
||||
to_replace_lists.append((item,
|
||||
None,
|
||||
build_list,
|
||||
build,
|
||||
build._path))
|
||||
else:
|
||||
build_map[build.getPath()] = build
|
||||
for (item, build_dict, build_job, build_path) in to_replace_dicts:
|
||||
for (item, buildset, build_dict, build_job, build_path
|
||||
) in to_replace_dicts:
|
||||
orig_build = build_map.get(build_path)
|
||||
if orig_build:
|
||||
build_dict[build_job] = orig_build
|
||||
|
@ -944,7 +947,16 @@ class PipelineState(zkobject.ZKObject):
|
|||
log.warning("Unable to find deduplicated build %s for %s",
|
||||
build_path, item)
|
||||
del build_dict[build_job]
|
||||
for (item, build_list, build, build_path) in to_replace_lists:
|
||||
# We're not going to be able to use the results of
|
||||
# this deduplication, which means we're going to try
|
||||
# to launch the job again. To make sure that happens
|
||||
# cleanly, go ahead and remove any nodeset information
|
||||
# that we copied when we thought we were going to
|
||||
# deduplicate it.
|
||||
buildset.nodeset_info.pop(build_job, None)
|
||||
buildset.node_requests.pop(build_job, None)
|
||||
for (item, buildset, build_list, build, build_path
|
||||
) in to_replace_lists:
|
||||
idx = build_list.index(build)
|
||||
orig_build = build_map.get(build_path)
|
||||
if orig_build:
|
||||
|
|
Loading…
Reference in New Issue