Add autohold debug info
Also:

* Check the lock before attempting to write the znodes (matches other methods in nodepool.py).
* Process exceptions one level higher in the scheduler, and separate autohold exceptions from nodeset return exceptions, so that in the unlikely event we throw an exception dealing with the autohold, we just might later succeed when returning the node.
* Don't remove the autohold request on exception. The error is most likely to come from a ZooKeeper problem and not be systemic. Let Zuul try again after the system has recovered.

Change-Id: Idba331576a43f738883d61be72a6f400c233bf0e
This commit is contained in:
parent
0901777d37
commit
acef0f5ee8
|
@ -81,16 +81,19 @@ class Nodepool(object):
|
|||
|
||||
def holdNodeSet(self, nodeset, autohold_key):
|
||||
'''
|
||||
If requested, perform a hold on the given set of nodes.
|
||||
Perform a hold on the given set of nodes.
|
||||
|
||||
:param NodeSet nodeset: The object containing the set of nodes to hold.
|
||||
:param set autohold_key: A set with the tenant/project/job names
|
||||
associated with the given NodeSet.
|
||||
'''
|
||||
self.log.info("Holding nodeset %s" % (nodeset,))
|
||||
(hold_iterations, reason) = self.sched.autohold_requests[autohold_key]
|
||||
nodes = nodeset.getNodes()
|
||||
|
||||
for node in nodes:
|
||||
if node.lock is None:
|
||||
raise Exception("Node %s is not locked" % (node,))
|
||||
node.state = model.STATE_HOLD
|
||||
node.hold_job = " ".join(autohold_key)
|
||||
node.comment = reason
|
||||
|
|
|
@ -1019,6 +1019,7 @@ class Scheduler(threading.Thread):
|
|||
# of requests - the most specific is selected.
|
||||
autohold_key = None
|
||||
scope = Scope.NONE
|
||||
self.log.debug("Checking build autohold key %s", autohold_key_base)
|
||||
for request in self.autohold_requests:
|
||||
ref_filter = request[-1]
|
||||
if not autohold_key_base_issubset(autohold_key_base, request) \
|
||||
|
@ -1032,6 +1033,8 @@ class Scheduler(threading.Thread):
|
|||
else:
|
||||
candidate_scope = Scope.REF
|
||||
|
||||
self.log.debug("Build autohold key %s matched scope %s",
|
||||
autohold_key_base, candidate_scope)
|
||||
if candidate_scope > scope:
|
||||
scope = candidate_scope
|
||||
autohold_key = request
|
||||
|
@ -1039,7 +1042,6 @@ class Scheduler(threading.Thread):
|
|||
return autohold_key
|
||||
|
||||
def _processAutohold(self, build):
|
||||
|
||||
# We explicitly only want to hold nodes for jobs if they have
|
||||
# failed / retry_limit / post_failure and have an autohold request.
|
||||
hold_list = ["FAILURE", "RETRY_LIMIT", "POST_FAILURE"]
|
||||
|
@ -1047,16 +1049,9 @@ class Scheduler(threading.Thread):
|
|||
return
|
||||
|
||||
autohold_key = self._getAutoholdRequestKey(build)
|
||||
try:
|
||||
if autohold_key is not None:
|
||||
self.nodepool.holdNodeSet(build.nodeset, autohold_key)
|
||||
except Exception:
|
||||
self.log.exception("Unable to process autohold for %s:",
|
||||
autohold_key)
|
||||
if autohold_key in self.autohold_requests:
|
||||
self.log.debug("Removing autohold %s due to exception",
|
||||
autohold_key)
|
||||
del self.autohold_requests[autohold_key]
|
||||
self.log.debug("Got autohold key %s", autohold_key)
|
||||
if autohold_key is not None:
|
||||
self.nodepool.holdNodeSet(build.nodeset, autohold_key)
|
||||
|
||||
def _doBuildCompletedEvent(self, event):
|
||||
build = event.build
|
||||
|
@ -1066,6 +1061,9 @@ class Scheduler(threading.Thread):
|
|||
# the nodes to nodepool.
|
||||
try:
|
||||
self._processAutohold(build)
|
||||
except Exception:
|
||||
self.log.exception("Unable to process autohold for %s" % build)
|
||||
try:
|
||||
self.nodepool.returnNodeSet(build.nodeset)
|
||||
except Exception:
|
||||
self.log.exception("Unable to return nodeset %s" % build.nodeset)
|
||||
|
|
Loading…
Reference in New Issue