Fix paused handler exception handling

If a request handler is paused and an exception is thrown during
normal events, that exception handling code was not properly unpausing
the request and removing it from the list of active handlers.

Change-Id: I0cfd8294f9ce88e5918654a4847776c981ee4fb3
This commit is contained in:
David Shrewsbury 2018-06-14 13:55:23 -04:00
parent f3f7e41f21
commit 58ccd2c718
2 changed files with 74 additions and 20 deletions

View File

@ -496,16 +496,7 @@ class NodeRequestHandler(object, metaclass=abc.ABCMeta):
self.log.debug("Declining node request %s because %s",
self.request.id, ', '.join(declined_reasons))
self.decline_request()
self.unlockNodeSet(clear_allocation=True)
# If conditions have changed for a paused request to now cause us
# to decline it, we need to unpause so we don't keep trying it
if self.paused:
self.paused = False
self.zk.storeNodeRequest(self.request)
self.zk.unlockNodeRequest(self.request)
self.done = True
self._declinedHandlerCleanup()
return
if self.paused:
@ -517,6 +508,27 @@ class NodeRequestHandler(object, metaclass=abc.ABCMeta):
self._waitForNodeSet()
def _declinedHandlerCleanup(self):
"""
After declining a request, do necessary cleanup actions.
"""
self.unlockNodeSet(clear_allocation=True)
# If conditions have changed for a paused request to now cause us
# to decline it, we need to unpause so we don't keep trying it
if self.paused:
self.paused = False
try:
self.zk.storeNodeRequest(self.request)
self.zk.unlockNodeRequest(self.request)
except Exception:
# If the request is gone for some reason, we need to make
# sure that self.done still gets set.
self.log.exception("Unable to modify missing request %s",
self.request.id)
self.done = True
# ---------------------------------------------------------------
# Public methods
# ---------------------------------------------------------------
@ -576,16 +588,7 @@ class NodeRequestHandler(object, metaclass=abc.ABCMeta):
"Declining node request %s due to exception in "
"NodeRequestHandler:", self.request.id)
self.decline_request()
self.unlockNodeSet(clear_allocation=True)
try:
self.zk.storeNodeRequest(self.request)
self.zk.unlockNodeRequest(self.request)
except Exception:
# If the request is gone for some reason, we need to make
# sure that self.done still gets set.
self.log.exception("Unable to decline missing request %s",
self.request.id)
self.done = True
self._declinedHandlerCleanup()
def poll(self):
if self.paused:

View File

@ -1370,3 +1370,54 @@ class TestLauncher(tests.DBTestCase):
self.assertEqual(1, mock_poll.call_count)
self.assertEqual(0, len(
pool._pool_threads["fake-provider-main"].request_handlers))
def test_exception_causing_decline_of_paused_request(self):
"""
Test that a paused request, that later gets declined because of
an exception (say, thrown from a provider operation), unpauses
and removes the request handler.
"""
# First config has max-servers set to 2
configfile = self.setup_config('pause_declined_1.yaml')
self.useBuilder(configfile)
self.waitForImage('fake-provider', 'fake-image')
pool = self.useNodepool(configfile, watermark_sleep=1)
pool.start()
# Create a request that uses all capacity (2 servers)
req = zk.NodeRequest()
req.state = zk.REQUESTED
req.node_types.append('fake-label')
req.node_types.append('fake-label')
self.zk.storeNodeRequest(req)
req = self.waitForNodeRequest(req)
self.assertEqual(req.state, zk.FULFILLED)
self.assertEqual(len(req.nodes), 2)
# Now that we have 2 nodes in use, create another request that
# requests two nodes, which should cause the request to pause.
req2 = zk.NodeRequest()
req2.state = zk.REQUESTED
req2.node_types.append('fake-label')
req2.node_types.append('fake-label')
self.zk.storeNodeRequest(req2)
req2 = self.waitForNodeRequest(req2, (zk.PENDING,))
# Force an exception within the run handler.
pool_worker = pool.getPoolWorkers('fake-provider')
while not pool_worker[0].paused_handler:
time.sleep(0.1)
pool_worker[0].paused_handler.hasProviderQuota = mock.Mock(
side_effect=Exception('mock exception')
)
# The above exception should cause us to fail the paused request.
req2 = self.waitForNodeRequest(req2, (zk.FAILED,))
self.assertNotEqual(req2.declined_by, [])
# The exception handling should make sure that we unpause AND remove
# the request handler.
while pool_worker[0].paused_handler:
time.sleep(0.1)
self.assertEqual(0, len(pool_worker[0].request_handlers))