From 387b134a526af40362d16e9333d3d60d4fd4b859 Mon Sep 17 00:00:00 2001 From: David Shrewsbury Date: Tue, 4 Dec 2018 17:28:07 -0500 Subject: [PATCH] Add cleanup routine to delete empty nodes We've discovered that our node deletion process has the possibility to leave empty (i.e., no data) node znodes in ZooKeeper. Although a fix for this has been merged, we need a way to remove this extraneous data. Change-Id: I6596060f5026088ce987e5d0d7c18b00a6b77c5a --- nodepool/launcher.py | 16 ++++++++++++++++ nodepool/tests/unit/test_launcher.py | 17 +++++++++++++++++ nodepool/zk.py | 20 +++++++++++++++----- 3 files changed, 48 insertions(+), 5 deletions(-) diff --git a/nodepool/launcher.py b/nodepool/launcher.py index e881ec275..8a2b7340a 100755 --- a/nodepool/launcher.py +++ b/nodepool/launcher.py @@ -410,6 +410,7 @@ class CleanupWorker(BaseCleanupWorker): (self._cleanupLostRequests, 'lost request cleanup'), (self._cleanupMaxReadyAge, 'max ready age cleanup'), (self._cleanupMaxHoldAge, 'max hold age cleanup'), + (self._cleanupEmptyNodes, 'empty node cleanup'), ] def _resetLostRequest(self, zk_conn, req): @@ -616,6 +617,21 @@ class CleanupWorker(BaseCleanupWorker): finally: zk_conn.unlockNode(node) + def _cleanupEmptyNodes(self): + ''' + Remove any Node znodes that may be totally empty. + ''' + self.log.debug('Cleaning up empty nodes...') + zk_conn = self._nodepool.getZK() + + # We cannot use nodeIterator() here since that does not yield us + # empty nodes. + for node_id in zk_conn.getNodes(): + node = zk_conn.getNode(node_id) + if node is None: + self.log.debug("Removing empty node %s", node_id) + zk_conn.deleteRawNode(node_id) + def _run(self): ''' Catch exceptions individually so that other cleanup routines may diff --git a/nodepool/tests/unit/test_launcher.py b/nodepool/tests/unit/test_launcher.py index 49bb688b2..2d8994f34 100644 --- a/nodepool/tests/unit/test_launcher.py +++ b/nodepool/tests/unit/test_launcher.py @@ -1824,3 +1824,20 @@ class TestLauncher(tests.DBTestCase): self.assertTrue(req2.id > req1.id) self.assertTrue(req2.state_time < req1.state_time) + + def test_empty_node_deleted(self): + """Test that empty nodes are deleted by the cleanup thread""" + configfile = self.setup_config('node.yaml') + + # Create empty node + path = "%s" % self.zk._nodePath("12345") + self.log.debug("node path %s", path) + self.zk.client.create(path, makepath=True) + self.assertTrue(self.zk.client.exists(path)) + + pool = self.useNodepool(configfile, watermark_sleep=1) + pool.cleanup_interval = .1 + pool.start() + + while self.zk.client.exists(path): + time.sleep(.1) diff --git a/nodepool/zk.py b/nodepool/zk.py index cfdb6df45..e42f64488 100755 --- a/nodepool/zk.py +++ b/nodepool/zk.py @@ -1882,6 +1882,20 @@ class ZooKeeper(object): path = self._nodePath(node.id) self.client.set(path, node.serialize()) + def deleteRawNode(self, node_id): + ''' + Delete a znode for a Node. + + This is used to forcefully delete a Node znode that has somehow + ended up without any actual data. In most cases, you should be using + deleteNode() instead. + ''' + path = self._nodePath(node_id) + try: + self.client.delete(path, recursive=True) + except kze.NoNodeError: + pass + def deleteNode(self, node): ''' Delete a node. @@ -1898,11 +1912,7 @@ class ZooKeeper(object): # lock is removed before the node deletion occurs. node.state = DELETED self.client.set(path, node.serialize()) - - try: - self.client.delete(path, recursive=True) - except kze.NoNodeError: - pass + self.deleteRawNode(node.id) def getReadyNodesOfTypes(self, labels, cached=True): '''