Make estimatedNodepoolQuotaUsed more resilient
We had cases where znodes were stored without a pool or type. Znodes without a type, at least, break the quota calculation and can lead to wedged providers. Make the calculation more resilient by logging exceptions per node instead of failing the complete calculation. This way we don't wedge when there is bogus data in ZooKeeper, while still being able to debug what's wrong with the affected znodes.

Change-Id: I4a33ffbbf3684dc3830913ed8dc7b158f2426602
This commit is contained in:
parent
6fe861f42a
commit
41c968e3ac
|
@ -151,26 +151,30 @@ class OpenStackProvider(Provider):
|
|||
|
||||
for node in self._zk.nodeIterator():
|
||||
if node.provider == self.provider.name:
|
||||
if pool and not node.pool == pool.name:
|
||||
continue
|
||||
provider_pool = self.provider.pools.get(node.pool)
|
||||
if not provider_pool:
|
||||
self.log.warning(
|
||||
"Cannot find provider pool for node %s" % node)
|
||||
# This node is in a funny state we log it for debugging
|
||||
# but move on and don't account it as we can't properly
|
||||
# calculate its cost without pool info.
|
||||
continue
|
||||
if node.type[0] not in provider_pool.labels:
|
||||
self.log.warning(
|
||||
"Node type is not in provider pool for node %s" % node)
|
||||
# This node is also in a funny state; the config
|
||||
# may have changed under it. It should settle out
|
||||
# eventually when it's deleted.
|
||||
continue
|
||||
node_resources = self.quotaNeededByNodeType(
|
||||
node.type[0], provider_pool)
|
||||
used_quota.add(node_resources)
|
||||
try:
|
||||
if pool and not node.pool == pool.name:
|
||||
continue
|
||||
provider_pool = self.provider.pools.get(node.pool)
|
||||
if not provider_pool:
|
||||
self.log.warning(
|
||||
"Cannot find provider pool for node %s" % node)
|
||||
# This node is in a funny state we log it for debugging
|
||||
# but move on and don't account it as we can't properly
|
||||
# calculate its cost without pool info.
|
||||
continue
|
||||
if node.type[0] not in provider_pool.labels:
|
||||
self.log.warning("Node type is not in provider pool "
|
||||
"for node %s" % node)
|
||||
# This node is also in a funny state; the config
|
||||
# may have changed under it. It should settle out
|
||||
# eventually when it's deleted.
|
||||
continue
|
||||
node_resources = self.quotaNeededByNodeType(
|
||||
node.type[0], provider_pool)
|
||||
used_quota.add(node_resources)
|
||||
except Exception:
|
||||
self.log.exception("Couldn't consider invalid node %s "
|
||||
"for quota:" % node)
|
||||
return used_quota
|
||||
|
||||
def unmanagedQuotaUsed(self):
|
||||
|
|
|
@ -729,6 +729,39 @@ class TestLauncher(tests.DBTestCase):
|
|||
# retries in config is set to 2, so 2 attempts to create a server
|
||||
self.assertEqual(0, manager.createServer_fails)
|
||||
|
||||
def test_node_launch_with_broken_znodes(self):
    """Test that node launch still works if there are broken znodes.

    Stores two deliberately incomplete znodes in ZooKeeper — one with no
    node type, one with no pool — and then verifies that a normal node
    request is still fulfilled, i.e. the provider's quota calculation
    tolerates the bogus records instead of wedging.
    """
    # Create a znode without type: it has a provider, pool, external id
    # and READY state, but node.type is never set.
    znode = zk.Node()
    znode.provider = 'fake-provider'
    znode.pool = 'main'
    znode.external_id = 'fakeid'
    znode.state = zk.READY

    # Store the type-less znode, then create and store a second znode
    # that has a type but no pool.
    self.zk.storeNode(znode)
    znode = zk.Node()
    znode.provider = 'fake-provider'
    znode.type = ['fake-label']
    znode.external_id = 'fakeid'
    znode.state = zk.READY
    self.zk.storeNode(znode)

    # Start a launcher and builder against the broken data and wait
    # until the provider config and image are ready.
    configfile = self.setup_config('node_launch_retry.yaml')
    pool = self.useNodepool(configfile, watermark_sleep=1)
    self.useBuilder(configfile)
    pool.start()
    self.wait_for_config(pool)
    self.waitForImage('fake-provider', 'fake-image')

    # Submit an ordinary node request; despite the broken znodes it
    # must still be fulfilled.
    req = zk.NodeRequest()
    req.state = zk.REQUESTED
    req.node_types.append('fake-label')
    self.zk.storeNodeRequest(req)

    req = self.waitForNodeRequest(req)
    self.assertEqual(req.state, zk.FULFILLED)
|
||||
|
||||
def test_node_launch_retries_with_external_id(self):
|
||||
configfile = self.setup_config('node_launch_retry.yaml')
|
||||
pool = self.useNodepool(configfile, watermark_sleep=1)
|
||||
|
|
Loading…
Reference in New Issue