Add min-retention-time to metastatic driver

This adds the ability to configure the minimum retention time for
the backing node of a metastatic resource.  This is useful for
cloud resources with minimum billing intervals (e.g., where you are
billed for 24 hours of an instance even if you use it for less).

Change-Id: Ibb0588f244fc6697950f0401145e0ec5ad2482c3
Author: James E. Blair 2024-04-24 11:03:50 -07:00
Parent: 5ff7dd8289
Commit: c253ebfef3
6 changed files with 109 additions and 4 deletions

@@ -193,6 +193,16 @@ itself, which is "meta".
       arrive for this label before deleting the backing node.
       Set this value to the amount of time in seconds to wait.
 
+   .. attr:: min-retention-time
+      :type: int
+
+      If this value is set, the backing node will not be
+      deleted unless this amount of time (in seconds) has
+      passed since the backing node was launched.  For backing
+      node resources with minimum billing times, this can be
+      used to ensure that the backing node is retained for at
+      least the minimum billing interval.
+
    .. attr:: host-key-checking
       :type: bool
       :default: False
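
For illustration, a minimal sketch of how this attribute might be set on a
metastatic pool label; the provider and label names here are hypothetical,
not part of this change:

providers:
  - name: meta-provider
    driver: metastatic
    pools:
      - name: main
        labels:
          - name: expensive-label
            backing-label: expensive-backing-label
            grace-time: 60
            # Retain the backing node for a full day to match a
            # hypothetical 24-hour minimum billing interval.
            min-retention-time: 86400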

@@ -218,6 +218,7 @@ class BackingNodeRecord:
         self.request_id = None
         self.allocated_nodes = [None for x in range(slot_count)]
         self.failed = False
+        self.launched = time.time()
         self.last_used = time.time()
 
     def hasAvailableSlot(self):
@@ -296,14 +297,18 @@ class MetastaticAdapter(statemachine.Adapter):
                 label_config = self.provider._getLabel(bnr.label_name)
                 if label_config:
                     grace_time = label_config.grace_time
+                    min_time = label_config.min_retention_time
                 else:
                     # The label doesn't exist in our config any more,
                     # it must have been removed.
                     grace_time = 0
+                    min_time = 0
                 if bnr.failed:
                     grace_time = 0
+                    min_time = 0
                 if (bnr.isEmpty() and
-                        now - bnr.last_used > grace_time):
+                        now - bnr.last_used > grace_time and
+                        now - bnr.launched > min_time):
                     self.log.info("Backing node %s has been idle for "
                                   "%s seconds, releasing",
                                   bnr.node_id, now - bnr.last_used)
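
In short, an idle backing node is now released only when both clocks have
expired: the idle timer (grace-time) and the age timer (min-retention-time).
A standalone sketch of the release predicate; the should_release helper is
illustrative, not part of the driver:

import time

def should_release(bnr, grace_time, min_time, now=None):
    """Return True when an empty backing node may be deleted.

    Both conditions must hold: the node has been idle longer than
    grace_time, and it was launched longer ago than min_time.
    """
    if now is None:
        now = time.time()
    return (bnr.isEmpty() and
            now - bnr.last_used > grace_time and
            now - bnr.launched > min_time)
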
@@ -373,6 +378,7 @@ class MetastaticAdapter(statemachine.Adapter):
                     user_data['slots'])
                 backing_node_record.node_id = node.id
                 backing_node_record.failed = user_data.get('failed', False)
+                backing_node_record.launched = user_data.get('launched', 0)
                 self.log.info("Found backing node %s for %s",
                               node.id, user_data['label'])
                 self._addBackingNode(user_data['label'],
@@ -454,6 +460,7 @@ class MetastaticAdapter(statemachine.Adapter):
             'label': bnr.label_name,
             'slots': bnr.slot_count,
             'failed': bnr.failed,
+            'launched': bnr.launched,
         })
 
     def _checkBackingNodeRequests(self):
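
The launch time is stored alongside the other backing node metadata so that
it survives a launcher restart; a sketch of the round-trip, using a plain
dict in place of the real node metadata:

import time

# Written when the backing node is created (mirrors the diff above):
user_data = {
    'label': 'user-label-min-retention',
    'slots': 1,
    'failed': False,
    'launched': time.time(),
}

# Read back after a restart.  Backing nodes created before this
# change carry no 'launched' key; the 0 default means they count as
# already past any minimum retention time rather than being
# retained indefinitely.
launched = user_data.get('launched', 0)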

@@ -45,6 +45,7 @@ class MetastaticLabel(ConfigValue):
         self.cloud_image = MetastaticCloudImage()
         self.max_parallel_jobs = label.get('max-parallel-jobs', 1)
         self.grace_time = label.get('grace-time', 60)
+        self.min_retention_time = label.get('min-retention-time', 0)
         self.host_key_checking = label.get('host-key-checking',
                                            self.pool.host_key_checking)
 
@@ -55,6 +56,7 @@ class MetastaticLabel(ConfigValue):
             v.Required('backing-label'): str,
             'max-parallel-jobs': int,
             'grace-time': int,
+            'min-retention-time': int,
             'host-key-checking': bool,
         }
 
@@ -63,7 +65,8 @@ class MetastaticLabel(ConfigValue):
         return (
             self.backing_label == other.backing_label and
             self.max_parallel_jobs == other.max_parallel_jobs and
-            self.grace_time == other.grace_time
+            self.grace_time == other.grace_time and
+            self.min_retention_time == other.min_retention_time
         )
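
A quick sketch of the schema extension in isolation, using voluptuous
directly (the surrounding config loader machinery is omitted):

import voluptuous as v

schema = v.Schema({
    v.Required('backing-label'): str,
    'max-parallel-jobs': int,
    'grace-time': int,
    'min-retention-time': int,
    'host-key-checking': bool,
})

# An integer value validates; a string like '5m' would raise
# voluptuous.Invalid, since the attribute is declared as int.
schema({'backing-label': 'backing-label', 'min-retention-time': 300})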

@@ -21,6 +21,10 @@ labels:
     min-ready: 0
   - name: user-label
     min-ready: 0
+  - name: backing-label-min-retention
+    min-ready: 0
+  - name: user-label-min-retention
+    min-ready: 0
 
 providers:
   # The backing node provider: a cloud
@@ -42,6 +46,10 @@ providers:
         cloud-image: fake-image
         min-ram: 8192
         flavor-name: 'Fake'
+      - name: backing-label-min-retention
+        cloud-image: fake-image
+        min-ram: 8192
+        flavor-name: 'Fake'
 
   - name: meta-provider
     driver: metastatic
@@ -59,3 +67,8 @@ providers:
         max-parallel-jobs: 2
         grace-time: 2
         host-key-checking: true
+      - name: user-label-min-retention
+        backing-label: backing-label-min-retention
+        grace-time: 2
+        min-retention-time: 300
+        host-key-checking: true

@@ -35,11 +35,11 @@ class TestDriverMetastatic(tests.DBTestCase):
         StateMachineProvider.MINIMUM_SLEEP = 0.1
         StateMachineProvider.MAXIMUM_SLEEP = 1
 
-    def _requestNode(self):
+    def _requestNode(self, label='user-label'):
         req = zk.NodeRequest()
         req.state = zk.REQUESTED
         req.tenant_name = 'tenant-1'
-        req.node_types.append('user-label')
+        req.node_types.append(label)
 
         self.zk.storeNodeRequest(req)
         req = self.waitForNodeRequest(req)
@@ -307,3 +307,68 @@
         self.waitForNodeDeletion(bn1)
         nodes = self._getNodes()
         self.assertEqual(nodes, [])
+
+    def test_metastatic_min_retention(self):
+        # Test that the metastatic driver honors min-retention
+        configfile = self.setup_config('metastatic.yaml')
+        pool = self.useNodepool(configfile, watermark_sleep=1)
+        self.startPool(pool)
+        manager = pool.getProviderManager('fake-provider')
+        pool_worker = pool.getPoolWorkers('meta-provider')[0]
+        pool_config = pool_worker.getPoolConfig()
+        self.assertEqual(pool_config.max_servers, 10)
+        self.assertEqual(pool_config.priority, 1)
+
+        manager.adapter._client.create_image(name="fake-image")
+
+        # Request a node, verify that there is a backing node, and it
+        # has the same connection info
+        node1 = self._requestNode('user-label-min-retention')
+        nodes = self._getNodes()
+        self.assertEqual(len(nodes), 2)
+        self.assertEqual(nodes[0], node1)
+        self.assertNotEqual(nodes[1], node1)
+        bn1 = nodes[1]
+        self.assertEqual(bn1.provider, 'fake-provider')
+        self.assertEqual(bn1.interface_ip, node1.interface_ip)
+        self.assertEqual(bn1.python_path, node1.python_path)
+        self.assertEqual('auto', node1.python_path)
+        self.assertEqual(bn1.shell_type, node1.shell_type)
+        self.assertEqual(bn1.cloud, node1.cloud)
+        self.assertEqual(None, node1.shell_type)
+        self.assertEqual(bn1.host_keys, node1.host_keys)
+        self.assertEqual(['ssh-rsa FAKEKEY'], node1.host_keys)
+        self.assertEqual(bn1.id, node1.driver_data['backing_node'])
+        self.assertEqual(bn1.attributes, {
+            'backattr': 'back',
+            'testattr': 'backing',
+        })
+        self.assertEqual(node1.attributes, {
+            'backattr': 'back',
+            'metaattr': 'meta',
+            'testattr': 'metastatic',
+        })
+
+        # Delete node, verify that backing node still exists
+        node1.state = zk.DELETING
+        self.zk.storeNode(node1)
+        self.waitForNodeDeletion(node1)
+
+        # This has the side effect of deleting the backing node when
+        # idle, but it should not in this case because the
+        # min-retention time is not met.
+        meta_manager = pool.getProviderManager('meta-provider')
+        meta_manager.adapter.listResources()
+        nodes = self._getNodes()
+        self.assertEqual(nodes, [bn1])
+        self.assertEqual(nodes[0].state, zk.IN_USE)
+
+        # Falsify the launch time to trigger cleanup:
+        bnr = meta_manager.adapter.backing_node_records[
+            'user-label-min-retention'][0]
+        bnr.launched = 0
+
+        meta_manager.adapter.listResources()
+        nodes = self._getNodes()
+        self.waitForNodeDeletion(bn1)
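
The final step of the test works because zeroing the record's launch
timestamp makes the age check pass trivially; a small numeric illustration
using the fixture's values:

import time

grace_time = 2            # from the fixture's user-label-min-retention
min_retention_time = 300

now = time.time()
last_used = now - 10      # idle longer than grace-time
launched = now - 60       # but only alive for one minute

# Still retained: idle long enough, yet younger than min-retention-time.
assert not (now - last_used > grace_time and
            now - launched > min_retention_time)

# After falsifying the launch time, as the test does, cleanup proceeds.
launched = 0
assert (now - last_used > grace_time and
        now - launched > min_retention_time)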

@@ -0,0 +1,7 @@
+---
+features:
+  - |
+    The metastatic driver now supports a
+    :attr:`providers.[metastatic].pools.labels.min-retention-time`
+    attribute to set the minimum retention time for a backing node
+    (useful for cloud resources with minimum billing intervals).