Ensure instance_info is clean before deploy and after failure

Currently stale values can be left in instance_info (and reused next time)
if a deployment attempt fails. This change ensures that we:
1) Purge instance_info completely on failures
2) Only keep traits and capabilities in instance_info on deploy.

Change-Id: I52a85620d9ac2f471bca6498294871f3bb16d47f
This commit is contained in:
Dmitry Tantsur 2019-03-08 14:29:45 +01:00
parent d64e020fd0
commit 8e7b8d3f39
3 changed files with 21 additions and 8 deletions

View File

@ -34,6 +34,7 @@ LOG = logging.getLogger(__name__)
_CREATED_PORTS = 'metalsmith_created_ports'
_ATTACHED_PORTS = 'metalsmith_attached_ports'
_PRESERVE_INSTANCE_INFO_KEYS = {'capabilities', 'traits'}
class Provisioner(_utils.GetNodeMixin):
@ -279,7 +280,7 @@ class Provisioner(_utils.GetNodeMixin):
capabilities['boot_option'] = 'netboot' if netboot else 'local'
instance_info = node.instance_info.copy()
instance_info = self._clean_instance_info(node.instance_info)
instance_info['root_gb'] = root_size_gb
instance_info['capabilities'] = capabilities
instance_info[self.HOSTNAME_FIELD] = hostname
@ -360,6 +361,11 @@ class Provisioner(_utils.GetNodeMixin):
nodes, 'active', timeout=timeout)
return [_instance.Instance(self.connection, node) for node in nodes]
def _clean_instance_info(self, instance_info):
    """Return a new dict with only the preserved instance_info entries.

    An entry is kept only when its key is listed in
    ``_PRESERVE_INSTANCE_INFO_KEYS``; everything else is dropped. The
    mapping passed in is not modified.

    :param instance_info: mapping of instance_info keys to values.
    :return: a freshly built dict holding the preserved entries.
    """
    kept = {}
    for name, content in instance_info.items():
        # Anything not explicitly whitelisted is treated as stale.
        if name not in _PRESERVE_INSTANCE_INFO_KEYS:
            continue
        kept[name] = content
    return kept
def _clean_up(self, node, nics=None):
if nics is None:
created_ports = node.extra.get(_CREATED_PORTS, [])
@ -372,17 +378,14 @@ class Provisioner(_utils.GetNodeMixin):
extra = node.extra.copy()
for item in (_CREATED_PORTS, _ATTACHED_PORTS):
extra.pop(item, None)
instance_info = node.instance_info.copy()
instance_info.pop(self.HOSTNAME_FIELD, None)
LOG.debug('Updating node %(node)s with instance info %(iinfo)s '
'and extras %(extra)s and releasing the lock',
LOG.debug('Updating node %(node)s with empty instance info (was '
'%(iinfo)s) and extras %(extra)s and releasing the lock',
{'node': _utils.log_res(node),
'iinfo': instance_info,
'iinfo': node.instance_info,
'extra': extra})
try:
self.connection.baremetal.update_node(
node, instance_info=instance_info, extra=extra,
instance_id=None)
node, instance_info={}, extra=extra, instance_id=None)
except Exception as exc:
LOG.debug('Failed to clear node %(node)s extra: %(exc)s',
{'node': _utils.log_res(node), 'exc': exc})

View File

@ -505,6 +505,9 @@ class TestProvisionNode(Base):
self.image.ramdisk_id = None
del self.instance_info['kernel']
del self.instance_info['ramdisk']
# Ensure stale values are cleaned up
self.node.instance_info['kernel'] = 'bad value'
self.node.instance_info['ramdisk'] = 'bad value'
self.pr.provision_node(self.node, 'image', [{'network': 'network'}])

View File

@ -0,0 +1,7 @@
---
fixes:
- |
Fixes stale ``instance_info`` remaining after deploy failures.
- |
Cleans up ``instance_info`` prior to updating it before deployment to make
sure no stale information is left there.