Error with clarity when a bad upgrade was encountered

Metalsmith is generally used as a frontend for baremetal
provisioning such that a user can ask for baremetal to be
deployed with a simplified command line. The challenge is
it artificially hydrates together data from several queries
to return a list of instances which have been deployed
or are managed using metalsmith. The challenge is if someone
didn't create an allocation record, or if someone used the
metalsmith modules to do so when the node was in a maintenance
state, for example if BMC communication was not working.

The underlying challenge is this results in a lack of an allocation
record from being created. This can be manually corrected, and instead
of providing a vague error, metalsmith now provides a detailed error
with the command to remedy the error state through manual creation of
the allocation record, which allows "metalsmith list" to work as
expected.

Change-Id: I033a9925fe798f329b7bb9a629da0cd0f1202683
This commit is contained in:
Julia Kreger 2024-05-08 07:40:11 -07:00
parent cbb0f359d0
commit dd5dc4e471
2 changed files with 96 additions and 13 deletions

View File

@ -682,17 +682,49 @@ class Provisioner(object):
'with a node' % node)
def _get_instance(self, ident):
allocation = None
if hasattr(ident, 'allocation_id'):
node = ident
try:
try:
allocation = Provisioner.allocations_cache[
node.instance_id]
except KeyError:
allocation = self.connection.baremetal.get_allocation(
node.allocation_id)
except os_exc.ResourceNotFound as exc:
raise exceptions.InstanceNotFound(str(exc))
allocation = Provisioner.allocations_cache[
node.instance_id]
except KeyError:
# NOTE(TheJulia): The pattern here assumes we just haven't
# found the allocation entry, so we try to get it.
if node.allocation_id:
try:
allocation = self.connection.baremetal.get_allocation(
node.allocation_id)
except os_exc.ResourceNotFound as exc:
raise exceptions.InstanceNotFound(str(exc))
elif node.instance_id:
LOG.debug('Discovered node %s without an '
'allocation when we believe it should have '
'an allocation based on the Metalsmith use '
'model. Metalsmith is likely being used '
'in an unsupported case, either in concert '
'with another user of Ironic, OR in a case '
'where a migration was executed incorrectly.',
node.id)
# We have an instance_id, and to be here we have
# no allocation ID.
fix_cmd = ('openstack baremetal allocation create --uuid '
'%(node_instance_id)s --name %(node_name)s '
'--node %(node_id)s' %
{'node_instance_id': node.instance_id,
'node_name': node.name,
'node_id': node.id})
msg = ('A error has been detected in the state of the '
'instance allocation records inside of Ironic '
'where a node has an assigned Instance ID, but '
'no allocation record. If only Metalsmith is '
'being used with Ironic, this can be safely '
'corrected with manual intervention. '
'To correct this, execute this command: '
'%(cmd)s' %
{'cmd': fix_cmd})
raise exceptions.InstanceNotFound(msg)
else:
node, allocation = self._find_node_and_allocation(ident)
if allocation is None and node.allocation_id:

View File

@ -2071,19 +2071,70 @@ class TestListInstances(Base):
for state in ('active', 'active', 'deploying', 'wait call-back',
'deploy failed', 'available', 'available', 'enroll')
]
self.nodes[0].allocation_id = 'id2'
self.nodes[0].allocation_id = 'id0'
self.nodes[1].allocation_id = 'id1'
self.nodes[2].allocation_id = 'id2'
self.nodes[3].allocation_id = 'id3'
self.nodes[4].allocation_id = 'id4'
# Ends up exercising the alternate path and would
# not appear in the list.
self.nodes[5].instance_id = None
self.nodes[6].instance_id = None
self.nodes[7].instance_id = None
self.api.baremetal.nodes.return_value = self.nodes
self.allocations = [mock.Mock(id='id2')]
self.allocations = [
mock.Mock(id='id0'),
mock.Mock(id='id1'),
mock.Mock(id='id2'),
mock.Mock(id='id3'),
mock.Mock(id='id4'),
]
self.api.baremetal.allocations.return_value = self.allocations
def test_list(self):
instances = self.pr.list_instances()
self.assertTrue(all(isinstance(i, _instance.Instance)
for i in instances))
self.assertEqual(self.nodes[:6], [i.node for i in instances])
self.assertEqual([self.api.baremetal.get_allocation.return_value] * 6,
[i.allocation for i in instances])
self.assertEqual(self.nodes[:5], [i.node for i in instances])
allocations = [self.api.baremetal.get_allocation.return_value] * 5
self.assertEqual(allocations, [i.allocation for i in instances])
self.api.baremetal.nodes.assert_called_once_with(associated=True,
details=True)
self.api.baremetal.allocations.assert_called_once()
class TestListInstancesBadUpgrade(Base):
def setUp(self):
super(TestListInstancesBadUpgrade, self).setUp()
self.nodes = [
mock.Mock(spec=NODE_FIELDS, provision_state=state,
instance_id='1234', allocation_id=None)
for state in ('active', 'active')
]
self.nodes[0].allocation_id = 'id0'
self.nodes[1].id = '0001'
self.nodes[1].allocation_id = None
self.nodes[1].name = 'fake_name'
self.nodes[1].instance_id = 'id5'
self.api.baremetal.nodes.return_value = self.nodes
self.allocations = [
mock.Mock(id='id0'),
]
self.api.baremetal.allocations.return_value = self.allocations
def test_list(self):
self.assertRaisesRegex(
exceptions.InstanceNotFound,
'A error has been detected in the state of '
'the instance allocation records inside of '
'Ironic where a node has an assigned Instance '
'ID, but no allocation record. If only '
'Metalsmith is being used with Ironic, '
'this can be safely corrected with manual '
'intervention. To correct this, execute this '
'command: openstack baremetal allocation '
'create --uuid id5 --name fake_name --node 0001',
self.pr.list_instances)
self.api.baremetal.nodes.assert_called_once_with(associated=True,
details=True)
self.api.baremetal.allocations.assert_called_once()