Merge "Delete a compute node's resource provider when node is deleted"

This commit is contained in:
Jenkins 2017-02-03 18:08:56 +00:00 committed by Gerrit Code Review
commit 245c54796f
5 changed files with 136 additions and 1 deletions

View File

@ -6583,6 +6583,11 @@ class ComputeManager(manager.Manager):
{'id': cn.id, 'hh': cn.hypervisor_hostname,
'nodes': nodenames})
cn.destroy()
# Delete the corresponding resource provider in placement,
# along with any associated allocations and inventory.
# TODO(cdent): Move use of reportclient into resource tracker.
self.scheduler_client.reportclient.delete_resource_provider(
context, cn, cascade=True)
def _get_compute_nodes_in_db(self, context, use_slave=False):
try:

View File

@ -749,3 +749,40 @@ class SchedulerReportClient(object):
LOG.warning(_LW('Deleting stale allocation for instance %s'),
uuid)
self._delete_allocation_for_instance(uuid)
@safe_connect
def delete_resource_provider(self, context, compute_node, cascade=False):
"""Deletes the ResourceProvider record for the compute_node.
:param context: The security context
:param compute_node: The nova.objects.ComputeNode object that is the
resource provider being deleted.
:param cascade: Boolean value that, when True, will first delete any
associated Allocation and Inventory records for the
compute node
"""
nodename = compute_node.hypervisor_hostname
host = compute_node.host
rp_uuid = compute_node.uuid
if cascade:
# Delete any allocations for this resource provider.
# Since allocations are by consumer, we get the consumers on this
# host, which are its instances.
instances = objects.InstanceList.get_by_host_and_node(context,
host, nodename)
for instance in instances:
self._delete_allocation_for_instance(instance.uuid)
url = "/resource_providers/%s" % rp_uuid
resp = self.delete(url)
if resp:
LOG.info(_LI("Deleted resource provider %s"), rp_uuid)
else:
# Check for 404 since we don't need to log a warning if we tried to
# delete something which doesn"t actually exist.
if resp.status_code != 404:
LOG.warning(
_LW("Unable to delete resource provider "
"%(uuid)s: (%(code)i %(text)s)"),
{"uuid": rp_uuid,
"code": resp.status_code,
"text": resp.text})

View File

@ -210,12 +210,14 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase):
self.assertTrue(log_mock.info.called)
self.assertIsNone(self.compute._resource_tracker)
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'delete_resource_provider')
@mock.patch.object(manager.ComputeManager,
'update_available_resource_for_node')
@mock.patch.object(fake_driver.FakeDriver, 'get_available_nodes')
@mock.patch.object(manager.ComputeManager, '_get_compute_nodes_in_db')
def test_update_available_resource(self, get_db_nodes, get_avail_nodes,
update_mock):
update_mock, del_rp_mock):
db_nodes = [self._make_compute_node('node%s' % i, i)
for i in range(1, 5)]
avail_nodes = set(['node2', 'node3', 'node4', 'node5'])
@ -233,6 +235,8 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase):
for db_node in db_nodes:
if db_node.hypervisor_hostname == 'node1':
db_node.destroy.assert_called_once_with()
del_rp_mock.assert_called_once_with(self.context, db_node,
cascade=True)
else:
self.assertFalse(db_node.destroy.called)

View File

@ -1301,3 +1301,83 @@ class TestAllocations(SchedulerReportClientTestCase):
mock_log.info.assert_not_called()
# make sure warning wasn't called for the 404
mock_log.warning.assert_not_called()
@mock.patch("nova.scheduler.client.report.SchedulerReportClient."
"delete")
@mock.patch("nova.scheduler.client.report.SchedulerReportClient."
"_delete_allocation_for_instance")
@mock.patch("nova.objects.InstanceList.get_by_host_and_node")
def test_delete_resource_provider_cascade(self, mock_by_host,
mock_del_alloc, mock_delete):
cn = objects.ComputeNode(uuid=uuids.cn, host="fake_host",
hypervisor_hostname="fake_hostname", )
inst1 = objects.Instance(uuid=uuids.inst1)
inst2 = objects.Instance(uuid=uuids.inst2)
mock_by_host.return_value = objects.InstanceList(
objects=[inst1, inst2])
resp_mock = mock.Mock(status_code=204)
mock_delete.return_value = resp_mock
self.client.delete_resource_provider(self.context, cn, cascade=True)
self.assertEqual(2, mock_del_alloc.call_count)
exp_url = "/resource_providers/%s" % uuids.cn
mock_delete.assert_called_once_with(exp_url)
@mock.patch("nova.scheduler.client.report.SchedulerReportClient."
"delete")
@mock.patch("nova.scheduler.client.report.SchedulerReportClient."
"_delete_allocation_for_instance")
@mock.patch("nova.objects.InstanceList.get_by_host_and_node")
def test_delete_resource_provider_no_cascade(self, mock_by_host,
mock_del_alloc, mock_delete):
cn = objects.ComputeNode(uuid=uuids.cn, host="fake_host",
hypervisor_hostname="fake_hostname", )
inst1 = objects.Instance(uuid=uuids.inst1)
inst2 = objects.Instance(uuid=uuids.inst2)
mock_by_host.return_value = objects.InstanceList(
objects=[inst1, inst2])
resp_mock = mock.Mock(status_code=204)
mock_delete.return_value = resp_mock
self.client.delete_resource_provider(self.context, cn)
mock_del_alloc.assert_not_called()
exp_url = "/resource_providers/%s" % uuids.cn
mock_delete.assert_called_once_with(exp_url)
@mock.patch("nova.scheduler.client.report.SchedulerReportClient."
"delete")
@mock.patch('nova.scheduler.client.report.LOG')
def test_delete_resource_provider_log_calls(self, mock_log, mock_delete):
# First, check a successful call
cn = objects.ComputeNode(uuid=uuids.cn, host="fake_host",
hypervisor_hostname="fake_hostname", )
resp_mock = mock.MagicMock(status_code=204)
try:
resp_mock.__nonzero__.return_value = True
except AttributeError:
# py3 uses __bool__
resp_mock.__bool__.return_value = True
mock_delete.return_value = resp_mock
self.client.delete_resource_provider(self.context, cn)
# With a 204, only the info should be called
self.assertEqual(1, mock_log.info.call_count)
self.assertEqual(0, mock_log.warning.call_count)
# Now check a 404 response
mock_log.reset_mock()
resp_mock.status_code = 404
try:
resp_mock.__nonzero__.return_value = False
except AttributeError:
# py3 uses __bool__
resp_mock.__bool__.return_value = False
self.client.delete_resource_provider(self.context, cn)
# With a 404, neither log message should be called
self.assertEqual(0, mock_log.info.call_count)
self.assertEqual(0, mock_log.warning.call_count)
# Finally, check a 409 response
mock_log.reset_mock()
resp_mock.status_code = 409
self.client.delete_resource_provider(self.context, cn)
# With a 409, only the warning should be called
self.assertEqual(0, mock_log.info.call_count)
self.assertEqual(1, mock_log.warning.call_count)

View File

@ -0,0 +1,9 @@
---
issues:
- |
Ironic nodes that were deleted from ironic's database during Newton
may result in orphaned resource providers causing incorrect scheduling
decisions, leading to a reschedule. If this happens, the orphaned
resource providers will need to be identified and removed.
See also https://bugs.launchpad.net/nova/+bug/1661258