Avoid inventory DELETE API (no conflict detection)

SchedulerReportClient._delete_inventory used the DELETE
/resource_providers/{u}/inventories API, which does not provide a way to
send the generation down (see bug 1746373), and is therefore subject to
concurrency errors.

This change set removes the _delete_inventory method and uses PUT
/resource_providers/{u}/inventories with an empty 'inventories' dict
instead, because that API *does* take the generation in the payload.

Doing this makes moot part of bug 1746075 by removing one of the code
paths that exploits undocumented knowledge of how placement uses the
generation counter.

Change-Id: Ia8131b32debd56b28315ef430ee4e8888b6f56e7
Closes-Bug: #1746374
Related-Bug: #1746373
Related-Bug: #1746075
This commit is contained in:
Eric Fried 2018-01-30 21:53:52 -06:00
parent 87036b4b27
commit 26c593c91f
2 changed files with 23 additions and 269 deletions

View File

@ -939,66 +939,6 @@ class SchedulerReportClient(object):
time.sleep(1)
return False
@safe_connect
def _delete_inventory(self, context, rp_uuid):
"""Deletes all inventory records for a resource provider with the
supplied UUID.
"""
if not self._provider_tree.has_inventory(rp_uuid):
return None
curr = self._refresh_and_get_inventory(context, rp_uuid)
# Check to see if we need to update placement's view
if not curr.get('inventories', {}):
msg = "No inventory to delete from resource provider %s."
LOG.debug(msg, rp_uuid)
return
msg = ("Resource provider %s reported no inventory but previous "
"inventory was detected. Deleting existing inventory records.")
LOG.info(msg, rp_uuid)
cur_gen = curr['resource_provider_generation']
url = '/resource_providers/%s/inventories' % rp_uuid
r = self.delete(url, version="1.5",
global_request_id=context.global_id)
placement_req_id = get_placement_request_id(r)
msg_args = {
'rp_uuid': rp_uuid,
'placement_req_id': placement_req_id,
}
if r.status_code == 204:
self._provider_tree.update_inventory(rp_uuid, {}, cur_gen + 1)
LOG.info("[%(placement_req_id)s] Deleted all inventory for "
"resource provider %(rp_uuid)s.", msg_args)
return
elif r.status_code == 404:
# This can occur if another thread deleted the inventory and the
# resource provider already
LOG.debug("[%(placement_req_id)s] Resource provider %(rp_uuid)s "
"deleted by another thread when trying to delete "
"inventory. Ignoring.",
msg_args)
self._provider_tree.remove(rp_uuid)
self.association_refresh_time.pop(rp_uuid, None)
return
elif r.status_code == 409:
rc_str = _extract_inventory_in_use(r.text)
if rc_str is not None:
msg = ("[%(placement_req_id)s] We cannot delete inventory "
"%(rc_str)s for resource provider %(rp_uuid)s because "
"the inventory is in use.")
msg_args['rc_str'] = rc_str
LOG.warning(msg, msg_args)
return
msg = ("[%(placement_req_id)s] Failed to delete inventory for "
"resource provider %(rp_uuid)s. Got error response: %(err)s.")
msg_args['err'] = r.text
LOG.error(msg, msg_args)
def get_provider_tree_and_ensure_root(self, context, rp_uuid, name=None,
parent_provider_uuid=None):
"""Returns a fresh ProviderTree representing all providers which are in
@ -1059,10 +999,12 @@ class SchedulerReportClient(object):
# Auto-create custom resource classes coming from a virt driver
self._ensure_resource_classes(context, set(inv_data))
if inv_data:
self._update_inventory(context, rp_uuid, inv_data)
else:
self._delete_inventory(context, rp_uuid)
# NOTE(efried): Do not use the DELETE API introduced in microversion
# 1.5, even if the new inventory is empty. It provides no way of
# sending the generation down, so no way to trigger/detect a conflict
# if an out-of-band update occurs between when we GET the latest and
# when we invoke the DELETE. See bug #1746374.
self._update_inventory(context, rp_uuid, inv_data)
@safe_connect
def _ensure_traits(self, context, traits):
@ -1265,10 +1207,12 @@ class SchedulerReportClient(object):
self._ensure_resource_provider(context, compute_node.uuid,
compute_node.hypervisor_hostname)
inv_data = _compute_node_to_inventory_dict(compute_node)
if inv_data:
self._update_inventory(context, compute_node.uuid, inv_data)
else:
self._delete_inventory(context, compute_node.uuid)
# NOTE(efried): Do not use the DELETE API introduced in microversion
# 1.5, even if the new inventory is empty. It provides no way of
# sending the generation down, so no way to trigger/detect a conflict
# if an out-of-band update occurs between when we GET the latest and
# when we invoke the DELETE. See bug #1746374.
self._update_inventory(context, compute_node.uuid, inv_data)
@safe_connect
def get_allocations_for_consumer(self, context, consumer):

View File

@ -15,7 +15,6 @@ import time
from keystoneauth1 import exceptions as ks_exc
import mock
import requests
import six
from six.moves.urllib import parse
import nova.conf
@ -2448,11 +2447,9 @@ class TestComputeNodeToInventoryDict(test.NoDBTestCase):
class TestInventory(SchedulerReportClientTestCase):
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'_ensure_resource_provider')
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'_delete_inventory')
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'_update_inventory')
def test_update_compute_node(self, mock_ui, mock_delete, mock_erp):
def test_update_compute_node(self, mock_ui, mock_erp):
cn = self.compute_node
self.client.update_compute_node(self.context, cn)
mock_erp.assert_called_once_with(self.context, cn.uuid,
@ -2488,18 +2485,14 @@ class TestInventory(SchedulerReportClientTestCase):
cn.uuid,
expected_inv_data,
)
self.assertFalse(mock_delete.called)
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'_ensure_resource_provider')
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'_delete_inventory')
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'_update_inventory')
def test_update_compute_node_no_inv(self, mock_ui, mock_delete,
mock_erp):
"""Ensure that if there are no inventory records, that we call
_delete_inventory() instead of _update_inventory().
def test_update_compute_node_no_inv(self, mock_ui, mock_erp):
"""Ensure that if there are no inventory records, we still call
_update_inventory().
"""
cn = self.compute_node
cn.vcpus = 0
@ -2508,180 +2501,7 @@ class TestInventory(SchedulerReportClientTestCase):
self.client.update_compute_node(self.context, cn)
mock_erp.assert_called_once_with(self.context, cn.uuid,
cn.hypervisor_hostname)
mock_delete.assert_called_once_with(self.context, cn.uuid)
self.assertFalse(mock_ui.called)
@mock.patch.object(report.LOG, 'info')
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'delete')
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'get')
def test_delete_inventory(self, mock_get, mock_delete, mock_info):
cn = self.compute_node
# Make sure the resource provider exists for preventing to call the API
self._init_provider_tree()
mock_get.return_value.json.return_value = {
'resource_provider_generation': 1,
'inventories': {
'VCPU': {'total': 16},
}
}
mock_delete.return_value.status_code = 204
mock_delete.return_value.headers = {'x-openstack-request-id':
uuids.request_id}
result = self.client._delete_inventory(self.context, cn.uuid)
self.assertIsNone(result)
self.assertTrue(mock_delete.called)
self.assertEqual(uuids.request_id,
mock_info.call_args[0][1]['placement_req_id'])
mock_delete.assert_called_once_with(
'/resource_providers/%s/inventories' % cn.uuid,
version='1.5', global_request_id=self.context.global_id)
@mock.patch('nova.scheduler.client.report._extract_inventory_in_use')
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'delete')
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'get')
def test_delete_inventory_already_no_inventory(self, mock_get, mock_delete,
mock_extract):
cn = self.compute_node
# Make sure the resource provider exists for preventing to call the API
self._init_provider_tree()
mock_get.return_value.json.return_value = {
'resource_provider_generation': 1,
'inventories': {
}
}
result = self.client._delete_inventory(self.context, cn.uuid)
self.assertIsNone(result)
self.assertFalse(mock_delete.called)
self.assertFalse(mock_extract.called)
self._validate_provider(cn.uuid, generation=1)
@mock.patch.object(report.LOG, 'warning')
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'delete')
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'get')
def test_delete_inventory_inventory_in_use(self, mock_get, mock_delete,
mock_warn):
cn = self.compute_node
# Make sure the resource provider exists for preventing to call the API
self._init_provider_tree()
mock_get.return_value.json.return_value = {
'resource_provider_generation': 1,
'inventories': {
'VCPU': {'total': 16},
'MEMORY_MB': {'total': 1024},
'DISK_GB': {'total': 10},
}
}
mock_delete.return_value.status_code = 409
mock_delete.return_value.headers = {'x-openstack-request-id':
uuids.request_id}
rc_str = "VCPU, MEMORY_MB"
in_use_exc = exception.InventoryInUse(
resource_classes=rc_str,
resource_provider=cn.uuid,
)
fault_text = """
409 Conflict
There was a conflict when trying to complete your request.
update conflict: %s
""" % six.text_type(in_use_exc)
mock_delete.return_value.text = fault_text
mock_delete.return_value.json.return_value = {
'resource_provider_generation': 44,
'inventories': {
}
}
result = self.client._delete_inventory(self.context, cn.uuid)
self.assertIsNone(result)
self.assertTrue(mock_warn.called)
self.assertEqual(uuids.request_id,
mock_warn.call_args[0][1]['placement_req_id'])
@mock.patch.object(report.LOG, 'debug')
@mock.patch.object(report.LOG, 'info')
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'delete')
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'get')
def test_delete_inventory_inventory_404(self, mock_get, mock_delete,
mock_info, mock_debug):
"""Test that when we attempt to delete all the inventory for a resource
provider but another thread has already deleted that resource provider,
that we simply remove the resource provider from our local cache and
return.
"""
cn = self.compute_node
# Make sure the resource provider exists for preventing to call the API
self._init_provider_tree()
self.client.association_refresh_time[uuids.cn] = mock.Mock()
mock_get.return_value.json.return_value = {
'resource_provider_generation': 1,
'inventories': {
'VCPU': {'total': 16},
'MEMORY_MB': {'total': 1024},
'DISK_GB': {'total': 10},
}
}
mock_delete.return_value.status_code = 404
mock_delete.return_value.headers = {'x-openstack-request-id':
uuids.request_id}
result = self.client._delete_inventory(self.context, cn.uuid)
self.assertIsNone(result)
self.assertFalse(self.client._provider_tree.exists(cn.uuid))
self.assertTrue(mock_debug.called)
self.assertNotIn(cn.uuid, self.client.association_refresh_time)
self.assertIn('deleted by another thread', mock_debug.call_args[0][0])
self.assertEqual(uuids.request_id,
mock_debug.call_args[0][1]['placement_req_id'])
@mock.patch.object(report.LOG, 'error')
@mock.patch.object(report.LOG, 'warning')
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'delete')
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'get')
def test_delete_inventory_inventory_error(self, mock_get, mock_delete,
mock_warn, mock_error):
cn = self.compute_node
# Make sure the resource provider exists for preventing to call the API
self._init_provider_tree()
mock_get.return_value.json.return_value = {
'resource_provider_generation': 1,
'inventories': {
'VCPU': {'total': 16},
'MEMORY_MB': {'total': 1024},
'DISK_GB': {'total': 10},
}
}
mock_delete.return_value.status_code = 409
mock_delete.return_value.text = (
'There was a failure'
)
mock_delete.return_value.json.return_value = {
'resource_provider_generation': 44,
'inventories': {
}
}
mock_delete.return_value.headers = {'x-openstack-request-id':
uuids.request_id}
result = self.client._delete_inventory(self.context, cn.uuid)
self.assertIsNone(result)
self.assertFalse(mock_warn.called)
self.assertTrue(mock_error.called)
self.assertEqual(uuids.request_id,
mock_error.call_args[0][1]['placement_req_id'])
mock_ui.assert_called_once_with(self.context, cn.uuid, {})
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'get')
@ -3116,14 +2936,12 @@ There was a conflict when trying to complete your request.
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'_update_inventory')
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'_delete_inventory')
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'_ensure_resource_classes')
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'_ensure_resource_provider')
def test_set_inventory_for_provider_no_custom(self, mock_erp, mock_erc,
mock_del, mock_upd):
mock_upd):
"""Tests that inventory records of all standard resource classes are
passed to the report client's _update_inventory() method.
"""
@ -3173,18 +2991,15 @@ There was a conflict when trying to complete your request.
mock.sentinel.rp_uuid,
inv_data,
)
self.assertFalse(mock_del.called)
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'_update_inventory')
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'_delete_inventory')
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'_ensure_resource_classes')
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'_ensure_resource_provider')
def test_set_inventory_for_provider_no_inv(self, mock_erp, mock_erc,
mock_del, mock_upd):
mock_upd):
"""Tests that passing empty set of inventory records triggers a delete
of inventory for the provider.
"""
@ -3202,19 +3017,17 @@ There was a conflict when trying to complete your request.
parent_provider_uuid=None,
)
mock_erc.assert_called_once_with(self.context, set())
self.assertFalse(mock_upd.called)
mock_del.assert_called_once_with(self.context, mock.sentinel.rp_uuid)
mock_upd.assert_called_once_with(
self.context, mock.sentinel.rp_uuid, {})
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'_update_inventory')
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'_delete_inventory')
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'_ensure_resource_classes')
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'_ensure_resource_provider')
def test_set_inventory_for_provider_with_custom(self, mock_erp,
mock_erc, mock_del, mock_upd):
def test_set_inventory_for_provider_with_custom(self, mock_erp, mock_erc,
mock_upd):
"""Tests that inventory records that include a custom resource class
are passed to the report client's _update_inventory() method and that
the custom resource class is auto-created.
@ -3274,10 +3087,7 @@ There was a conflict when trying to complete your request.
mock.sentinel.rp_uuid,
inv_data,
)
self.assertFalse(mock_del.called)
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'_delete_inventory', new=mock.Mock())
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'_ensure_resource_classes', new=mock.Mock())
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'